class Epuber::Compiler::XHTMLProcessor

Public Class Methods

add_missing_root_elements(xhtml_doc, title, epub_version) click to toggle source

Method to add all missing items in XML root

Required items:

- html (with all namespaces and other attributes)
  - body
  - head (with title)

@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [String] title title of this document, since this is required by EPUB specification @param [Epuber::Version] epub_version version of result EPUB

@return nil

# File lib/epuber/compiler/xhtml_processor.rb, line 103
# Makes sure the document contains `body`, `html` and `html > head` elements,
# creating whichever are missing, and swaps the doctype for documents whose
# EPUB version is below 3.0.
#
# @param [Nokogiri::XML::Document] xhtml_doc document that is modified in place
# @param [String] title text placed into the `title` element of a newly created `head`
# @param [Epuber::Version] epub_version version of the resulting EPUB
#
# @return nil when `epub_version` is not below 3.0, otherwise the result of
#   `create_internal_subset`
def self.add_missing_root_elements(xhtml_doc, title, epub_version)
  # Step 1: body. An existing `html` root simply receives an empty body,
  # any other root gets wrapped into a new body.
  body_missing = xhtml_doc.at_css('body').nil?
  if body_missing
    current_root = xhtml_doc.root
    if current_root.node_name == 'html'
      current_root << xhtml_doc.create_element('body')
    else
      current_root.surround_with_element('body')
    end
  end

  # Step 2: html. Either wrap the current root into a namespaced `html`, or
  # add the namespace attributes to an existing `html` that declares none.
  html = xhtml_doc.at_css('html')
  if html.nil?
    namespace_attrs = { 'xmlns' => 'http://www.w3.org/1999/xhtml' }
    namespace_attrs['xmlns:epub'] = 'http://www.idpf.org/2007/ops' if epub_version >= 3
    html = xhtml_doc.root.surround_with_element('html', namespace_attrs)
  elsif html.namespaces.empty?
    html['xmlns'] = 'http://www.w3.org/1999/xhtml'
    html['xmlns:epub'] = 'http://www.idpf.org/2007/ops' if epub_version >= 3
  end

  # Step 3: head (with title, plus a charset meta for EPUB 3), placed in front
  # of the first child of `html` when there is one.
  if xhtml_doc.at_css('html > head').nil?
    new_head = xhtml_doc.create_element('head')
    new_head << xhtml_doc.create_element('title', title)
    new_head << xhtml_doc.create_element('meta', charset: 'utf-8') if epub_version >= 3.0

    leading_child = html.children.first
    if leading_child
      leading_child.before(new_head)
    else
      html << new_head
    end
  end

  # Step 4: documents below EPUB 3.0 get the XHTML 1.1 doctype, see
  # https://github.com/IDPF/epubcheck/issues/631
  if epub_version < 3.0
    xhtml_doc.internal_subset&.remove
    xhtml_doc.create_internal_subset('html', '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd')
  end
end
add_scripts(xhtml_doc, scripts) click to toggle source

Method for adding scripts with links, method will not add duplicate items

@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [Array<String>] scripts links to files

@return nil

# File lib/epuber/compiler/xhtml_processor.rb, line 171
# Appends a `script` element to `html > head` for every link in `scripts`
# that is not yet referenced by a `script` element there.
#
# Assumes the document already contains `html > head`; no nil check is
# performed on the looked-up head.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
# @param [Array<String>] scripts links to script files
#
# @return [Array<String>] the links that were appended
def self.add_scripts(xhtml_doc, scripts)
  head = xhtml_doc.at_css('html > head')
  old_links = head.css('script').map { |node| node['src'] }

  # `Array#-` only drops links that are already in the head; `uniq` also
  # collapses repeats inside `scripts` itself, so no link is added twice.
  links_to_add = (scripts - old_links).uniq

  links_to_add.each do |path|
    head << xhtml_doc.create_element('script', src: path, type: 'text/javascript')
  end
end
add_styles(xhtml_doc, styles) click to toggle source

Method for adding style sheets with links, method will not add duplicate items

@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [Array<String>] styles links to files

@return nil

# File lib/epuber/compiler/xhtml_processor.rb, line 153
# Appends a stylesheet `link` element to `html > head` for every link in
# `styles` that is not yet referenced by a `link[rel="stylesheet"]` there.
#
# Assumes the document already contains `html > head`; no nil check is
# performed on the looked-up head.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
# @param [Array<String>] styles links to style sheet files
#
# @return [Array<String>] the links that were appended
def self.add_styles(xhtml_doc, styles)
  head = xhtml_doc.at_css('html > head')
  old_links = head.css('link[rel="stylesheet"]').map { |node| node['href'] }

  # `Array#-` only drops links that are already in the head; `uniq` also
  # collapses repeats inside `styles` itself, so no link is added twice.
  links_to_add = (styles - old_links).uniq

  links_to_add.each do |path|
    head << xhtml_doc.create_element('link', href: path, rel: 'stylesheet', type: 'text/css')
  end
end
add_viewport(xhtml_doc, viewport_size) click to toggle source

Adds a viewport meta tag to the head of the document, but only if the head does not already contain one

@param [Nokogiri::XML::Document] xhtml_doc @param [Epuber::Size] viewport_size

# File lib/epuber/compiler/xhtml_processor.rb, line 187
# Appends `<meta name="viewport">` to `html > head` unless the head already
# holds such a meta element.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify
# @param [Epuber::Size] viewport_size object answering `width` and `height`
#
# @return nil when a viewport meta already exists, otherwise the result of
#   appending the new element to the head
def self.add_viewport(xhtml_doc, viewport_size)
  head_node = xhtml_doc.at_css('html > head')
  existing_meta = head_node.at_css("meta[name='viewport']")
  return unless existing_meta.nil?

  content = "width=#{viewport_size.width},height=#{viewport_size.height}"
  viewport_meta = xhtml_doc.create_element('meta', name: 'viewport', content: content)
  head_node << viewport_meta
end
find_global_ids(xhtml_doc) click to toggle source

@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @return [Array<String>] list of global ids (without dollar signs)

# File lib/epuber/compiler/xhtml_processor.rb, line 371
# Collects the global ids used in the document, without their first character
# (the `$` marker that `find_global_ids_nodes` selects on).
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Array<String>] list of global ids (without dollar signs)
def self.find_global_ids(xhtml_doc)
  global_nodes = find_global_ids_nodes(xhtml_doc)

  global_nodes.map do |node|
    raw_id = node['id']
    raw_id[1..-1]
  end
end
find_global_ids_nodes(xhtml_doc) click to toggle source

@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @return [Array<Nokogiri::XML::Node>] list of nodes with global ids

# File lib/epuber/compiler/xhtml_processor.rb, line 363
# Selects every node whose `id` attribute starts with `$`.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Array<Nokogiri::XML::Node>] list of nodes with global ids
def self.find_global_ids_nodes(xhtml_doc)
  global_id_selector = '[id^="$"]'
  xhtml_doc.css(global_id_selector)
end
resolve_images(xhtml_doc, file_path, file_resolver) click to toggle source

@param [Nokogiri::XML::Document] xhtml_doc @param [String] file_path path of referring file @param [FileResolver] file_resolver

@return nil

# File lib/epuber/compiler/xhtml_processor.rb, line 322
# Resolves the `src` attribute of every `img` element through
# `resolve_resources_in`, using the `:image` resource group.
#
# @param [Nokogiri::XML::Document] xhtml_doc
# @param [String] file_path path of referring file
# @param [FileResolver] file_resolver
#
# @return whatever `resolve_resources_in` returns
def self.resolve_images(xhtml_doc, file_path, file_resolver)
  selector = 'img'
  attribute = 'src'

  resolve_resources_in(selector, attribute, :image, xhtml_doc, file_path, file_resolver)
end
resolve_mathml_namespace(xhtml_doc) click to toggle source
# File lib/epuber/compiler/xhtml_processor.rb, line 310
# Calls `add_namespace` with the MathML namespace URI on every node matched by
# the `math` CSS selector.
#
# NOTE(review): Nokogiri documents a nil prefix as the way to declare a
# default namespace; confirm that passing 'xmlns' as the prefix produces the
# intended declaration.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify
#
# @return the collection returned by `xhtml_doc.css('math')`
def self.resolve_mathml_namespace(xhtml_doc)
  math_nodes = xhtml_doc.css('math')
  math_nodes.each { |node| node.add_namespace('xmlns', 'http://www.w3.org/1998/Math/MathML') }
end
resolve_resources_in(node_css_query, attribute_name, resource_group, xhtml_doc, file_path, file_resolver) click to toggle source
# File lib/epuber/compiler/xhtml_processor.rb, line 346
# Rewrites one attribute on every node matched by a CSS query with the path
# returned by `Compiler::FileTypes::SourceFile.resolve_relative_file`.
#
# Nodes without the attribute are skipped, and the attribute is left as it is
# when the resolver returns a falsy value.
#
# @param [String] node_css_query CSS selector of the nodes to process
# @param [String] attribute_name attribute holding the resource path
# @param [Symbol] resource_group passed to the resolver as `group:`
# @param [Nokogiri::XML::Document] xhtml_doc document to modify
# @param [String] file_path path of referring file
# @param [FileResolver] file_resolver
#
# @return the collection returned by `xhtml_doc.css(node_css_query)`
def self.resolve_resources_in(node_css_query, attribute_name, resource_group, xhtml_doc, file_path, file_resolver)
  xhtml_doc.css(node_css_query).each do |node|
    original_path = node[attribute_name]
    next if original_path.nil?

    resolved_path = Compiler::FileTypes::SourceFile.resolve_relative_file(
      file_path, original_path, file_resolver, group: resource_group, location: node
    )
    next unless resolved_path

    node[attribute_name] = resolved_path
  end
end
resolve_scripts(xhtml_doc, file_path, file_resolver) click to toggle source

@param [Nokogiri::XML::Document] xhtml_doc @param [String] file_path path of referring file @param [FileResolver] file_resolver

@return nil

# File lib/epuber/compiler/xhtml_processor.rb, line 332
# Resolves the `src` attribute of every `script` element through
# `resolve_resources_in`, using the `:script` resource group.
#
# @param [Nokogiri::XML::Document] xhtml_doc
# @param [String] file_path path of referring file
# @param [FileResolver] file_resolver
#
# @return whatever `resolve_resources_in` returns
def self.resolve_scripts(xhtml_doc, file_path, file_resolver)
  selector = 'script'
  attribute = 'src'

  resolve_resources_in(selector, attribute, :script, xhtml_doc, file_path, file_resolver)
end
resolve_stylesheets(xhtml_doc, file_path, file_resolver) click to toggle source

@param [Nokogiri::XML::Document] xhtml_doc @param [String] file_path path of referring file @param [FileResolver] file_resolver

@return nil

# File lib/epuber/compiler/xhtml_processor.rb, line 342
# Resolves the `href` attribute of every `link[rel="stylesheet"]` element
# through `resolve_resources_in`, using the `:style` resource group.
#
# @param [Nokogiri::XML::Document] xhtml_doc
# @param [String] file_path path of referring file
# @param [FileResolver] file_resolver
#
# @return whatever `resolve_resources_in` returns
def self.resolve_stylesheets(xhtml_doc, file_path, file_resolver)
  selector = 'link[rel="stylesheet"]'
  attribute = 'href'

  resolve_resources_in(selector, attribute, :style, xhtml_doc, file_path, file_resolver)
end
using_javascript?(xhtml_doc) click to toggle source

@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with

@return [Bool]

# File lib/epuber/compiler/xhtml_processor.rb, line 289
# Tells whether the document contains at least one `script` element.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Bool]
def self.using_javascript?(xhtml_doc)
  first_script = xhtml_doc.at_css('script')
  !first_script.nil?
end
using_mathml?(xhtml_doc) click to toggle source

@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with

@return [Bool]

# File lib/epuber/compiler/xhtml_processor.rb, line 306
# Tells whether the document contains at least one `math` element that lives
# in the MathML namespace.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Bool]
def self.using_mathml?(xhtml_doc)
  namespaces = { 'math' => 'http://www.w3.org/1998/Math/MathML' }
  math_node = xhtml_doc.at_css('math|math', namespaces)
  !math_node.nil?
end
using_remote_resources?(xhtml_doc) click to toggle source
# File lib/epuber/compiler/xhtml_processor.rb, line 293
# Tells whether any `src` attribute, or any `href` attribute of a `link`
# element, holds a URL of the form `scheme://...`.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Bool]
def self.using_remote_resources?(xhtml_doc)
  remote_url = %r{^[^:/?#]+://.*}

  # selector => attribute to inspect; checked in this order, stopping at the
  # first match
  lookups = { '[src]' => 'src', 'link[href]' => 'href' }

  lookups.any? do |selector, attribute|
    xhtml_doc.css(selector).any? { |node| node[attribute] =~ remote_url }
  end
end
xml_doc_from_str_with_errors(text, file_path = nil) click to toggle source

Method for parsing incomplete XML, supports multiple root elements

@warning Because of the nature of XML, when the input string doesn't contain a root element, this method creates its own root element called `body`, since it will be used in the next steps.

@param [String] text input XHTML text

@return [Nokogiri::XML::Document] parsed document

# File lib/epuber/compiler/xhtml_processor.rb, line 23
# Parses possibly incomplete XHTML text (including text with several top-level
# elements) into a Nokogiri document and collects the parser errors.
#
# The XML header and all DOCTYPE declarations are cut out of the text, the
# remainder is wrapped into a synthetic `<root>` element for parsing, and the
# final document root is chosen afterwards (see the end of the method).
#
# @param [String] text input XHTML text (not modified, a copy is used)
# @param [String, nil] file_path path used for warnings, errors and the parsed document
#
# @return [Array] two items: the parsed document and an array of `Problem`
#   objects (empty when the parser reported no errors)
def self.xml_doc_from_str_with_errors(text, file_path = nil)
  # work on a copy, the text is edited in place below
  text = text.dup

  # newlines, carriage returns or spaces in front of the XML header: warn and strip them
  if /\A[\n\r ]+(<\?xml)/ =~ text
    UI.warning('XML header must be at the beginning of document',
               location: Epuber::Location.new(path: file_path, lineno: 1))

    text = text.lstrip
  end

  # cut the `<?xml ... ?>` header out of the text and remember it
  xml_header = nil
  if /\A\s*(<\?xml[^>]*\?>)/ =~ text
    match = Regexp.last_match
    xml_header = text[match.begin(1)...match.end(1)]
    text[match.begin(1)...match.end(1)] = ''
  end

  # cut every `<!DOCTYPE ...>` declaration (with its trailing newlines) out of
  # the text; each pass removes one match, so the loop stops once none is left
  doctypes = []
  while /(\n|\?>|\A)?(<!DOCTYPE [^>]*>\n*)/ =~ text
    doctypes << ::Regexp.last_match(2).strip

    match = Regexp.last_match
    text[match.begin(2)...match.end(2)] = ''
  end

  # prolog that goes in front of the synthetic root: header and doctypes, one per line
  before = ([xml_header] + doctypes).compact.join("\n")
  before += "\n" unless before.empty?

  parse_options = Nokogiri::XML::ParseOptions::DEFAULT_XML |
                  Nokogiri::XML::ParseOptions::NOERROR | # to silence any errors or warnings printing into console
                  Nokogiri::XML::ParseOptions::NOWARNING |
                  Nokogiri::XML::ParseOptions::NOENT

  # the synthetic <root> wrapper lets text with several top-level nodes parse as one document
  doc = Nokogiri::XML("#{before}<root>#{text}</root>", file_path, nil, parse_options)
  # text handed to Problem; unlike the parsed string it has no <root> wrapper
  text_for_errors = before + text
  doc.encoding = 'UTF-8'
  # NOTE(review): `file_path=` is not defined in this section — presumably a
  # project extension of the document class, confirm.
  doc.file_path = file_path

  # turn parser errors into Problem objects
  if doc.errors.empty?
    errors = []
  else
    errors = doc.errors.map do |e|
      Problem.new(:error, e.message, text_for_errors, line: e.line, column: e.column, file_path: file_path)
    end
  end

  # both names refer to the synthetic <root> element
  root = root_node = doc.root
  root_elements = root.children.select { |a| a.element? || a.comment? }

  # Choose the final root:
  # - exactly one element/comment child: that child becomes the root
  # - otherwise, an `html` element found below the wrapper becomes the root
  # - otherwise the wrapper itself is kept and renamed: to `body` when no
  #   `body` exists below it, to `html` when one does
  if root_elements.count == 1
    doc.root = root_elements.first
  elsif root_node.at_css('html')
    doc.root = root_node.at_css('html')
  elsif root_node.at_css('body').nil?
    root_node.node_name = 'body'
  else
    root_node.node_name = 'html'
  end

  [doc, errors]
end
xml_document_from_string(text, file_path = nil) click to toggle source
# File lib/epuber/compiler/xhtml_processor.rb, line 85
# Parses the text with `xml_doc_from_str_with_errors` and returns only the
# document, dropping the collected errors.
#
# @param [String] text input XHTML text
# @param [String, nil] file_path path forwarded to the parser
#
# @return the first item of the pair returned by `xml_doc_from_str_with_errors`
def self.xml_document_from_string(text, file_path = nil)
  parsed = xml_doc_from_str_with_errors(text, file_path)
  parsed.first
end