class Epuber::Compiler::XHTMLProcessor
Public Class Methods
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 103
# Makes sure the document contains `body`, `html` and `head` elements,
# creating whichever of them is missing, and for EPUB versions below 3.0
# replaces the internal DTD subset with the XHTML 1.1 doctype.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [String] title text for the `title` element of a newly created `head`
# @param [Epuber::Version] epub_version version of the resulting EPUB
#
# @return nil for EPUB 3.0 and newer, otherwise the result of `create_internal_subset`
def self.add_missing_root_elements(xhtml_doc, title, epub_version)
  xhtml_ns = 'http://www.w3.org/1999/xhtml'
  epub_ns = 'http://www.idpf.org/2007/ops'

  # body: append an empty one to an existing html root, otherwise wrap the current root
  unless xhtml_doc.at_css('body')
    current_root = xhtml_doc.root
    if current_root.node_name == 'html'
      current_root << xhtml_doc.create_element('body')
    else
      current_root.surround_with_element('body')
    end
  end

  # html: wrap the root when missing, or declare namespaces when none are present
  html = xhtml_doc.at_css('html')
  if html.nil?
    attrs = { 'xmlns' => xhtml_ns }
    attrs['xmlns:epub'] = epub_ns if epub_version >= 3
    html = xhtml_doc.root.surround_with_element('html', attrs)
  elsif html.namespaces.empty?
    html['xmlns'] = xhtml_ns
    html['xmlns:epub'] = epub_ns if epub_version >= 3
  end

  # head: built with a title (and a charset meta for EPUB >= 3.0), placed first in html
  unless xhtml_doc.at_css('html > head')
    head = xhtml_doc.create_element('head')
    head << xhtml_doc.create_element('title', title)
    head << xhtml_doc.create_element('meta', charset: 'utf-8') if epub_version >= 3.0

    first_child = html.children.first
    if first_child
      first_child.before(head)
    else
      html << head
    end
  end

  # https://github.com/IDPF/epubcheck/issues/631
  if epub_version < 3.0
    xhtml_doc.internal_subset&.remove
    xhtml_doc.create_internal_subset('html', '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd')
  end
end
Method to add all missing items in XML root
Required items:
- html (with all namespaces and other attributes) - body - head (with title)
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [String] title title of this document, since this is required by EPUB specification @param [Epuber::Version] epub_version version of result EPUB
@return nil
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 171
# Appends a `script` element to `html > head` for every path in `scripts`
# that is not already referenced by an existing `script[src]` in the head.
# Paths repeated inside `scripts` are added only once.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [Array<String>] scripts paths of script files to reference
#
# @return [Array<String>] the paths that were actually added
def self.add_scripts(xhtml_doc, scripts)
  head = xhtml_doc.at_css('html > head')
  existing_sources = head.css('script').map { |node| node['src'] }

  # Array#- keeps repeated entries of the receiver, so de-duplicate explicitly
  (scripts - existing_sources).uniq.each do |path|
    head << xhtml_doc.create_element('script', src: path, type: 'text/javascript')
  end
end
Method for adding scripts with links, method will not add duplicate items
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [Array<String>] scripts links to files
@return nil
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 153
# Appends a stylesheet `link` element to `html > head` for every path in
# `styles` that is not already referenced by an existing
# `link[rel="stylesheet"]` in the head. Paths repeated inside `styles` are
# added only once.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [Array<String>] styles paths of stylesheet files to reference
#
# @return [Array<String>] the paths that were actually added
def self.add_styles(xhtml_doc, styles)
  head = xhtml_doc.at_css('html > head')
  existing_hrefs = head.css('link[rel="stylesheet"]').map { |node| node['href'] }

  # Array#- keeps repeated entries of the receiver, so de-duplicate explicitly
  (styles - existing_hrefs).uniq.each do |path|
    head << xhtml_doc.create_element('link', href: path, rel: 'stylesheet', type: 'text/css')
  end
end
Method for adding style sheets with links, method will not add duplicate items
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [Array<String>] styles links to files
@return nil
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 187
# Appends a `meta[name='viewport']` element to `html > head`, unless the
# head already contains one.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [Epuber::Size] viewport_size object whose `width` and `height` are written into the tag
def self.add_viewport(xhtml_doc, viewport_size)
  head = xhtml_doc.at_css('html > head')

  if head.at_css("meta[name='viewport']").nil?
    content = "width=#{viewport_size.width},height=#{viewport_size.height}"
    head << xhtml_doc.create_element('meta', name: 'viewport', content: content)
  end
end
Adds viewport meta tag to head of some document, but only if there is not some existing tag
@param [Nokogiri::XML::Document] xhtml_doc @param [Epuber::Size] viewport_size
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 371
# Collects the `id` values of all nodes returned by `find_global_ids_nodes`,
# with the first character of each value dropped.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Array<String>] list of global ids (without the leading dollar sign)
def self.find_global_ids(xhtml_doc)
  nodes = find_global_ids_nodes(xhtml_doc)
  nodes.map do |node|
    raw_id = node['id']
    raw_id.slice(1..-1)
  end
end
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @return [Array<String>] list of global ids (without dollar signs)
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 363
# Finds every node whose `id` attribute starts with a dollar sign.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Array<Nokogiri::XML::Node>] list of nodes with global ids
def self.find_global_ids_nodes(xhtml_doc)
  global_id_selector = '[id^="$"]'
  xhtml_doc.css(global_id_selector)
end
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @return [Array<Nokogiri::XML::Node>] list of nodes with global ids
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 387
# Collects the `href` values of all nodes returned by
# `find_global_links_nodes`, with the first character of each value dropped.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Array<String>] list of global links (without the leading dollar sign)
def self.find_global_links(xhtml_doc)
  nodes = find_global_links_nodes(xhtml_doc)
  nodes.map do |node|
    raw_href = node['href']
    raw_href.slice(1..-1)
  end
end
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @return [Array<String>] list of global links (without dollar signs)
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 379
# Finds every node whose `href` attribute starts with a dollar sign.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Array<Nokogiri::XML::Node>] list of nodes with global links
def self.find_global_links_nodes(xhtml_doc)
  global_link_selector = '[href^="$"]'
  xhtml_doc.css(global_link_selector)
end
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @return [Array<Nokogiri::XML::Node>] list of nodes with global links
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 322
# Resolves the `src` attribute of every `img` element by delegating to
# `resolve_resources_in` with the `:image` resource group.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [String] file_path path of the referring file
# @param [FileResolver] file_resolver
def self.resolve_images(xhtml_doc, file_path, file_resolver)
  resolve_resources_in(
    'img', 'src', :image,
    xhtml_doc, file_path, file_resolver
  )
end
@param [Nokogiri::XML::Document] xhtml_doc @param [String] file_path path of referring file @param [FileResolver] file_resolver
@return nil
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 278
# Resolves the `href` attributes of `a` and `map > area` elements (in that
# order) via `resolve_links_for`, using the `:text` group.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [String] file_path path of the file containing the links
# @param [Epuber::Compiler::FileFinder] file_finder finder used to look files up
#
# @return [Array<URI>] resolved links
def self.resolve_links(xhtml_doc, file_path, file_finder)
  link_selectors = ['a', 'map > area']

  link_selectors.flat_map do |selector|
    resolve_links_for(xhtml_doc, selector, 'href', :text, file_path, file_finder)
  end
end
Resolves all links to files in XHTML document and returns the valid and resolved versions
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [String] file_path path to file from which is searching for other file @param [Epuber::Compiler::FileFinder] file_finder finder for searching for files
@return [Array<URI>] resolved links
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 246
# Resolves one attribute on every node matching `tag_name[attribute_name]`.
# Values starting with `$` are left untouched. A link that cannot be parsed
# or whose file cannot be found (or is ambiguous) is reported through
# `UI.warning` and skipped.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [String] tag_name CSS selector for the tag
# @param [String] attribute_name name of the attribute holding the link
# @param [Symbol | Array<Symbol>] groups groups passed on to the file lookup
# @param [String] file_path path of the file containing the links
# @param [Epuber::Compiler::FileFinder] file_finder finder used to look files up
#
# @return [Array<URI>] resolved links
def self.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder)
  selector = "#{tag_name}[#{attribute_name}]"

  xhtml_doc.css(selector).each_with_object([]) do |node, resolved|
    link = node[attribute_name]
    next if link.nil? || link.start_with?('$')

    target = resolved_link_to_file(link, groups, file_path, file_finder)
    resolved << target
    node[attribute_name] = target.to_s
  rescue UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError => e
    # warn and move on to the next node
    UI.warning(e.to_s, location: node)
  end
end
Resolves all links to files in XHTML document and returns the valid and resolved versions
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with @param [String] tag_name CSS selector for tag @param [String] attribute_name name of attribute @param [Symbol | Array<Symbol>] groups groups of the searching file, could be for example :image when searching
for file from tag <img>
@param [String] file_path path to file from which is searching for other file @param [Epuber::Compiler::FileFinder] file_finder finder for searching for files
@return [Array<URI>] resolved links
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 310
# Calls `add_namespace` with the MathML namespace URI on every node matched
# by the `math` selector.
#
# NOTE(review): the prefix passed here is the string 'xmlns'; confirm against
# Nokogiri's `add_namespace` documentation that this yields the intended
# namespace declaration.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
def self.resolve_mathml_namespace(xhtml_doc)
  mathml_uri = 'http://www.w3.org/1998/Math/MathML'
  math_nodes = xhtml_doc.css('math')

  math_nodes.each { |node| node.add_namespace('xmlns', mathml_uri) }
end
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 346
# For every node matching `node_css_query` that has `attribute_name` set,
# asks `Compiler::FileTypes::SourceFile.resolve_relative_file` for the
# resolved path and writes it back when a truthy value is returned.
#
# @param [String] node_css_query CSS selector of the nodes to process
# @param [String] attribute_name attribute holding the resource path
# @param [Symbol] resource_group group passed to the resolver as `group:`
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [String] file_path path of the referring file
# @param [FileResolver] file_resolver
def self.resolve_resources_in(node_css_query, attribute_name, resource_group, xhtml_doc, file_path, file_resolver)
  xhtml_doc.css(node_css_query).each do |node|
    original_path = node[attribute_name]

    unless original_path.nil?
      resolved_path = Compiler::FileTypes::SourceFile.resolve_relative_file(
        file_path, original_path, file_resolver,
        group: resource_group, location: node
      )
      node[attribute_name] = resolved_path if resolved_path
    end
  end
end
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 332
# Resolves the `src` attribute of every `script` element by delegating to
# `resolve_resources_in` with the `:script` resource group.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [String] file_path path of the referring file
# @param [FileResolver] file_resolver
def self.resolve_scripts(xhtml_doc, file_path, file_resolver)
  resolve_resources_in(
    'script', 'src', :script,
    xhtml_doc, file_path, file_resolver
  )
end
@param [Nokogiri::XML::Document] xhtml_doc @param [String] file_path path of referring file @param [FileResolver] file_resolver
@return nil
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 342
# Resolves the `href` attribute of every `link[rel="stylesheet"]` element by
# delegating to `resolve_resources_in` with the `:style` resource group.
#
# @param [Nokogiri::XML::Document] xhtml_doc document to modify in place
# @param [String] file_path path of the referring file
# @param [FileResolver] file_resolver
def self.resolve_stylesheets(xhtml_doc, file_path, file_resolver)
  resolve_resources_in(
    'link[rel="stylesheet"]', 'href', :style,
    xhtml_doc, file_path, file_resolver
  )
end
@param [Nokogiri::XML::Document] xhtml_doc @param [String] file_path path of referring file @param [FileResolver] file_resolver
@return nil
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 207
# Parses `path` as a URI and, for links pointing at local files, replaces
# the URI path with the one returned by `file_finder.find_file`.
#
# The parsed URI is returned unchanged when `path` is exactly `#`, when it
# has a scheme, or when it consists only of a non-empty fragment.
#
# @param [String] path pattern or path of the file
# @param [Symbol | Array<Symbol>] groups groups passed to the finder as `groups:`
# @param [String] file_path path of the referring file, passed as `context_path:`
# @param [Epuber::Compiler::FileFinders::Abstract] file_finder finder used to look files up
#
# @raise [FileFinders::FileNotFoundError] when `path` is empty
# @raise [UnparseableLinkError] when `path` cannot be parsed even after encoding
#
# @return [URI] resolved path to file or remote web page
def self.resolved_link_to_file(path, groups, file_path, file_finder)
  raise FileFinders::FileNotFoundError.new(path, file_path) if path.empty?

  uri = begin
    URI(path)
  rescue URI::InvalidURIError
    # second attempt with the path run through Addressable's encoder
    begin
      URI(Addressable::URI.encode(path))
    rescue URI::InvalidURIError
      raise UnparseableLinkError, "Unparseable link `#{path}`"
    end
  end

  # order matters: later checks are only reached for scheme-less URIs
  leave_untouched = path == '#' ||
                    !uri.scheme.nil? ||
                    (uri.path.empty? && !uri.fragment.nil? && !uri.fragment.empty?)
  return uri if leave_untouched

  uri.tap do |local|
    local.path = file_finder.find_file(local.path, groups: groups, context_path: file_path)
  end
end
Method which will resolve path to file from pattern
@param [String] path pattern or path of the file @param [Symbol | Array<Symbol>] groups groups of the searching file, could be for example :image when searching
for file from tag <img>
@param [String] file_path path to file from which is searching for other file @param [Epuber::Compiler::FileFinders::Abstract] file_finder finder for searching for files
@raise UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError
@return [URI] resolved path to file or remote web page
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 289
# Tells whether the document contains at least one `script` element.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Bool]
def self.using_javascript?(xhtml_doc)
  first_script = xhtml_doc.at_css('script')
  !first_script.nil?
end
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
@return [Bool]
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 306
# Tells whether the document contains at least one `math` element in the
# MathML namespace.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Bool]
def self.using_mathml?(xhtml_doc)
  namespaces = { 'math' => 'http://www.w3.org/1998/Math/MathML' }
  first_math = xhtml_doc.at_css('math|math', namespaces)
  !first_math.nil?
end
@param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
@return [Bool]
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 293
# Tells whether any `src` attribute, or any `href` attribute of a `link`
# element, matches the `scheme://` pattern below.
#
# @param [Nokogiri::XML::Document] xhtml_doc input XML document to work with
#
# @return [Bool]
def self.using_remote_resources?(xhtml_doc)
  remote_pattern = %r{^[^:/?#]+://.*}

  # link elements are only inspected when no remote src was found
  return true if xhtml_doc.css('[src]').any? { |node| node['src'] =~ remote_pattern }

  xhtml_doc.css('link[href]').any? { |node| node['href'] =~ remote_pattern }
end
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 23
# Parses possibly incomplete XML, including input with several top-level
# elements, and reports parse errors alongside the document.
#
# The XML declaration and any DOCTYPE declarations are cut out of the text,
# the remainder is wrapped in a temporary `<root>` element and parsed. The
# wrapper is then replaced by its only element/comment child, or by a nested
# `html` element, or renamed to `body` (no `body` inside) or `html`.
#
# @param [String] text input XHTML text (not modified; a copy is edited)
# @param [String, nil] file_path path used for parsing and for error locations
#
# @return [Array] two items: the parsed Nokogiri document and an array of
#   `Problem` objects built from the parser errors
def self.xml_doc_from_str_with_errors(text, file_path = nil)
  text = text.dup

  if /\A[\n\r ]+(<\?xml)/ =~ text
    UI.warning('XML header must be at the beginning of document',
               location: Epuber::Location.new(path: file_path, lineno: 1))
    text = text.lstrip
  end

  # cut the XML declaration out of the text, remembering it for the prolog
  xml_header = nil
  header_match = /\A\s*(<\?xml[^>]*\?>)/.match(text)
  if header_match
    xml_header = header_match[1]
    text[header_match.begin(1)...header_match.end(1)] = ''
  end

  # cut out every DOCTYPE declaration, one per iteration
  doctypes = []
  while (doctype_match = /(\n|\?>|\A)?(<!DOCTYPE [^>]*>\n*)/.match(text))
    doctypes << doctype_match[2].strip
    text[doctype_match.begin(2)...doctype_match.end(2)] = ''
  end

  before = [xml_header, *doctypes].compact.join("\n")
  before += "\n" unless before.empty?

  # NOERROR / NOWARNING silence errors and warnings printed into the console
  parse_options = Nokogiri::XML::ParseOptions::DEFAULT_XML |
                  Nokogiri::XML::ParseOptions::NOERROR |
                  Nokogiri::XML::ParseOptions::NOWARNING |
                  Nokogiri::XML::ParseOptions::NOENT
  doc = Nokogiri::XML("#{before}<root>#{text}</root>", file_path, nil, parse_options)

  text_for_errors = before + text
  doc.encoding = 'UTF-8'
  doc.file_path = file_path

  errors = doc.errors.map do |error|
    Problem.new(:error, error.message, text_for_errors,
                line: error.line, column: error.column, file_path: file_path)
  end

  wrapper = doc.root
  top_level_nodes = wrapper.children.select { |child| child.element? || child.comment? }

  if top_level_nodes.count == 1
    doc.root = top_level_nodes.first
  elsif (html_node = wrapper.at_css('html'))
    doc.root = html_node
  elsif wrapper.at_css('body').nil?
    wrapper.node_name = 'body'
  else
    wrapper.node_name = 'html'
  end

  [doc, errors]
end
Method for parsing incomplete XML, supports multiple root elements
@warning Because of the nature of XML, when the input string doesn't contain a single root element, the method creates its own root called `body` (or `html` when a `body` element is already present), since it will be used in the next steps.
@param [String] text input XHTML text
@return [Array] two items: the parsed document (Nokogiri::XML::Document) and the list of parse problems
Source
# File lib/epuber/compiler/xhtml_processor.rb, line 85
# Parses `text` with `xml_doc_from_str_with_errors` and returns only the
# document, discarding the collected errors.
#
# @param [String] text input XHTML text
# @param [String, nil] file_path path forwarded to the parser
#
# @return the first item returned by `xml_doc_from_str_with_errors` (the parsed document)
def self.xml_document_from_string(text, file_path = nil)
  xml_doc_from_str_with_errors(text, file_path).first
end