class LMDocstache::Document

Constants

BLOCK_CHILDREN_ELEMENTS
GENERAL_TAG_REGEX
ROLES_REGEXP
RUN_LIKE_ELEMENTS
WHOLE_BLOCK_START_REGEX

Attributes

document[R]

Public Class Methods

new(*paths) click to toggle source
# File lib/lm_docstache/document.rb, line 11
# Opens the .docx given first and appends the body of every further .docx
# to it, each one preceded by a page break.
#
# @param paths [Array<String>] one or more .docx paths; the first is the
#   base document, the remaining ones are merged into it in order.
# @raise [ArgumentError] when no path is given.
def initialize(*paths)
  raise ArgumentError, "at least one document path is required" if paths.empty?

  @path = paths.shift
  # Stays open for the lifetime of the instance: zip_buffer and
  # find_documents_to_interpolate read further entries from it later.
  @zip_file = Zip::File.open(@path)
  @document = Nokogiri::XML(unzip_read(@zip_file, "word/document.xml"))
  # Only word/document.xml is needed from the extra archives, so each one is
  # opened in block form and closed again once its XML has been parsed.
  documents = paths.map do |path|
    Zip::File.open(path) { |zip| Nokogiri::XML(unzip_read(zip, "word/document.xml")) }
  end

  load_references
  documents.each do |doc|
    @document.css('w|p').last.after(page_break)
    # Everything in the body except the section properties is copied over.
    @document.css('w|p').last.after(doc.css('w|body > *:not(w|sectPr)'))
  end

  find_documents_to_interpolate
end

Public Instance Methods

errors?() click to toggle source
# File lib/lm_docstache/document.rb, line 79
# Tells whether +tags+ and +usable_tags+ disagree on how many tags the
# loaded documents contain.
#
# @return [Boolean] true when the two counts differ.
def errors?
  total_count = tags.length
  usable_count = usable_tags.length
  total_count != usable_count
end
fix_errors() click to toggle source
# File lib/lm_docstache/document.rb, line 75
# Runs flatten_text_blocks on every paragraph reported by
# problem_paragraphs, skipping nil/false entries.
#
# @return [Array] the collection returned by problem_paragraphs.
def fix_errors
  problem_paragraphs.each do |paragraph|
    next unless paragraph

    flatten_text_blocks(paragraph)
  end
end
render_file(output, data = {}, render_options = {}) click to toggle source
# File lib/lm_docstache/document.rb, line 88
# Renders all documents with +data+ and writes the resulting archive to
# +output+.
#
# @param output [String] path of the file to write.
# @param data [Hash] values handed to the renderer.
# @param render_options [Hash] options handed to the renderer.
# @return [Integer] number of bytes written.
def render_file(output, data = {}, render_options = {})
  buffer = zip_buffer(render_documents(data, nil, render_options))
  # The buffer holds zip bytes, so the file is opened in binary mode to keep
  # newline and encoding translation away from the payload.
  File.open(output, "wb") { |f| f.write(buffer.string) }
end
render_replace(output, text) click to toggle source
# File lib/lm_docstache/document.rb, line 93
# Renders all documents in replace-text mode and writes the resulting
# archive to +output+.
#
# @param output [String] path of the file to write.
# @param text [String] replacement text handed to the renderer.
# @return [Integer] number of bytes written.
def render_replace(output, text)
  buffer = zip_buffer(render_documents({}, text))
  # The buffer holds zip bytes, so the file is opened in binary mode to keep
  # newline and encoding translation away from the payload.
  File.open(output, "wb") { |f| f.write(buffer.string) }
end
render_stream(data = {}) click to toggle source
# File lib/lm_docstache/document.rb, line 98
# Renders all documents with +data+ and returns the resulting archive as a
# string instead of writing it to disk.
#
# @param data [Hash] values handed to the renderer.
# @param render_options [Hash] options handed to the renderer; accepted here
#   so this method offers the same knobs as render_file and render_xml.
# @return [String] the archive contents.
def render_stream(data = {}, render_options = {})
  buffer = zip_buffer(render_documents(data, nil, render_options))
  # The buffer is positioned wherever writing left it; start from the top.
  buffer.rewind
  buffer.sysread
end
render_xml(data = {}, render_options = {}) click to toggle source
# File lib/lm_docstache/document.rb, line 104
# Renders all documents with +data+ and hands back whatever
# render_documents produces, without packing it into an archive.
#
# @param data [Hash] values handed to the renderer.
# @param render_options [Hash] options handed to the renderer.
# @return [Hash] the result of render_documents.
def render_xml(data = {}, render_options = {})
  replacement_text = nil # plain rendering, not replace-text mode
  render_documents(data, replacement_text, render_options)
end
save(path = @path) click to toggle source
# File lib/lm_docstache/document.rb, line 83
# Writes the archive, with the current in-memory documents, to +path+.
#
# @param path [String] destination file; defaults to the path the document
#   was opened from.
# @return [Integer] number of bytes written.
def save(path = @path)
  buffer = zip_buffer(@documents)
  # The buffer holds zip bytes, so the file is opened in binary mode to keep
  # newline and encoding translation away from the payload.
  File.open(path, "wb") { |f| f.write(buffer.string) }
end
tags() click to toggle source
# File lib/lm_docstache/document.rb, line 40
# Lists the variable tags and full-block tags found in the plain text of
# every loaded document, document by document.
#
# @return [Array] variable tags followed by full-block tags for each
#   document, as produced by extract_tag_names.
def tags
  @documents.each_value.flat_map do |doc|
    plain_text = doc.text
    variable_tags = extract_tag_names(plain_text)
    block_tags = extract_tag_names(plain_text, :full_block)
    variable_tags + block_tags
  end
end
unusable_tags() click to toggle source
# File lib/lm_docstache/document.rb, line 67
# Computes the tags reported by +tags+ that have no counterpart in
# +usable_tags+. Each usable tag cancels out at most one equal entry, so
# duplicates are accounted for one by one.
#
# @return [Array] the leftover tags, in their original order.
def unusable_tags
  leftover = tags

  usable_tags.each do |usable_tag|
    position = leftover.index(usable_tag)
    leftover.delete_at(position) if position
  end

  leftover
end
usable_role_tags() click to toggle source
# File lib/lm_docstache/document.rb, line 29
# Collects the first capture group of every ROLES_REGEXP match found inside
# a single w:t node, across all loaded documents.
#
# @return [Array<String>] captured values in document order.
def usable_role_tags
  @documents.each_value.flat_map do |doc|
    doc.css('w|t').flat_map do |text_node|
      # scan yields nothing for nodes without a match, so no pre-filter is needed.
      text_node.text.scan(ROLES_REGEXP).map(&:first)
    end
  end
end
usable_tag_names() click to toggle source
# File lib/lm_docstache/document.rb, line 58
# Derives the distinct tag names from usable_tags. String tags matching
# ROLES_REGEXP are left out; Regexp tags are first turned back into their
# unescaped source text.
#
# @return [Array<String>] unique names, in order of first appearance.
def usable_tag_names
  names = usable_tags.map do |tag|
    if tag.is_a?(Regexp)
      tag = unescape_escaped_start_block(tag.source)
    elsif tag =~ ROLES_REGEXP
      next
    end

    # The name is the first capture group of the last match scan performed;
    # it is nil when GENERAL_TAG_REGEX did not match at all.
    tag.scan(GENERAL_TAG_REGEX)
    Regexp.last_match(1)
  end

  names.compact.uniq
end
usable_tags() click to toggle source
# File lib/lm_docstache/document.rb, line 47
# Lists the tags that sit entirely inside a single w:t node: for each text
# node, its variable tags, then start-block tags, then full-block tags.
#
# @return [Array] tags as produced by extract_tag_names, in document order.
def usable_tags
  @documents.values.flat_map do |doc|
    doc.css('w|t').flat_map do |text_node|
      content = text_node.text

      %i[variable start_block full_block].flat_map do |tag_type|
        extract_tag_names(content, tag_type)
      end
    end
  end
end

Private Instance Methods

extract_tag_names(text, tag_type = :variable) click to toggle source
# File lib/lm_docstache/document.rb, line 119
# Scans +text+ for tags of the requested kind.
#
# @param text [String] text to scan.
# @param tag_type [Symbol] :variable (default) scans with
#   Parser::VARIABLE_MATCHER; :full_block scans with Parser::BLOCK_MATCHER;
#   any other value scans the stripped text with WHOLE_BLOCK_START_REGEX.
# @return [Array<String>, Array<Regexp>] "{{name}}" strings for variables;
#   for blocks, a Regexp matching the literal tag text.
def extract_tag_names(text, tag_type = :variable)
  if tag_type == :variable
    return text.scan(Parser::VARIABLE_MATCHER).map { |captures| "{{%s}}" % captures }
  end

  haystack, pattern =
    if tag_type == :full_block
      [text, Parser::BLOCK_MATCHER]
    else
      [text.strip, WHOLE_BLOCK_START_REGEX]
    end

  # Block tags are returned as escaped regexes built from the four captures.
  haystack.scan(pattern).map do |captures|
    /#{Regexp.escape("{{%s%s %s %s}}" % captures)}/
  end
end
filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS) click to toggle source
# File lib/lm_docstache/document.rb, line 189
# Wraps the direct children of +node+ that match +selector+ in a new node
# set, so callers can pop from it without touching the tree itself.
#
# @param node [Nokogiri::XML::Node] parent node.
# @param selector [String] selector applied to the children.
# @return [Nokogiri::XML::NodeSet] the matching children.
def filtered_children(node, selector = BLOCK_CHILDREN_ELEMENTS)
  owner_document = node.document
  matching_children = node.children.filter(selector)
  Nokogiri::XML::NodeSet.new(owner_document, matching_children)
end
find_documents_to_interpolate() click to toggle source
# File lib/lm_docstache/document.rb, line 243
# Builds @documents: the main document plus every header/footer part that
# the main document references and that is listed in @references.
def find_documents_to_interpolate
  @documents = { "word/document.xml" => @document }

  @document.css("w|headerReference, w|footerReference").each do |reference_node|
    relationship_id = reference_node.attributes["id"].value
    # References without a known relationship are skipped.
    next unless @references.key?(relationship_id)

    part_path = "word/#{@references[relationship_id][:target]}"
    @documents[part_path] = Nokogiri::XML(unzip_read(@zip_file, part_path))
  end
end
flatten_text_blocks(runs_wrapper) click to toggle source
# File lib/lm_docstache/document.rb, line 158
# Merges neighbouring run-like children of +runs_wrapper+ that carry the
# same formatting into one run, so text that was split across several runs
# ends up in a single text node. Children are processed from last to first;
# non-run children that contain run-like elements are flattened recursively.
#
# @param runs_wrapper [Nokogiri::XML::Node] node whose children are examined
#   (fix_errors passes in the paragraphs found by problem_paragraphs).
# @return [nil]
def flatten_text_blocks(runs_wrapper)
  # Fewer than two candidate children means there is nothing to merge.
  return if (children = filtered_children(runs_wrapper)).size < 2

  # Pop from the end so every node gets merged into the one preceding it.
  while node = children.pop
    is_run_node = node.matches?(RUN_LIKE_ELEMENTS)
    # After the pop, the last remaining entry is the node right before +node+
    # in the filtered list (nil once the list is exhausted).
    previous_node = children.last

    # A non-run child that wraps runs is flattened on its own.
    if !is_run_node && filtered_children(node, RUN_LIKE_ELEMENTS).any?
      next flatten_text_blocks(node)
    end
    # Merging needs a run-like node with a run-like predecessor.
    next if !is_run_node || children.empty? || !previous_node.matches?(RUN_LIKE_ELEMENTS)
    # Runs containing a tab are left as they are.
    next if node.at_css('w|tab') || previous_node.at_css('w|tab')

    # The markup inside w:rPr serves as the formatting fingerprint; a run
    # without w:rPr is treated as having an empty one.
    style_node = node.at_css('w|rPr')
    style_html = style_node ? style_node.inner_html : ''
    previous_style_node = previous_node.at_css('w|rPr')
    previous_style_html = previous_style_node ? previous_style_node.inner_html : ''
    # NOTE(review): at_css looks at one w:t per run only; if a run can hold
    # several w:t elements the others would go away with the unlink below —
    # confirm runs reaching this point carry a single w:t.
    previous_text_node = previous_node.at_css('w|t')
    current_text_node = node.at_css('w|t')

    # Differently formatted runs, or runs without text, are not merged.
    next if style_html != previous_style_html
    next if current_text_node.nil? || previous_text_node.nil?

    # Carry the current run's xml:space setting over to the merged text node.
    whitespace_attr = current_text_node['xml:space']
    previous_text_node['xml:space'] = whitespace_attr if whitespace_attr
    previous_text_node.content = previous_text_node.text + current_text_node.text

    # The current run's text now lives in the previous run; drop it.
    node.unlink
  end
end
load_references() click to toggle source
# File lib/lm_docstache/document.rb, line 229
# Reads word/_rels/document.xml.rels from the main archive and indexes its
# Relationship entries by id in @references. Each entry records the id, the
# last path segment of the Type URI as a symbol, and the target.
def load_references
  @references = {}
  relationships_xml = Nokogiri::XML(unzip_read(@zip_file, "word/_rels/document.xml.rels"))

  relationships_xml.css("Relationship").each do |relationship|
    attrs = relationship.attributes
    relationship_id = attrs["Id"].value
    relationship_type = attrs["Type"].value.split("/").last.to_sym
    target = attrs["Target"].value

    @references[relationship_id] = { id: relationship_id, type: relationship_type, target: target }
  end
end
page_break() click to toggle source
# File lib/lm_docstache/document.rb, line 217
# Builds the paragraph that separates merged documents: a paragraph with
# one run holding a page break.
#
# @return [Nokogiri::XML::Node] a detached
#   <w:p><w:r><w:br w:type="page"/></w:r></w:p> node owned by @document.
def page_break
  # Reuse the namespace of an existing paragraph so the new elements are
  # emitted with the same prefix as the rest of the document.
  word_namespace = @document.at_css('w|p:last').namespace

  Nokogiri::XML::Node.new('p', @document).tap do |paragraph_node|
    paragraph_node.namespace = word_namespace

    # Nodes created with Node.new start without a namespace, so the run and
    # the break are namespaced explicitly, just like the paragraph.
    run_node = Nokogiri::XML::Node.new('r', @document)
    run_node.namespace = word_namespace

    page_break_node = Nokogiri::XML::Node.new('br', @document)
    page_break_node.namespace = word_namespace
    page_break_node['w:type'] = 'page'

    # WordprocessingML places a break inside a run, so the break is nested
    # in the run instead of being added next to it.
    run_node << page_break_node
    paragraph_node << run_node
  end
end
problem_paragraphs() click to toggle source
# File lib/lm_docstache/document.rb, line 145
# Finds, for every unusable tag, the paragraphs in any loaded document whose
# text contains that tag.
#
# @return [Array] matching paragraphs, grouped by tag and then by document;
#   a paragraph appears once per tag it matches.
def problem_paragraphs
  unusable_tags.flat_map do |tag|
    # String tags are matched literally; Regexp tags are used as they are.
    pattern = tag.is_a?(Regexp) ? tag : /#{Regexp.escape(tag)}/

    @documents.values.flat_map do |doc|
      doc.css('w|p').select { |paragraph| paragraph.text =~ pattern }
    end
  end
end
render_document(document, data, text, render_options) click to toggle source
# File lib/lm_docstache/document.rb, line 140
# Renders a copy of +document+, leaving the original untouched.
#
# @param document [Object] document to render; it is dup'ed first.
# @param data [Hash] values handed to the renderer.
# @param text [String, nil] when truthy, the renderer runs in replace-text
#   mode with this text; otherwise it renders normally.
# @param render_options [Hash] options handed to the renderer.
# @return [Object] whatever the renderer returns.
def render_document(document, data, text, render_options)
  working_copy = document.dup
  renderer = LMDocstache::Renderer.new(working_copy, data, render_options)

  if text
    renderer.render_replace(text)
  else
    renderer.render
  end
end
render_documents(data, text = nil, render_options = {}) click to toggle source
# File lib/lm_docstache/document.rb, line 132
# Renders every loaded document and keys the results by archive path.
#
# @param data [Hash] values handed to the renderer.
# @param text [String, nil] replacement text, or nil for normal rendering.
# @param render_options [Hash] options handed to the renderer.
# @return [Hash] archive path => rendered document, in @documents order.
def render_documents(data, text = nil, render_options = {})
  @documents.each_with_object({}) do |(path, document), rendered|
    rendered[path] = render_document(document, data, text, render_options)
  end
end
unescape_escaped_start_block(regex_source_string) click to toggle source
# File lib/lm_docstache/document.rb, line 110
# Undoes regex escaping for the characters a block tag is made of: removes
# the backslash in front of "{", "}", "#", "^" and the space character.
# Other escape sequences are left alone.
#
# @param regex_source_string [String] source of an escaped block-tag regex.
# @return [String] a new string with those backslashes removed.
def unescape_escaped_start_block(regex_source_string)
  regex_source_string.gsub(/\\([{}\#\^ ])/, '\1')
end
unzip_read(zip, zip_path) click to toggle source
# File lib/lm_docstache/document.rb, line 193
# Reads one entry of a zip archive into a string.
#
# @param zip [Zip::File] opened archive.
# @param zip_path [String] entry name inside the archive.
# @return [String] the entry's contents; stays "" if the entry's stream
#   block is never invoked.
def unzip_read(zip, zip_path)
  data = ""
  entry = zip.find_entry(zip_path)
  # Captured inside the block; the call's own return value is not relied on.
  entry.get_input_stream { |stream| data = stream.read }

  data
end
zip_buffer(documents) click to toggle source
# File lib/lm_docstache/document.rb, line 201
# Builds a new archive in memory: every entry of the original archive is
# copied over unchanged, except those whose path is a key of +documents+,
# which are replaced by the serialized documents given.
#
# @param documents [Hash] archive path => document responding to to_xml.
# @return [StringIO] what Zip::OutputStream.write_buffer returns, holding
#   the new archive.
def zip_buffer(documents)
  Zip::OutputStream.write_buffer do |output|
    @zip_file.entries.each do |entry|
      # Replaced parts are written from +documents+ below.
      next if documents.key?(entry.name)

      output.put_next_entry(entry.name)
      # Block form, so the entry's input stream is closed once it is copied.
      entry.get_input_stream { |stream| output.write(stream.read) }
    end

    documents.each do |path, document|
      output.put_next_entry(path)
      # Serialized without indentation and with all newlines removed.
      output.write(document.to_xml(indent: 0).gsub("\n", ""))
    end
  end
end