class OoxmlParser::DocumentStructure

Basic class for DocumentStructure

Attributes

background[RW]

@return [DocumentBackground] background of document

comments[RW]

@return [Comments] comments of document

comments_document[RW]

@return [CommentsDocument] comments of whole document

comments_extended[RW]

@return [CommentsExtended] extended comments

default_paragraph_style[RW]

@return [DocxParagraph] default paragraph style

default_run_style[RW]

@return [DocxParagraphRun] default run style

default_table_paragraph_style[RW]

@return [DocxParagraph] default table paragraph style

default_table_run_style[RW]

@return [DocxParagraphRun] default table run style

document_properties[RW]

@return [DocumentProperties] properties of document

elements[RW]

@return [Array<OOXMLDocumentObject>] list of elements

notes[RW]

@return [Array<Note>] notes of document

numbering[RW]

@return [Numbering] store numbering data

page_properties[RW]

@return [PageProperties] page properties of document

relationships[RW]

@return [Relationships] relationships

settings[RW]

@return [DocumentSettings] settings

styles[RW]

@return [Styles] styles of document

theme[RW]

@return [PresentationTheme] theme of docx

theme_colors[RW]

@return [PresentationTheme] theme colors of docx

Public Class Methods

new(params = {}) click to toggle source
# File lib/ooxml_parser/docx_parser/document_structure.rb, line 57
# Build an empty document structure: blank element, note and comment
# lists plus a default DocumentProperties object.
# @param params [Hash] options forwarded unchanged to the parent initializer
def initialize(params = {})
  @document_properties = DocumentProperties.new
  @comments = []
  @notes = []
  @elements = []
  # Bare `super` hands `params` to the superclass exactly as received
  super
end

Public Instance Methods

==(other) click to toggle source

Compare this object to other @param other [Object] any other object @return [True, False] result of comparison

# File lib/ooxml_parser/docx_parser/document_structure.rb, line 70
# Compare this document structure with another object.
#
# Equality is decided attribute by attribute, in this order: elements,
# page properties, notes, background, document properties and comments.
# An object that does not expose all of these readers (for example `nil`
# or a String) is simply reported as not equal instead of raising
# NoMethodError.
# @param other [Object] any other object
# @return [true, false] result of comparison
def ==(other)
  compared = %i[elements page_properties notes background document_properties comments]
  # Guard: only objects answering every compared reader can be equal
  return false unless compared.all? { |attribute| other.respond_to?(attribute) }

  # `all?` stops at the first mismatch, like the `&&` chain it replaces
  compared.all? do |attribute|
    instance_variable_get("@#{attribute}") == other.public_send(attribute)
  end
end
document_styles() click to toggle source

@return [Array<DocumentStyle>] styles of document

# File lib/ooxml_parser/docx_parser/document_structure.rb, line 163
# Shortcut to the style list held by the object stored in `styles`
# @return [Array<DocumentStyle>] styles of the document
def document_styles
  style_sheet = styles
  style_sheet.styles
end
element_by_description(location: :canvas, type: :docx_paragraph) click to toggle source

Get element by its type @param location [Symbol] location of object @param type [Symbol] type of object @return [OOXMLDocumentObject]

# File lib/ooxml_parser/docx_parser/document_structure.rb, line 83
# Fetch the elements (or the shape text box) found at a described place.
#
# Lookup rules, as implemented below:
# * :canvas - body elements; the table is read from the second body
#   element, the shape from the first one
# * :footer / :header - elements of the note typed :footer1 / :header1;
#   both table and shape are read from the note's first element
# * :comment - paragraphs of the first comment (`type` is ignored)
# @param location [Symbol] :canvas, :footer, :header or :comment
# @param type [Symbol] :table, :shape, or one of :docx_paragraph,
#   :simple, :paragraph for the plain element list
# @return [OOXMLDocumentObject]
# @raise [RuntimeError] if `location` or `type` is not a known value
def element_by_description(location: :canvas, type: :docx_paragraph)
  # Text box of the shape carried by the first non-empty run of a paragraph
  text_box_of = lambda do |paragraph|
    paragraph.nonempty_runs.first.alternate_content.office2007_content.data.text_box
  end

  case location
  when :canvas
    case type
    when :table then elements[1].rows[0].cells[0].elements
    when :docx_paragraph, :simple, :paragraph then elements
    when :shape then text_box_of.call(elements[0])
    else raise 'Wrong location(Need One of ":table", ":paragraph", ":shape")'
    end
  when :footer, :header
    # :footer -> :footer1, :header -> :header1
    note_type = :"#{location}1"
    # The note is looked up only inside a matching branch, so an unknown
    # `type` raises the message below without touching the notes
    case type
    when :table then note_by_description(note_type).elements[0].rows[0].cells[0].elements
    when :docx_paragraph, :simple, :paragraph then note_by_description(note_type).elements
    when :shape then text_box_of.call(note_by_description(note_type).elements[0])
    else raise 'Wrong location(Need One of ":table", ":simple", ":shape")'
    end
  when :comment
    comments[0].paragraphs
  else
    raise 'Wrong global location(Need One of ":canvas", ":footer", ":header", ":comment")'
  end
end
note_by_description(type) click to toggle source

Get note by its description @param type [Symbol] note type @return [Note]

# File lib/ooxml_parser/docx_parser/document_structure.rb, line 128
# Find the first note whose type, converted to a Symbol, equals `type`.
# @param type [Symbol] note type to look for
# @return [Note] the first matching note
# @raise [RuntimeError] if no note has the requested type
def note_by_description(type)
  matching_note = notes.find { |candidate| candidate.type.to_sym == type }
  return matching_note if matching_note

  raise 'There isn\'t this type of the note'
end
outline(location: :canvas, type: :simple, levels_count: 1) click to toggle source

Return outline type @param location [Symbol] location of object @param type [Symbol] type of object @param levels_count [Integer] count of levels to detect @return [Array<String,String>] type of outline

# File lib/ooxml_parser/docx_parser/document_structure.rb, line 152
# Report the numbering format and level text of an outline.
#
# For every index below `levels_count` the paragraph at that index and
# its numbering level with the same index are inspected. Each pass
# overwrites the previous pair, so the result describes the last
# inspected level only; with `levels_count` of 0 the result is empty.
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param levels_count [Integer] count of levels to walk through
# @return [Array<String,String>] numbering format and level text
def outline(location: :canvas, type: :simple, levels_count: 1)
  paragraphs = element_by_description(location: location, type: type)
  levels_count.times.each_with_object([]) do |index, format_and_text|
    level = paragraphs[index].numbering.abstruct_numbering.level_list[index]
    format_and_text[0] = level.numbering_format.value
    format_and_text[1] = level.text.value
  end
end
parse() click to toggle source

Parse docx file @return [DocumentStructure] parsed structure

# File lib/ooxml_parser/docx_parser/document_structure.rb, line 169
# Parse the unpacked docx file into this object.
#
# Order of work: content types, default styles, theme, relationships and
# styles first; then the body of `word/document.xml` (paragraphs, tables
# and structured document tags are appended to `@elements`); finally
# document properties, comments, extended comments, document comments
# and settings.
# @return [DocumentStructure] self, filled with the parsed data
def parse
  @content_types = ContentTypes.new(parent: self).parse
  @root_subfolder = 'word/'
  @comments = []
  @default_paragraph_style = DocxParagraph.new
  @default_run_style = DocxParagraphRun.new(parent: self)
  @theme = PresentationTheme.new(parent: self).parse('word/theme/theme1.xml')
  @relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}word/_rels/document.xml.rels")
  parse_styles
  # Counter passed to each paragraph/table parser; advanced once per
  # stored paragraph or table (not for structured document tags)
  number = 0
  # Pushed here and popped after the body loop below;
  # presumably this is what `root_object.current_xml` resolves to - TODO confirm
  root_object.add_to_xmls_stack('word/document.xml')
  doc = parse_xml(root_object.current_xml)
  doc.search('//w:document').each do |document|
    document.xpath('w:background').each do |background|
      @background = DocumentBackground.new(parent: self).parse(background)
    end
    document.xpath('w:body').each do |body|
      # Walk direct children of the body in document order
      body.xpath('*').each do |element|
        case element.name
        when 'p'
          child = element.child
          # Skip a paragraph without child nodes when the previously stored
          # element is exactly a Table (presumably the empty paragraph that
          # follows a table - TODO confirm)
          unless child.nil? && @elements.last.instance_of?(Table)
            # Parse on a copy so the shared default paragraph style is not the parse receiver
            paragraph_style = default_paragraph_style.dup.parse(element, number, default_run_style, parent: self)
            number += 1
            @elements << paragraph_style.dup
          end
        when 'tbl'
          table = Table.new(parent: self).parse(element,
                                                number,
                                                TableProperties.new)
          number += 1
          @elements << table
        when 'sdt'
          # Structured document tags are stored without advancing `number`
          @elements << StructuredDocumentTag.new(parent: self).parse(element)
        end
      end
      # Section properties directly under the body supply the page properties
      body.xpath('w:sectPr').each do |sect_pr|
        @page_properties = PageProperties.new(parent: self).parse(sect_pr,
                                                                  default_paragraph_style,
                                                                  default_run_style)
        @notes = page_properties.notes # keep copy of notes to compatibility with previous docx models
      end
    end
  end
  # Matches the add_to_xmls_stack call made before parsing the body
  root_object.xmls_stack.pop
  @document_properties = DocumentProperties.new(parent: self).parse
  @comments = Comments.new(parent: self).parse
  @comments_extended = CommentsExtended.new(parent: self).parse
  # Same Comments parser, pointed at the first 'commentsDocument'
  # relationship target inside the unpacked `word/` folder
  @comments_document = Comments.new(parent: self,
                                    file: "#{root_object.unpacked_folder}word/#{relationships.target_by_type('commentsDocument').first}")
                               .parse
  @settings = DocumentSettings.new(parent: self).parse
  self
end
recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0) click to toggle source

Detect numbering type @param location [Symbol] location of object @param type [Symbol] type of object @param paragraph_number [Integer] number of object @return [Array<String,String>] type of numbering

# File lib/ooxml_parser/docx_parser/document_structure.rb, line 140
# Detect the numbering of one paragraph by reading the first level of
# the numbering definition attached to it.
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param paragraph_number [Integer] index of the paragraph to inspect
# @return [Array<String,String>] numbering format followed by level text
def recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0)
  paragraphs = element_by_description(location: location, type: type)
  first_level = paragraphs[paragraph_number].numbering.abstruct_numbering.level_list[0]
  # Text is read before the format, the result lists the format first
  level_text = first_level.text.value
  [first_level.numbering_format.value, level_text]
end