class EPUB::Searcher::XHTML::Seamless
Public Class Methods
Source
# File lib/epub/searcher/xhtml.rb, line 62
# Sets up the searcher; the content index starts out unset (nil) and is
# filled in by the first call to search_text.
#
# The bare +super+ forwards +element+ unchanged to the superclass initializer.
#
# @param element the element handed on to the superclass
def initialize(element)
  super
  @indices = nil
end
Calls superclass method
Public Instance Methods
Source
# File lib/epub/searcher/xhtml.rb, line 74
# Flattens the text below +element+ into a single string and records, for
# each contributing node, where its text starts and how to reach the node.
#
# Text and CDATA children contribute their content, +img+ children contribute
# a non-empty +alt+ attribute, and any other element contributes whatever its
# own subtree yields (collected recursively).
#
# @param element a node responding to +children+
# @return [Array(Hash, String)] a hash mapping a start position in the string
#   to the list of steps leading to the node that starts there, and the
#   flattened string itself
def build_indices(element)
  position_map = {}
  text = ''
  element_count = 0

  element.children.each do |node|
    unless node.element?
      next unless node.text? || node.cdata?

      # Text-like nodes are numbered with the count of elements seen so far.
      position_map[text.length] = [[:text, element_count]]
      text << node.content
      next
    end

    step = [:element, element_count, {name: node.name, id: node.attribute_with_prefix('id')}]
    element_count += 1

    if node.name == 'img'
      alt_text = node.attribute_with_prefix('alt')
      # An image without usable alt text adds nothing to the searchable string.
      next if alt_text.nil? || alt_text.empty?

      position_map[text.length] = [step]
      text << alt_text
      next
    end

    # TODO: Consider block level elements
    base = text.length
    nested_map, nested_text = build_indices(node)
    # TODO: Pass content_length and child_step to build_indices and remove this block
    nested_map.each_pair do |relative_pos, nested_steps|
      # Shift the child's positions into this level and prefix its own step.
      position_map[base + relative_pos] = [step] + nested_steps
    end
    text << nested_text
  end

  [position_map, text]
end
Source
# File lib/epub/searcher/xhtml.rb, line 67
# Searches the flattened text of @element for +word+.
#
# The index and flattened content are built on the first call and kept in
# @indices / @content for later calls.
#
# @param word [String] text to look for
# @return whatever visit returns for the cached index and content
def search_text(word)
  @indices, @content = build_indices(@element) unless @indices
  visit(@indices, @content, word)
end
Private Instance Methods
Source
# File lib/epub/searcher/xhtml.rb, line 143
# Finds the largest offset that does not exceed +index+.
#
# @param offsets [Array<Integer>] offsets sorted in ascending order
# @param index [Integer] position to locate within the content string
# @param for_end_position [Boolean] when true the offset must be strictly
#   less than +index+; otherwise an offset equal to +index+ also qualifies
# @return [Integer, nil] the matching offset, or nil when no offset qualifies
#   (for example when +offsets+ is empty)
def find_offset(offsets, index, for_end_position=false)
  # Position of the first offset lying beyond the allowed bound. Every offset
  # before that position qualifies, so the one just before it is the largest.
  boundary = (0...offsets.length).bsearch do |pos|
    for_end_position ? offsets[pos] >= index : offsets[pos] > index
  end
  # nil means no offset is out of bounds, i.e. all of them qualify.
  boundary ||= offsets.length
  boundary.zero? ? nil : offsets[boundary - 1]
end
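Find the largest offset less than or equal to index (strictly less than index when for_end_position is true) @param offsets [Array<Integer>] keys of indices, in ascending order @param index [Integer] position of search word in content string @param for_end_position [Boolean] whether index marks the end of a match @todo: more efficient algorithm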
Source
# File lib/epub/searcher/xhtml.rb, line 154
# Wraps each raw step description in a Result::Step.
#
# @param steps [Array<Array>] argument lists, each splatted into
#   Result::Step.new
# @return [Array<Result::Step>] one step object per entry, in the same order
def to_result_steps(steps)
  steps.map do |step_args|
    Result::Step.new(*step_args)
  end
end
Source
# File lib/epub/searcher/xhtml.rb, line 111
# Collects a Result for every occurrence of +word+ in +content+, including
# overlapping occurrences (the scan resumes one character after each match).
#
# @param indices [Hash{Integer => Array}] start position in +content+ mapped
#   to the raw steps of the node whose text starts there
# @param content [String] flattened text to scan
# @param word [String] text to look for
# @return [Array<Result>] one result per occurrence; empty when +word+ is
#   empty or does not occur
def visit(indices, content, word)
  # An empty word "matches" at every position, yet no end offset can be
  # resolved for a zero-length match, so report no matches instead of failing.
  return [] if word.empty?

  results = []
  offsets = indices.keys
  word_length = word.length
  i = 0
  while i = content.index(word, i)
    offset = find_offset(offsets, i)
    start_steps = to_result_steps(indices[offset])
    last_step = start_steps.last
    if last_step.info[:name] == 'img'
      # The match begins in an image's alt text: the result carries only the
      # steps to the image, with no start/end steps.
      parent_steps = start_steps
      start_steps = end_steps = nil
    else
      start_char_step = Result::Step.new(:character, i - offset)
      # The end position is exclusive, hence the strict lookup.
      end_offset = find_offset(offsets, i + word_length, true)
      end_steps = to_result_steps(indices[end_offset])
      end_char_step = Result::Step.new(:character, i + word_length - end_offset)
      parent_steps, start_steps, end_steps = Result.aggregate_step_intersection(start_steps, end_steps)
      start_steps << start_char_step
      end_steps << end_char_step
    end
    results << Result.new(parent_steps, start_steps, end_steps)
    i += 1
  end
  results
end