class PDF::Reader::Turtletext::Textangle

A DSL syntax for text extraction.

textangle = PDF::Reader::Turtletext::Textangle.new(reader) do |r|

r.page = 1
r.below = "Electricity Services"
r.above = "Gas Services by City Gas Pte Ltd"
r.right_of = 240.0
r.left_of = "Total ($)"

end
textangle.text

Attributes

above[W]
below[W]
inclusive[W]
left_of[W]
page[W]
reader[R]
right_of[W]

Public Class Methods

new(turtletext_reader) { |self| ... } click to toggle source

turtletext_reader is a PDF::Reader::Turtletext

# File lib/pdf/reader/turtletext/textangle.rb, line 18
# Builds a textangle bound to +turtletext_reader+.
#
# Defaults: @page is 1 and @inclusive is false.
#
# An optional block configures the new object in one of two ways:
# * a block declaring exactly one parameter is yielded the object itself;
# * any other block is evaluated with the object as +self+ (instance_eval).
def initialize(turtletext_reader, &block)
  @reader = turtletext_reader
  @page = 1
  @inclusive = false
  return unless block_given?

  # Arity decides the DSL flavour: explicit receiver vs. implicit self.
  if block.arity == 1
    yield self
  else
    instance_eval(&block)
  end
end

Public Instance Methods

above(*args) click to toggle source
# File lib/pdf/reader/turtletext/textangle.rb, line 59
# Combined getter/setter for @above.
#
# When the first argument is truthy it is stored in @above; a missing, nil
# or false argument leaves the stored value untouched. Always returns the
# current @above. The +text+ method uses this value for the ymin boundary.
def above(*args)
  candidate = args.first
  @above = candidate if candidate
  @above
end
below(*args) click to toggle source
# File lib/pdf/reader/turtletext/textangle.rb, line 67
# Combined getter/setter for @below.
#
# When the first argument is truthy it is stored in @below; a missing, nil
# or false argument leaves the stored value untouched. Always returns the
# current @below. The +text+ method uses this value for the ymax boundary.
def below(*args)
  candidate = args.first
  @below = candidate if candidate
  @below
end
exclusive!() click to toggle source

Command: sets +inclusive+ to false

# File lib/pdf/reader/turtletext/textangle.rb, line 46
# Command: switches the inclusive flag off by setting @inclusive to false.
# Returns false (the value of the assignment).
def exclusive!
  @inclusive = false
end
inclusive(*args) click to toggle source
# File lib/pdf/reader/turtletext/textangle.rb, line 33
# Combined getter/setter for the @inclusive flag.
#
# With a non-nil first argument the flag is set to that value; this includes
# +false+, so <tt>inclusive(false)</tt> really switches the flag off (a plain
# truthiness check would silently ignore it). With no argument, or with nil,
# the stored value is left untouched.
#
# Always returns the current @inclusive, which +text+ passes on to
# reader.text_in_region.
def inclusive(*args)
  value = args.first
  # Only nil means "no value supplied"; false is a legitimate setting.
  @inclusive = value unless value.nil?
  @inclusive
end
inclusive!() click to toggle source

Command: sets +inclusive+ to true

# File lib/pdf/reader/turtletext/textangle.rb, line 41
# Command: switches the inclusive flag on by setting @inclusive to true.
# Returns true (the value of the assignment).
def inclusive!
  @inclusive = true
end
left_of(*args) click to toggle source
# File lib/pdf/reader/turtletext/textangle.rb, line 75
# Combined getter/setter for @left_of.
#
# When the first argument is truthy it is stored in @left_of; a missing, nil
# or false argument leaves the stored value untouched. Always returns the
# current @left_of. The +text+ method uses this value for the xmax boundary.
def left_of(*args)
  candidate = args.first
  @left_of = candidate if candidate
  @left_of
end
page(*args) click to toggle source
# File lib/pdf/reader/turtletext/textangle.rb, line 51
# Combined getter/setter for @page.
#
# When the first argument is truthy it is stored in @page; a missing, nil
# or false argument leaves the stored value untouched. Always returns the
# current @page, which +text+ passes to the reader's lookups.
def page(*args)
  candidate = args.first
  @page = candidate if candidate
  @page
end
right_of(*args) click to toggle source
# File lib/pdf/reader/turtletext/textangle.rb, line 83
# Combined getter/setter for @right_of.
#
# When the first argument is truthy it is stored in @right_of; a missing, nil
# or false argument leaves the stored value untouched. Always returns the
# current @right_of. The +text+ method uses this value for the xmin boundary.
def right_of(*args)
  candidate = args.first
  @right_of = candidate if candidate
  @right_of
end
text() click to toggle source

Returns the text array found within the defined region. Each line of text is an array of the separate text elements found on that line.

[["first line first text", "first line last text"],["second line text"]]
# File lib/pdf/reader/turtletext/textangle.rb, line 93
# Extracts the text inside the configured region.
#
# Each boundary (right_of, left_of, above, below) may be:
# * unset   - a default is used (0 for the minimum, 99999 for the maximum);
# * numeric - used directly as the coordinate;
# * other   - handed to reader.text_position(value, page) and the matching
#   :x or :y coordinate of the result is used.
#
# Returns nil when no reader is set; otherwise returns the result of
# reader.text_in_region(xmin, xmax, ymin, ymax, page, inclusive).
#
# NOTE: if reader.text_position finds nothing, the corresponding boundary is
# nil and is passed to text_in_region as such.
def text
  return unless reader

  # 99999 is a stand-in for "no upper limit".
  # TODO: figure out the actual limit?
  xmin = region_bound(right_of, :x, 0)
  xmax = region_bound(left_of, :x, 99999)
  ymin = region_bound(above, :y, 0)
  ymax = region_bound(below, :y, 99999)

  reader.text_in_region(xmin, xmax, ymin, ymax, page, inclusive)
end

# Resolves a single region boundary to a coordinate.
#
# anchor  - nil/false (unset), a Numeric coordinate, or a value to look up
#           through reader.text_position.
# axis    - :x or :y, the key read from the looked-up position.
# default - coordinate returned when anchor is unset.
#
# Numeric is checked instead of the Fixnum constant, which was removed in
# Ruby 3.2; this also accepts every Integer and Float the old check accepted.
def region_bound(anchor, axis, default)
  return default unless anchor
  return anchor if anchor.is_a?(Numeric)

  position = reader.text_position(anchor, page)
  position ? position[axis] : nil
end
private :region_bound