module Pdf::Parser

Constants

VERSION

Public Class Methods

parse(pdf, print_option = "")
# File lib/pdf/parser.rb, line 15
# Pulls the text found between the XFA data markers of a PDF and returns it
# parsed as an XML document.
#
# pdf          - handed unchanged to PDF.read.
# print_option - the document is echoed with +puts+ only when this is exactly
#                +true+; any other value, including the default "", prints
#                nothing.
#
# Returns the document built by Nokogiri::XML.
def self.parse(pdf, print_option = "")
  matches = PDF.read(pdf).grep("><xfa:data")

  # The markers are single-quoted, so '\n' is a literal backslash followed by
  # "n", not a newline character.
  # NOTE(review): presumably grep returns an Array whose to_s inspect-escapes
  # newlines, which is why the escaped form is searched for - confirm.
  payload = matches.to_s.string_between_markers('<xfa:data\n>', '</xfa:data\n>')

  # Transcode to binary, then relabel the bytes as UTF-8.
  relabelled = payload.encode("ASCII-8BIT").force_encoding("utf-8")

  # Drop every literal backslash-n sequence, then parse, serialize and parse
  # again; the document from the second parse is what gets returned.
  serialized = Nokogiri::XML(relabelled.gsub('\n', '')).to_xml

  Nokogiri::XML(serialized).tap do |document|
    puts document if print_option == true
  end
end