class FeedParser::Parser

Constants

JSONFEED_VERSION_RE

Public Class Methods

new( text ) click to toggle source

Note: let's keep/use the same API as RSS::Parser for now.

# File lib/feedparser/parser.rb, line 17
# Keeps the given text in @text and caches a short, whitespace-trimmed
# prefix of it in @head.
#
# text - the feed text to parse
def initialize( text )
  @text = text
  # only the first 101 characters are looked at; leading/trailing
  # whitespace of that slice is removed
  @head = text[0, 101].strip
end
parse( text, opts={} ) click to toggle source

convenience class/factory method

# File lib/feedparser/parser.rb, line 12
# Convenience factory: creates a parser for the given text and returns
# whatever its #parse call returns.
#
# text - handed to new
# opts - accepted, but not used by this method
def self.parse( text, opts={} )
  parser = new( text )
  parser.parse
end

Public Instance Methods

is_json?() click to toggle source
# File lib/feedparser/parser.rb, line 36
# Reports whether the cached head of the text looks like JSON.
#
# Returns true when @head starts with "{" (a JSON object/hash) or matches
# JSONFEED_VERSION_RE (the jsonfeed prolog); returns false otherwise.
def is_json?
  # Regexp#match? yields a strict true/false and leaves the global match
  # variables untouched, so this predicate never returns a match offset or nil
  @head.start_with?( '{' ) || JSONFEED_VERSION_RE.match?( @head )
end
Also aliased as: json?
is_microformats?() click to toggle source
# File lib/feedparser/parser.rb, line 44
# Reports whether the full text mentions a microformats v2 marker.
#
# Only the v2 class names "h-entry" and "h-feed" are looked for; v1 names
# (e.g. hentry, hatom) are not checked for now.
def is_microformats?
  %w[h-entry h-feed].any? { |marker| @text.include?( marker ) }
end
Also aliased as: microformats?
is_xml?() click to toggle source
note:

Make the format checks callable from outside (that is, use the built-in helper methods).

# File lib/feedparser/parser.rb, line 27
# Reports whether the cached head of the text starts with one of the known
# xml prologs: "<?xml", "<feed" or "<rss".
def is_xml?
  @head.start_with?( '<?xml', '<feed', '<rss' )
end
Also aliased as: xml?
json?()
Alias for: is_json?
microformats?()
Alias for: is_microformats?
parse() click to toggle source
# File lib/feedparser/parser.rb, line 54
# Picks a parsing strategy from the format checks and returns its result.
#
# The checks run in this order: xml, json, microformats. If none of them
# matches, the text is assumed to be xml.
def parse
  return parse_xml  if is_xml?
  return parse_json if is_json?

  ## note: reading/parsing microformats is optional for now;
  ##   the microformats gem requires nokogiri (libxml c-extensions), which
  ##   can make installation hard (sometimes) - thus, the branch is only
  ##   taken if Microformats has been loaded (opt-in keeps the install "light")
  return parse_microformats if defined?( Microformats ) && is_microformats?

  parse_xml   ## fallback - assume xml for now
end
parse_json() click to toggle source
# File lib/feedparser/parser.rb, line 86
# Parses @text as JSON and returns the feed built from the resulting data
# by JsonFeedBuilder. Progress is written to the debug log.
def parse_json
  logger.debug "using stdlib json/#{JSON::VERSION}"
  logger.debug "Parsing feed in json..."

  # build the (normalized) feed, log its summary line, then return it
  JsonFeedBuilder.build( JSON.parse( @text ) ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end
parse_microformats() click to toggle source
# File lib/feedparser/parser.rb, line 71
# Parses @text with Microformats, converts the parsed collection to a hash
# and returns the feed built from it by HyFeedBuilder. Progress is written
# to the debug log.
def parse_microformats
  logger.debug "using microformats/#{Microformats::VERSION}"
  logger.debug "Parsing feed in html (w/ microformats)..."

  parsed = Microformats.parse( @text ).to_hash

  # build the (normalized) feed, log its summary line, then return it
  HyFeedBuilder.build( parsed ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end
parse_xml() click to toggle source
# File lib/feedparser/parser.rb, line 99
# Parses @text with RSS::Parser (validation off, unknown elements ignored)
# and returns the feed built from the parser's result. Progress is written
# to the debug log.
#
# An RSS::Atom::Feed result goes to AtomFeedBuilder; anything else is
# assumed to be an rss feed and goes to RssFeedBuilder.
def parse_xml
  logger.debug "using stdlib rss/#{RSS::VERSION}"

  rss_parser = RSS::Parser.new( @text )
  rss_parser.do_validate            = false
  rss_parser.ignore_unknown_element = true

  logger.debug "Parsing feed in xml..."
  raw_feed = rss_parser.parse   # not yet normalized

  logger.debug "  feed.class=#{raw_feed.class.name}"

  builder = raw_feed.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder

  # build the (normalized) feed, log its summary line, then return it
  builder.build( raw_feed, @text ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end
xml?()
Alias for: is_xml?