class FeedParser::AtomFeedBuilder
Public Class Methods
build( atom_feed, raw )
click to toggle source
# File lib/feedparser/builder/atom.rb, line 10
##
# Convenience class-level entry point: creates a builder for the given
# parsed atom feed (+atom_feed+) and raw xml text (+raw+) and
# returns whatever the builder's to_feed returns.
def self.build( atom_feed, raw )
  new( atom_feed, raw ).to_feed
end
new( atom_feed, raw )
click to toggle source
# File lib/feedparser/builder/atom.rb, line 15
##
# Runs the conversion right away (via build_feed) and keeps the
# result in @feed.
def initialize( atom_feed, raw )
  converted = build_feed( atom_feed, raw )
  @feed     = converted
end
Public Instance Methods
add_meta_items( feed_item, xml_item )
click to toggle source
Add additional elements, currently the media: namespace elements. Note: This tries to accommodate both the different ways to transport the data described in the spec (www.rssboard.org/media-rss/) and the practice by YouTube of grouping everything under media:group.
# File lib/feedparser/builder/atom.rb, line 236
##
# Adds media: namespace data (title, content url/duration, thumbnail,
# description) found in +xml_item+ to the first attachment of +feed_item+.
# An attachment gets created if the item has none yet.
#
# Elements are looked up as direct child of the entry, nested in
# media:content or nested in media:group (in this order).
# Note: media:content itself is only looked up as direct child or
# inside media:group.
#
# feed_item - item to update (changed in place); nil gets passed through
# xml_item  - entry node; must respond to at_xpath, the nodes returned
#             must respond to text / get( attribute_name )
#
# Returns feed_item.
def add_meta_items( feed_item, xml_item )
  ## note: nothing to update if there is no matching item
  return feed_item   if feed_item.nil?

  media_paths = %w[media:group media:title media:content media:thumbnail media:description]
  return feed_item   unless media_paths.any? { |path| xml_item.at_xpath( path ) }

  ## lookup for elements that might get used in any of the three places
  find_media = lambda do |name|
    xml_item.at_xpath( "media:#{name}" ) ||
      xml_item.at_xpath( "media:content/media:#{name}" ) ||
      xml_item.at_xpath( "media:group/media:#{name}" )
  end

  feed_item.attachments << Attachment.new   unless feed_item.attachments.first
  attachment = feed_item.attachments.first

  title_element = find_media.call( 'title' )
  attachment.title = title_element.text   if title_element

  content_element = xml_item.at_xpath( 'media:content' ) ||
                    xml_item.at_xpath( 'media:group/media:content' )
  if content_element
    attachment.url    = content_element.get( 'url' )
    attachment.length = content_element.get( 'duration' )
  end

  thumbnail_element = find_media.call( 'thumbnail' )
  if thumbnail_element
    thumbnail = Thumbnail.new
    thumbnail.url    = thumbnail_element.get( 'url' )
    thumbnail.width  = thumbnail_element.get( 'width' )
    thumbnail.height = thumbnail_element.get( 'height' )
    attachment.thumbnail = thumbnail
  end

  description_element = find_media.call( 'description' )
  attachment.description = description_element.text   if description_element

  feed_item
end
build_feed( atom_feed, raw )
click to toggle source
# File lib/feedparser/builder/atom.rb, line 25
##
# Converts the parsed atom feed into a new Feed.
#
# atom_feed - parsed atom feed; read here: title, id, links, updated,
#             generator, subtitle, authors, categories, items
# raw       - raw feed xml text; only used if Oga is loaded - gets
#             parsed again to pick up elements (e.g. media:) via add_meta_items
#
# Returns the new feed.
def build_feed( atom_feed, raw )    ## fix/todo: rename atom_feed to atom or wire or xml or in ???
  feed = Feed.new
  feed.format = 'atom'

  feed.title = handle_content( atom_feed.title, 'feed.title' )

  ## note: guard against missing <id> (would raise NoMethodError on nil otherwise)
  feed_id = atom_feed.id ? atom_feed.id.content : nil
  logger.debug " atom | feed.id.content >#{feed_id}< : #{feed_id.class.name}"

  feed.url = nil

  ## note: use links (plural to allow multiple links e.g. self,alternate,etc.)
  ##   single pass - first matching link wins for feed_url and for url
  atom_feed.links.each_with_index do |link,i|
    logger.debug " atom | feed.link[#{i+1}] rel=>#{link.rel}< : #{link.rel.class.name} type=>#{link.type}< href=>#{link.href}<"

    ## try to find self link if present
    if feed.feed_url.nil? && link.rel == 'self'
      feed.feed_url = link.href
    end

    ## for now assume alternate is link or no rel specified (assumes alternate)
    if feed.url.nil? && (link.rel == 'alternate' || link.rel.nil?)
      feed.url = link.href
    end
  end

  if feed.url.nil?
    ### todo/fix: issue warning - no link found!!!!
  end

  ## note: as fallback try id if still no url found - why?? why not??
  ##   use url only if starts_with http
  ##   might not be link e.g blogger uses for ids =>
  ##     <id>tag:blogger.com,1999:blog-4704664917418794835</id>
  ##
  ## note: id might actually be link to feed NOT to site (remove fallback - why - why not???)
  ##
  ## Note: remove (strip) leading and trailing spaces and newlines
  if feed.url.nil? && feed_id
    stripped_id = feed_id.strip
    feed.url = stripped_id   if stripped_id.start_with?( 'http' )
  end

  if atom_feed.updated && atom_feed.updated.content   ## note: content might be nil if <updated></updated> empty
    feed.updated_local = handle_date( atom_feed.updated, 'feed.updated' )
    feed.updated       = feed.updated_local.utc
  end

  if atom_feed.generator
    generator = atom_feed.generator

    ## Note: remove (strip) leading and trailing spaces and newlines
    ##   content might be nil (generator element with attributes only) - keep name as is
    feed.generator.name = generator.content.strip   if generator.content
    logger.debug " atom | feed.generator.content >#{generator.content}< : #{generator.content.class.name}"

    feed.generator.version = generator.version
    feed.generator.url     = generator.uri
    logger.debug " atom | feed.generator.version >#{generator.version}< : #{generator.version.class.name}"
    logger.debug " atom | feed.generator.uri >#{generator.uri}< : #{generator.uri.class.name}"
  end

  if atom_feed.subtitle
    feed.summary = handle_content( atom_feed.subtitle, 'feed.subtitle => summary' )
  end

  ## check for authors
  atom_feed.authors.each do |atom_author|
    feed.authors << build_author( atom_author )
  end

  ## check for categories/tags
  atom_feed.categories.each do |atom_cat|
    feed.tags << build_tag( atom_cat )
  end

  atom_feed.items.each do |atom_item|
    feed.items << build_item( atom_item )
  end

  if defined?( Oga )
    # Use Oga as generic xml parser to access elements not addressed by the core RSS module like media:
    parsed_xml = Oga.parse_xml( raw )
    xml_items  = parsed_xml.xpath( '/feed/entry' )
    xml_items.each_with_index do |xml_item, i|
      ## note: skip entries without a matching item (avoids passing on nil
      ##   and growing feed.items with nil entries)
      next   if feed.items[i].nil?
      feed.items[i] = add_meta_items( feed.items[i], xml_item )
    end
  end

  feed # return new feed
end
build_item( atom_item )
click to toggle source
# File lib/feedparser/builder/atom.rb, line 153
##
# Converts a parsed atom entry into a new Item.
#
# atom_item - parsed atom entry; read here: title, links / link, updated,
#             published, id, content, summary, authors, categories
#
# Returns the new item.
def build_item( atom_item )
  item = Item.new   # Item.new

  item.title = handle_content( atom_item.title, 'item.title' )

  ## Note: item might have many links
  ##   e.g. see blogger (headius)
  ##   <link rel='replies' type='application/atom+xml' href='http://blog.headius.com/feeds/3430080308857860963/comments/default' title='Post Comments'/>
  ##   <link rel='replies' type='text/html' href='http://blog.headius.com/2014/05/jrubyconfeu-2014.html#comment-form' title='0 Comments'/>
  ##   <link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/4704664917418794835/posts/default/3430080308857860963'/>
  ##   <link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/4704664917418794835/posts/default/3430080308857860963'/>
  ##   <link rel='alternate' type='text/html' href='http://blog.headius.com/2014/05/jrubyconfeu-2014.html'

  item.url = nil

  if atom_item.links.size == 1
    ## note: single link gets used as is (rel NOT checked)
    item.url = atom_item.link.href
    logger.debug " atom | item.link.href >#{atom_item.link.href}< : #{atom_item.link.href.class.name}"
  else
    ## note: use links (plural to allow multiple links e.g. self,alternate,etc.)
    atom_item.links.each_with_index do |link,i|
      logger.debug " atom | item.link[#{i+1}] rel=>#{link.rel}< : #{link.rel.class.name} type=>#{link.type}< href=>#{link.href}<"

      ## for now assume alternate is link or no rel specified (assumes alternate)
      ## note: only set if item.url is NOT already set (first matching link wins)
      if item.url.nil? && (link.rel == 'alternate' || link.rel.nil?)
        item.url = link.href
      end
    end
  end

  if atom_item.updated && atom_item.updated.content
    item.updated_local = handle_date( atom_item.updated, 'item.updated' )
    item.updated       = item.updated_local.utc
  end

  if atom_item.published && atom_item.published.content
    item.published_local = handle_date( atom_item.published, 'item.published' )
    item.published       = item.published_local.utc
  end

  ## note: guard against missing <id> (would raise NoMethodError on nil otherwise);
  ##   guid is left untouched in that case
  if atom_item.id
    item.guid = atom_item.id.content
    logger.debug " atom | item.id.content >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
  end

  if atom_item.content
    item.content = atom_item.content.content
  end

  if atom_item.summary
    item.summary = handle_content( atom_item.summary, 'item.summary' )
  end

  ## check for authors
  atom_item.authors.each do |atom_author|
    item.authors << build_author( atom_author )
  end

  ## check for categories/tags
  atom_item.categories.each do |atom_cat|
    item.tags << build_tag( atom_cat )
  end

  ## check for attachments / media enclosures
  ### todo/fix: allow more than one attachment/enclosure
  enclosure = atom_item.links.find { |link| link.rel == 'enclosure' }
  if enclosure
    attachment = Attachment.new
    attachment.url    = enclosure.href
    attachment.length = enclosure.length
    attachment.type   = enclosure.type
    item.attachments << attachment
  end

  item
end
build_tag( atom_cat )
click to toggle source
# File lib/feedparser/builder/atom.rb, line 140
##
# Converts a parsed atom category into a new Tag.
#
# atom_cat - parsed atom category; read here: term, scheme
#
# Returns the new tag; name / scheme are only set if present.
def build_tag( atom_cat )
  ## pp atom_cat
  tag = Tag.new

  ## note: always strip leading n trailing spaces
  ##   and add if present (not blank e.g. not nil, "" or whitespace only);
  ##   strip first, check second - otherwise " " ends up as ""
  name   = atom_cat.term.to_s.strip
  scheme = atom_cat.scheme.to_s.strip

  tag.name   = name     unless name.empty?
  tag.scheme = scheme   unless scheme.empty?

  tag
end
handle_content( el, name )
click to toggle source
# File lib/feedparser/builder/atom.rb, line 289
##
# Returns the text content of an atom text element with leading and
# trailing whitespace (spaces/tabs/newlines) removed.
#
# el   - parsed atom text element (must respond to type and content) or nil
# name - label used in the debug log line only
#
# Returns nil if el is nil (element missing) and an empty string
# if the content is nil (e.g. empty element).
def handle_content( el, name )
  ## rename to handle_plain_vanilla_text_content - why? why not?

  ### todo/fix: if type html ?? strip html tags n attributes
  ##   always strip html tags n attributes?? why? why not?

  ## note: element might be missing altogether
  return nil   if el.nil?

  content = el.content    ## note: might be nil - use to_s below (nil => "")

  ## note: dump head (first 30 chars)
  logger.debug " atom | #{name}.content[0..30] (type=>#{el.type}<) >#{content.to_s[0..30]}< : #{content.class.name}"

  ## note: always strip leading and trailing whitespaces (spaces/tabs/newlines)
  content.to_s.strip
end
handle_date( el, name )
click to toggle source
# File lib/feedparser/builder/atom.rb, line 265
##
# Returns the content of an atom date element converted with to_datetime
# or nil if the content is nil.
#
# el   - parsed atom date element (must respond to content)
# name - label used in the debug log line only
def handle_date( el, name )
  ## change time to utc if present? why? why not?
  #  -- .utc.strftime( "%Y-%m-%d %H:%M" )

  ###############
  # examples:
  #   2015-01-02 01:56:06 +0100

  logger.debug " atom | #{name}.content >#{el.content}< : #{el.content.class.name}"

  # NOTE: empty updated.content possible e.g. used by google groups feed (e.g. <updated></updated>)
  #   will return nil : NilClass
  return nil   if el.content.nil?

  ## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
  el.content.to_datetime
end
to_feed()
click to toggle source
# File lib/feedparser/builder/atom.rb, line 19
##
# Returns the feed stored in @feed (set in initialize).
def to_feed
  @feed
end