class FeedParser::RssFeedBuilder

TODO:

Consider renaming this class to Rss20FeedBuilder or FeedBuilderRss20.

Public Class Methods

build( rss_feed, raw ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 13
# Convenience entry point: creates a builder for the given RSS document and
# hands back the feed it produced.
#
# rss_feed - parsed RSS document (passed to the constructor unchanged)
# raw      - raw feed source (passed to the constructor unchanged)
#
# Returns the result of #to_feed on the new builder.
def self.build( rss_feed, raw )
  new( rss_feed, raw ).to_feed
end
new( rss_feed, raw ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 18
# Builds the feed right away and keeps the result in @feed.
#
# rss_feed - passed through to build_feed
# raw      - passed through to build_feed
def initialize( rss_feed, raw )
  @feed = build_feed( rss_feed, raw )
end

Public Instance Methods

add_meta_items( feed_item, xml_item ) click to toggle source

Adds additional elements to the feed item; currently only the media: namespace elements are handled.

# File lib/feedparser/builder/rss.rb, line 233
# Copies media: namespace data from the XML node of an item onto the first
# attachment of the matching feed item. An attachment is created when the
# item has none yet. Items without any media: element are left untouched.
#
# feed_item - item whose attachments list is updated in place
# xml_item  - XML node for the same item; queried with at_xpath
#
# Returns feed_item.
def add_meta_items( feed_item, xml_item )
  media_paths = ['media:group', 'media:title', 'media:content', 'media:thumbnail', 'media:description']
  return feed_item unless media_paths.any? { |path| xml_item.at_xpath( path ) }

  feed_item.attachments << Attachment.new unless feed_item.attachments.first
  attachment = feed_item.attachments.first

  ## returns the node for the first path that matches (paths are tried in order)
  first_match = lambda do |*paths|
    node = nil
    paths.each do |path|
      node = xml_item.at_xpath( path )
      break if node
    end
    node
  end

  title_node = first_match.call( 'media:title', 'media:content/media:title', 'media:group/media:title' )
  attachment.title = title_node.text if title_node

  content_node = first_match.call( 'media:content', 'media:group/media:content' )
  if content_node
    attachment.url    = content_node.get( 'url' )
    attachment.length = content_node.get( 'duration' )   ## note: length is filled from the duration attribute
  end

  thumbnail_node = first_match.call( 'media:thumbnail', 'media:content/media:thumbnail', 'media:group/media:thumbnail' )
  if thumbnail_node
    thumb = Thumbnail.new
    thumb.url    = thumbnail_node.get( 'url' )
    thumb.width  = thumbnail_node.get( 'width' )
    thumb.height = thumbnail_node.get( 'height' )
    attachment.thumbnail = thumb
  end

  description_node = first_match.call( 'media:description', 'media:content/media:description', 'media:group/media:description' )
  attachment.description = description_node.text if description_node

  feed_item
end
build_author_from_dublic_core_creator( dc_creator ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 115
# Wraps a dc:creator value in an Author.
#
# dc_creator - creator line; leading and trailing whitespace is stripped
#
# Returns the new Author with text and name both set to the stripped line.
def build_author_from_dublic_core_creator( dc_creator )
  Author.new.tap do |author|
    author.text = dc_creator.strip
    ## note: the "unparsed" creator line doubles as the name for now (may change in the future!!!)
    author.name = author.text
  end
end
build_feed( rss_feed, raw ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 28
# Maps a parsed RSS document onto a new Feed.
#
# rss_feed - parsed RSS document; read through rss_version, channel and items
# raw      - raw XML source; only parsed (with Oga) when the Oga constant is
#            defined, to pick up media: elements through add_meta_items
#
# Returns the populated Feed.
def build_feed( rss_feed, raw )
  feed = Feed.new
  feed.format = "rss #{rss_feed.rss_version}"

  logger.debug "  rss | feed.version  >#{rss_feed.rss_version}<"

  feed.title     = handle_content( rss_feed.channel.title,       'feed.title'       )  # required
  feed.summary   = handle_content( rss_feed.channel.description, 'feed.description => summary' )  # required
  feed.url       = rss_feed.channel.link            # required

  ## note: the optional channel fields below are read on a best-effort basis;
  ##   any error while reading one of them just leaves that field unset

  begin
    feed.updated_local   = handle_date( rss_feed.channel.lastBuildDate, 'feed.lastBuildDate => updated' )  # optional
  rescue
  end
  feed.updated         = feed.updated_local.utc     if feed.updated_local

  begin
    feed.published_local = handle_date( rss_feed.channel.pubDate,       'feed.pubDate => published'     )  # optional
  rescue
  end
  feed.published       = feed.published_local.utc   if feed.published_local

  begin
    logger.debug "  rss | feed.generator  >#{rss_feed.channel.generator}< : #{rss_feed.channel.generator.class.name}"
  rescue
  end

  begin
    feed.generator.text = rss_feed.channel.generator    # optional
  rescue
  end
  feed.generator.name = feed.generator.text   ## note: for now set also name/title to "unparsed" (content) line (may change in the future!!!)


  ## check for managingEditor and/or  webMaster

  if rss_feed.channel.respond_to?(:managingEditor) && rss_feed.channel.managingEditor
    author = Author.new
    author.text = rss_feed.channel.managingEditor.strip
    author.name = author.text   ## note: for now use "unparsed" (content) line also for name
    feed.authors << author
  end

  ## todo/check - if tag is called webmaster or webMaster ???
  if rss_feed.channel.respond_to?(:webMaster) && rss_feed.channel.webMaster
    author = Author.new
    author.text = rss_feed.channel.webMaster.strip
    author.name = author.text   ## note: for now use "unparsed" (content) line also for name
    feed.authors << author
  end


  ## check for dublin core (dc) metadata

  if rss_feed.channel.dc_creator
    ## note: dc_creator wipes out authors if set with rss tag(s)
    authors = []
    authors << build_author_from_dublic_core_creator( rss_feed.channel.dc_creator )
    feed.authors = authors
  end


  ###  check for categories (tags)
  if rss_feed.channel.respond_to?(:categories)
    rss_feed.channel.categories.each do |rss_cat|
      feed.tags << build_tag( rss_cat )
    end
  end


  rss_feed.items.each do |rss_item|
    feed.items << build_item( rss_item )
  end

  if defined?( Oga )
    parsed_xml = Oga.parse_xml( raw )
    xml_items = parsed_xml.xpath( '/rss/channel/item' )
    xml_items.each_with_index do |xml_item, i|
      ## xml nodes and built items are paired by position only; a node without
      ##  a matching item is skipped so that no nil entry ends up in feed.items
      ##  (and add_meta_items is never called with a nil item)
      feed_item = feed.items[i]
      next if feed_item.nil?

      feed.items[i] = add_meta_items( feed_item, xml_item )
    end
  end

  feed # return new feed
end
build_item( rss_item ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 138
  # Converts one parsed RSS item into an Item.
  #
  # rss_item - parsed item; read through title, link, description,
  #            content_encoded, pubDate, dc_creator and dc_date, plus guid,
  #            author, categories and enclosure when the item responds to them
  #
  # Returns the new Item.
  def build_item( rss_item )
    item = Item.new

    item.title = handle_content( rss_item.title, 'item.title' )
    item.url   = rss_item.link
    logger.debug "  rss | item.link  >#{rss_item.link}< : #{rss_item.link.class.name}"

    ## todo:
    ##  check if feedburner:origLink is present - if yes, use it for url/link
    ##  example: use
    ##  - <feedburner:origLink>http://www.rubyflow.com/items/9803-gotta-ruby-s-syntax</feedburner:origLink></item>
    ##   instead of
    ##  - <link>http://feedproxy.google.com/~r/Rubyflow/~3/Ym9Sltg_2_c/9803-gotta-ruby-s-syntax</link>

    item.summary = handle_content( rss_item.description, 'item.description => summary' )

    ## <content:encoded> -- RSS 1.0 content module used inside RSS 2.0
    encoded = rss_item.content_encoded
    item.content = encoded
    preview = encoded ? encoded[0..40] : ''
    logger.debug "  rss | item.content_encoded[0..40]  >#{preview}< : #{encoded.class.name}"

    begin
      item.published_local = handle_date( rss_item.pubDate, 'item.pubDate => published' )
    rescue StandardError
      ## best effort: an unusable pubDate leaves the item without a date
    end
    item.published = item.published_local.utc   if item.published_local

    ## fix/todo: check if rss_item.guid present? !!!!
    ##   may be missing, e.g. check lambda-the-ultimate.org
    guid = rss_item.guid   if rss_item.respond_to?( :guid )
    if guid && guid.content
      item.guid = guid.content
      logger.debug "  rss | item.guid.content  >#{guid.content}< : #{guid.content.class.name}"
    else
      item.guid = rss_item.link
      logger.warn "  rss | item.guid.content missing !!!! - using link for guid"
    end

    if rss_item.respond_to?( :author ) && rss_item.author
      item.authors << Author.new.tap do |author|
        author.text = rss_item.author.strip
        author.name = author.text   ## note: the "unparsed" (content) line doubles as the name for now
      end
    end

    ## dublin core (dc) metadata
    ##  note: dc_creator replaces any author set from the plain rss tag
    if rss_item.dc_creator
      item.authors = [ build_author_from_dublic_core_creator( rss_item.dc_creator ) ]
    end

    ## dc_date is only consulted when no regular item date was found
    unless item.published_local
      begin
        item.published_local = handle_date( rss_item.dc_date, 'item.dc_date => published' )
      rescue StandardError
        ## best effort, same as for pubDate
      end
      item.published = item.published_local.utc   if item.published_local
    end

    ## categories (tags)
    if rss_item.respond_to?( :categories )
      rss_item.categories.each { |rss_cat| item.tags << build_tag( rss_cat ) }
    end

    ## enclosure
    ##   todo/check: rss can only include at most one enclosure?
    enclosure = rss_item.enclosure   if rss_item.respond_to?( :enclosure )
    if enclosure
      attachment = Attachment.new
      attachment.url    = enclosure.url
      attachment.length = enclosure.length
      attachment.type   = enclosure.type
      item.attachments << attachment
    end

    item
  end
build_tag( rss_cat ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 124
# Converts an RSS category into a Tag.
#
# rss_cat - category; read through content and domain
#
# Returns the new Tag. name and domain are stripped of leading and trailing
# whitespace and are only assigned when the source value is not nil/false.
def build_tag( rss_cat )
  Tag.new.tap do |tag|
    content = rss_cat.content
    tag.name = content.strip     if content

    domain = rss_cat.domain
    tag.domain = domain.strip    if domain
  end
end
handle_content( el, name ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 288
# Logs a text element and returns it stripped.
#
# note: meant for feed.title, feed.description, item.title, item.description;
#   do NOT use for others, e.g. feed.generator
#
# el   - element value, or nil
# name - label used in the debug log line
#
# Returns el.strip, or nil when el is nil.
def handle_content( el, name )
  ## todo/fix: strip html tags n attributes ???

  logger.debug "  rss | #{name}  >#{el}< : #{el.class.name}"

  el.strip unless el.nil?
end
handle_date( el, name ) click to toggle source
# File lib/feedparser/builder/rss.rb, line 263
# Logs a date element and returns it converted with to_datetime.
#
# note: all date-times in RSS conform to the Date and Time Specification
#   of RFC 822, e.g. Sun, 19 May 2012 15:21:36 GMT  or
#                    Sat, 07 Sep 2013 00:00:01 GMT
#
# el   - date/time value responding to to_datetime, or nil
# name - label used in the debug log line
#
# Returns el.to_datetime, or nil when el is nil.
def handle_date( el, name )
  ## change time to utc if present? why? why not?
  #  --  .utc.strftime( "%Y-%m-%d %H:%M" )

  logger.debug "  rss | #{name}  >#{el}< : #{el.class.name}"

  ## convert from time to to_datetime  (avoid errors on windows w/ builtin rss lib)
  el.to_datetime unless el.nil?
end
to_feed() click to toggle source
# File lib/feedparser/builder/rss.rb, line 22
# Returns the feed stored in @feed (assigned in the constructor).
def to_feed
  @feed
end