class ContentUrls::StyleParser

StyleParser finds and rewrites URLs in HTML style attributes.

Implementation note:

The methods in this class identify URLs by using regular expressions based on the W3C CSS 2.1 Specification (www.w3.org/TR/CSS21/syndata.html).

Public Class Methods

rewrite_each_url(style) { |url| ... } click to toggle source

Rewrites each URL in a style attribute by calling the supplied block with each URL.

@param [String] style the style attribute.

@example Rewrite URLs in style attribute

style = 'background: url(/images/rainbows.jpg);'
style = ContentUrls::StyleParser.rewrite_each_url(style) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
puts "Rewritten: #{style}"
# => "Rewritten: background: url(/images/unicorns.jpg);"
# File lib/content_urls/parsers/css_parser.rb, line 176
# Rewrites each URL in a style attribute by calling the supplied block with each URL.
#
# The block is invoked once per distinct text matched by the +url+ group of
# @@regex_uri (defined elsewhere in this class) and receives the text of the
# +uri+ group. Returning the URL unchanged leaves those occurrences as they
# are; returning anything else substitutes it into every occurrence of that
# matched text.
#
# @param [String] style the style attribute.
# @yieldparam [String] url a URL found in the style attribute.
# @yieldreturn [String] the replacement URL (or the same URL to keep it).
# @return [String] the style attribute with the rewritten URLs.
def self.rewrite_each_url(style, &block)
  # Map each distinct matched token to [uri text, offset of the uri inside the token].
  tokens = {}
  remaining = style
  until remaining.empty?
    match = @@regex_uri.match(remaining)
    break unless match
    uri_start = match.begin(:uri)
    # NOTE(review): assumes the +uri+ group is nested inside the +url+ group;
    # the offset is re-checked before use below, so a wrong assumption only
    # disables the positional substitution.
    tokens[match[:url]] = [match[:uri], uri_start && (uri_start - match.begin(:url))]
    remaining = match.post_match
  end

  # The style is kept as a list of segments so that text which has already
  # been rewritten is never scanned again by a later token.
  segments = [{:content => style, :is_rewritten => false}]
  tokens.each do |token, (url, offset)|
    rewritten_url = yield url
    next if rewritten_url == url

    rewritten_token = token.dup
    if offset && token[offset, url.length] == url
      # Substitute at the position the regex reported. Replacing the first
      # textual occurrence instead would hit the wrong characters when the
      # URL text also appears earlier in the token (e.g. a URL of "u" in a
      # token shaped like "url(u)").
      rewritten_token[offset, url.length] = rewritten_url
    else
      rewritten_token[url] = rewritten_url
    end

    segments = segments.flat_map do |segment|
      next [segment] if segment[:is_rewritten]

      pieces = []
      text = segment[:content]
      # Literal search; no need to compile an escaped regex per segment.
      while (pos = text.index(token))
        pieces << {:content => text[0, pos], :is_rewritten => false} if pos > 0
        pieces << {:content => rewritten_token, :is_rewritten => true}
        text = text[(pos + token.length)..-1]
      end
      pieces << {:content => text, :is_rewritten => false} unless text.empty?
      pieces
    end
  end
  segments.map { |segment| segment[:content] }.join
end
urls(style) click to toggle source

Returns the URLs found in a style attribute.

@param [String] style the style attribute. @return [Array] the unique URLs found in the style attribute.

@example Parse style attribute for URLs

style = 'background: url(/images/rainbows.jpg);'
ContentUrls::StyleParser.urls(style).each do |url|
  puts "Found URL: #{url}"
end
# => "Found URL: /images/rainbows.jpg"
# File lib/content_urls/parsers/css_parser.rb, line 159
# Returns the URLs found in a style attribute.
#
# Collects every URL that rewrite_each_url yields for the given style,
# handing each one back unchanged, and returns the collection with
# duplicates removed.
#
# @param [String] style the style attribute.
# @return [Array] the unique URLs found in the style attribute.
def self.urls(style)
  found = []
  rewrite_each_url(style) do |url|
    found << url
    url
  end
  found.uniq
end