class Browser::Cache

An SQLite3-backed browser cache (page bodies are stored zlib/deflate-compressed)

Attributes

agent[R]
db[R]

Public Class Methods

new(filename="browsercache.db", agent=nil) click to toggle source
# File lib/epitools/browser/cache.rb, line 15
# Opens the SQLite3 database stored at +filename+ and makes sure the cache
# table and its index exist.
#
# filename - path handed to SQLite3::Database.new (default "browsercache.db").
# agent    - object later passed as the last argument of Mechanize::Page.new
#            when cached HTML/XML pages are rebuilt; may be nil.
def initialize(filename="browsercache.db", agent=nil)
  @filename = filename
  @agent    = agent

  @db = SQLite3::Database.new(filename)
  # Busy-handler timeout of 50 (milliseconds, per the sqlite3 gem's API).
  @db.busy_timeout 50

  create_tables
end

Public Instance Methods

clear(pattern=nil) click to toggle source
# File lib/epitools/browser/cache.rb, line 152
# Deletes cached rows.
#
# pattern - optional substring; when given, only rows whose url contains it
#           (SQL LIKE '%pattern%') are deleted. When nil, every row is deleted.
#
# The pattern is passed as a bound parameter rather than interpolated into the
# SQL text, so quotes inside it can neither break the statement nor inject SQL.
# LIKE wildcards (% and _) inside the pattern keep their wildcard meaning.
def clear(pattern=nil)
  if pattern
    db.execute("DELETE FROM cache WHERE url LIKE ?", "%#{pattern}%")
  else
    db.execute("DELETE FROM cache")
  end
end
count() click to toggle source
# File lib/epitools/browser/cache.rb, line 29
# Returns the number of rows in the cache table as an Integer.
def count
  rows  = db.execute("SELECT COUNT(1) FROM cache")
  total = rows.first.first # single row, single column
  total.to_i
end
Also aliased as: size
delete!() click to toggle source
# File lib/epitools/browser/cache.rb, line 179
# Closes the database handle and then removes the database file from disk.
def delete!
  db.close
  File.unlink(@filename)
end
each(&block) click to toggle source
# File lib/epitools/browser/cache.rb, line 160
# Runs a select over the whole cache table through pages_via_sql, forwarding
# the given block (if any) to it, and returns whatever pages_via_sql returns.
def each(&block)
  everything = "SELECT * FROM cache"
  pages_via_sql(everything, &block)
end
each_url() { |first| ... } click to toggle source
# File lib/epitools/browser/cache.rb, line 164
# Yields the first column (the url) of every row in the cache table.
def each_url
  db.execute("SELECT url FROM cache") { |record| yield record.first }
end
expire(url) click to toggle source
# File lib/epitools/browser/cache.rb, line 170
# Deletes the cache row whose url column equals +url+ (bound as a parameter).
def expire(url)
  statement = "DELETE FROM cache WHERE url = ?"
  db.execute(statement, url)
end
get(url) click to toggle source
# File lib/epitools/browser/cache.rb, line 127
# Looks up the cached page stored under +url+ (converted with to_s).
#
# Returns the first page produced by pages_via_sql for that url, or nil when
# the lookup yields no (truthy) page.
def get(url)
  matches = pages_via_sql("SELECT * FROM cache WHERE url = ?", url.to_s)
  matches.first if matches.any?
end
grep(pattern, &block) click to toggle source
# File lib/epitools/browser/cache.rb, line 123
# Finds cached pages whose url contains +pattern+ (SQL LIKE '%pattern%').
#
# pattern - substring to search for; bound as a query parameter so that quotes
#           in it cannot break or alter the SQL statement. LIKE wildcards
#           (% and _) inside the pattern keep their wildcard meaning.
# block   - optional; forwarded to pages_via_sql.
#
# Returns whatever pages_via_sql returns.
def grep(pattern, &block)
  pages_via_sql("SELECT * FROM cache WHERE url LIKE ?", "%#{pattern}%", &block)
end
include?(url)
Alias for: includes?
includes?(url) click to toggle source
# File lib/epitools/browser/cache.rb, line 137
# Returns true when a row exists whose url column equals +url+ (converted
# with to_s), false otherwise.
def includes?(url)
  hits = db.execute("SELECT url FROM cache WHERE url = ?", url.to_s)
  hits.any?
end
Also aliased as: include?
inspect() click to toggle source
# File lib/epitools/browser/cache.rb, line 25
# Human-readable summary: the database filename, the number of cached rows
# and the size of the database file in bytes.
def inspect
  label   = @filename.inspect
  entries = count
  bytes   = File.size(@filename)
  "#<Browser::Cache filename=#{label}, count=#{entries}, size=#{bytes} bytes>"
end
pages_via_sql(*args) { |row_to_page(row)| ... } click to toggle source
# File lib/epitools/browser/cache.rb, line 112
# Executes a query (SQL plus optional bind values, passed straight through to
# db.execute) and converts every resulting row with row_to_page.
#
# With a block, each converted page is yielded as rows arrive and the return
# value is whatever db.execute returns. Without a block, an Array of the
# converted pages is returned.
def pages_via_sql(*args, &block)
  dmsg [:pages_via_sql, args]
  return db.execute(*args).map { |record| row_to_page(record) } unless block_given?

  db.execute(*args) { |record| yield row_to_page(record) }
end
put(page, original_url=nil, **options) click to toggle source
# File lib/epitools/browser/cache.rb, line 40
# Stores +page+ in the cache under its own uri, with the body deflated.
#
# page         - object responding to body, content_type and uri
#                (checked with valid_page?).
# original_url - optional url that was originally requested. When given and
#                different from the page's uri, an extra redirect row
#                (original_url -> page uri, with no body) is stored as well.
#                It is normalised with to_s, like the urls in get/includes?.
# options      - :overwrite => truthy expires existing rows for the affected
#                urls before inserting.
#
# Returns true on success, false when an SQLite3::SQLException was raised
# (the exception is printed with p).
# Raises RuntimeError ("Invalid page") when +page+ fails valid_page?.
def put(page, original_url=nil, **options)
  dmsg [:put, original_url]

  raise "Invalid page" unless valid_page?(page)

  url          = page.uri.to_s
  original_url = original_url.to_s if original_url

  dmsg [:page_uri, url]
  dmsg [:original_url, original_url]

  # Only record a redirect when an original url was actually supplied;
  # otherwise a row with a NULL url would be inserted on every plain put.
  if original_url && url != original_url
    # redirect original_url to url
    expire(original_url) if options[:overwrite]
    db.execute(
      "INSERT INTO cache VALUES ( ?, ?, ?, ? )",
      original_url,
      page.content_type,
      nil,
      url
    )
  end

  compressed_body = Zlib::Deflate.deflate(page.body)

  expire(url) if options[:overwrite]
  db.execute(
    "INSERT INTO cache VALUES ( ?, ?, ?, ? )",
    url,
    page.content_type,
    SQLite3::Blob.new(compressed_body),
    nil
  )

  true

rescue SQLite3::SQLException => e
  p [:exception, e]
  false
end
recreate_tables() click to toggle source
# File lib/epitools/browser/cache.rb, line 174
# Drops the cache table (ignoring any StandardError raised while doing so,
# e.g. when the table does not exist) and then creates it again.
def recreate_tables
  begin
    drop_tables
  rescue StandardError
    nil # best effort: a failed drop must not prevent re-creation
  end

  create_tables
end
row_to_page(row) click to toggle source
# File lib/epitools/browser/cache.rb, line 81
# Converts one cache row (url, content_type, compressed body, redirect) into
# a page object.
#
# A row with a redirect target is resolved by looking that target up with get.
# Otherwise the body is inflated and wrapped in a Mechanize::Page for
# HTML/XML/XHTML content types, or a Mechanize::File for anything else.
#
# NOTE(review): redirect rows are followed recursively via get; a redirect
# chain that loops back on itself would presumably never terminate.
def row_to_page(row)
  url, content_type, compressed_body, redirect = row

  return get(redirect) if redirect

  body     = Zlib::Inflate.inflate(compressed_body)
  location = URI.parse(url)
  headers  = {'content-type'=>content_type}

  if content_type =~ %r{^(text/html|text/xml|application/xhtml\+xml)}i
    # Mechanize::Page.new(uri, response, body, code, mech)
    Mechanize::Page.new(location, headers, body, nil, agent)
  else
    # Mechanize::File.new(uri, response, body, code)
    Mechanize::File.new(location, headers, body, nil)
  end
end
size()
Alias for: count
urls(pattern=nil) click to toggle source
# File lib/epitools/browser/cache.rb, line 143
# Lists the urls stored in the cache.
#
# pattern - optional substring; when given, only urls containing it
#           (SQL LIKE '%pattern%') are returned. It is bound as a query
#           parameter so quotes in it cannot break or alter the statement.
#           LIKE wildcards (% and _) inside it keep their wildcard meaning.
#
# Returns an Array holding the first column of every matching row.
def urls(pattern=nil)
  rows =
    if pattern
      db.execute("SELECT url FROM cache WHERE url LIKE ?", "%#{pattern}%")
    else
      db.execute("SELECT url FROM cache")
    end

  rows.map { |row| row.first }
end
valid_page?(page) click to toggle source
# File lib/epitools/browser/cache.rb, line 35
# True when +page+ responds to every method the cache needs from it:
# body, content_type and uri.
def valid_page?(page)
  %i[body content_type uri].all? { |needed| page.respond_to?(needed) }
end

Private Instance Methods

create_tables() click to toggle source
# File lib/epitools/browser/cache.rb, line 190
# Creates the cache table and the unique index on its url column, unless they
# already exist. Returns the result of the index-creation statement.
def create_tables
  table_sql = "CREATE TABLE IF NOT EXISTS cache ( url varchar(2048), content_type varchar(255), body blob, redirect varchar(2048) )"
  index_sql = "CREATE UNIQUE INDEX IF NOT EXISTS url_index ON cache ( url )"

  db.execute(table_sql)
  db.execute(index_sql)
end
drop_tables() click to toggle source
# File lib/epitools/browser/cache.rb, line 195
# Drops the cache table. The statement has no IF EXISTS clause, so the
# database reports an error when the table is absent.
def drop_tables
  statement = "DROP TABLE cache"
  db.execute(statement)
end
list_tables() click to toggle source
# File lib/epitools/browser/cache.rb, line 186
# Returns the rows naming every table recorded in SQLITE_MASTER.
def list_tables
  query = "SELECT name FROM SQLITE_MASTER WHERE type='table'"
  db.execute(query)
end