class Sec::Firms::Lists

Constants

URL

Public Class Methods

new() click to toggle source
Calls superclass method
# File lib/sec/firms/lists.rb, line 10
# Performs an HTTP GET on URL and hands the response body to the
# superclass initializer.
def initialize
  super(Net::HTTP.get(URI.parse(URL)))
end

Public Instance Methods

latest_as_csv() click to toggle source
# File lib/sec/firms/lists.rb, line 15
# Fetches the latest report archive, unpacks it, and converts the first
# unpacked file to CSV. Returns the value produced by convert_to_csv.
def latest_as_csv
  archive_path = latest_report
  extracted_paths = unzip(archive_path)
  convert_to_csv(extracted_paths.first)
end
urls() click to toggle source
# File lib/sec/firms/lists.rb, line 19
# Collects every anchor matched by '#main-content ul li a' as a
# [Date, absolute URL] pair: the date is parsed from the anchor text and
# the URL is the anchor's href run through expand_url.
#
# Returns the pairs sorted in descending order (newest date first).
def urls
  links = doc.css('#main-content ul li a')
  dated_urls = links.map { |link| [Date.parse(link.text), expand_url(link.attr('href'))] }
  dated_urls.sort.reverse
end

Private Instance Methods

convert_to_csv(xls_path) click to toggle source
# File lib/sec/firms/lists.rb, line 54
# Writes a CSV rendering of the spreadsheet at xls_path next to it.
#
# The CSV keeps the full original file name and appends '.csv' to it
# (so "report.xls" becomes "report.xls.csv" in the same directory).
#
# Returns the path of the CSV file.
def convert_to_csv(xls_path)
  directory = File.dirname(xls_path)
  csv_name = "#{File.basename(xls_path)}.csv"
  csv_path = File.join(directory, csv_name)

  Roo::Excel.new(xls_path).to_csv(csv_path)
  csv_path
end
expand_url(relative_path) click to toggle source
# File lib/sec/firms/lists.rb, line 66
# Resolves relative_path against URL and returns the result as a String.
def expand_url(relative_path)
  absolute = URI.join(URL, relative_path)
  absolute.to_s
end
latest_report() click to toggle source
# File lib/sec/firms/lists.rb, line 30
# Downloads the archive referenced by the first pair returned by +urls+
# (the newest one, since +urls+ sorts newest first) into /tmp.
#
# The local file name is the MD5 hex digest of the URL plus ".zip", so a
# file that is already present is reused rather than downloaded again.
#
# Returns the String path of the local archive.
def latest_report
  url = urls.first[1]
  full_path = "/tmp/#{Digest::MD5.hexdigest(url)}.zip"

  unless File.exist?(full_path)
    # URI.open rather than Kernel#open: open-uri's Kernel#open hook was
    # removed in Ruby 3.0, and Kernel#open runs a subprocess for strings
    # that start with "|". The block form also closes the remote handle.
    URI.open(url) do |remote_file|
      File.open(full_path, 'wb') do |file|
        IO.copy_stream(remote_file, file)
      end
    end
  end

  full_path
end
latest_report_url() click to toggle source
# File lib/sec/firms/lists.rb, line 62
# Returns the 'href' attribute of the first anchor matched by
# 'table:nth-child(2) li a' in the parsed document.
def latest_report_url
  first_link = doc.css('table:nth-child(2) li a').first
  first_link.attribute('href')
end
unzip(zip_path) click to toggle source
# File lib/sec/firms/lists.rb, line 44
# Extracts every entry of the archive at zip_path into the directory that
# contains the archive, skipping entries whose target file already exists.
#
# Entry names come from a downloaded archive, so each target is checked
# to resolve inside that directory before anything is written; a name
# such as "../x" would otherwise be extracted outside it.
#
# Returns an Array with the target path of every entry.
# Raises ArgumentError when an entry resolves outside the directory.
def unzip(zip_path)
  target_dir = File.dirname(zip_path)
  root = File.expand_path(target_dir)
  root += File::SEPARATOR unless root.end_with?(File::SEPARATOR)

  Zip::File.open(zip_path) do |zip_file|
    zip_file.map do |entry|
      file_path = File.join(target_dir, entry.name)
      unless File.expand_path(file_path).start_with?(root)
        raise ArgumentError, "zip entry escapes target directory: #{entry.name}"
      end

      entry.extract(file_path) unless File.exist?(file_path)
      file_path
    end
  end
end