class MboxHeaderScraper::Mail
Mail object: wraps a single mail file and extracts selected header fields from it as TSV.
Public Class Methods
new(mail_fp)
click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 7
# Remembers the mail source for later scanning.
#
# mail_fp - stored as-is in @mail; header_to_tsv later hands it to
#           IO.foreach.
def initialize(mail_fp)
  @mail = mail_fp
end
Public Instance Methods
header_to_tsv(options)
click to toggle source
Scans the header section of the mail and returns the selected header values as TSV text. (rubocop:disable Metrics/AbcSize)
# File lib/mbox_header_scraper/mail.rb, line 12
# Reads the mail referenced by @mail line by line, collects the values of
# the selected headers and returns them as TSV text.
#
# options - Hash of header name => flag; get_target_keys decides which
#           headers are collected.
#
# Returns the String produced by build_tsv for the collected values.
# Raises RuntimeError ('this mail is broken') when the input ends before an
# empty line terminates the header section.
def header_to_tsv(options)
  # TODO: prevent undefined options
  targets = get_target_keys(options)
  result = initialize_result_hash(targets)
  last_symbol = nil

  IO.foreach(@mail) do |line|
    line.chomp!

    # The header section ends at the first empty line; de-duplicate once
    # here instead of after every line (the final result is the same).
    return build_tsv(uniquify_result_hash(result), options) if line.empty?

    # A line starting with a space OR a tab continues the previous header
    # (folded header). Only e-mail addresses are picked up from such lines,
    # whichever header they belong to.
    if line.start_with?(' ', "\t")
      result[last_symbol].concat(extract_email_address(line)) if last_symbol
      next
    end

    # Remember which target header (if any) this line started so that
    # following continuation lines are attached to it.
    last_symbol = targets.find do |param|
      matched = get_if_matched(param, line)
      result[param].concat(matched) if matched
      matched
    end
  end

  # mail without empty line must be broken
  raise 'this mail is broken'
end
Private Instance Methods
build_tsv(hash, options)
click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 90
# Lays the collected values out as tab-separated rows, one column per
# selected header (column order follows get_target_keys).
#
# hash    - Hash of header name => Array of values.
# options - the same options Hash given to header_to_tsv.
#
# Row n holds the n-th value of every column ('' where a column has run
# out). Output stops at the first row whose cells are all empty.
# Returns the TSV text as a String ('' when there is nothing to emit).
def build_tsv(hash, options)
  columns = get_target_keys(options)
  tsv = ''

  0.step do |row|
    cells = columns.map { |key| hash[key][row] || '' }
    break if cells.all? { |cell| cell == '' }

    tsv += "#{cells.join("\t")}\n"
  end

  tsv
end
extract_email_address(str)
click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 110
# Picks every address-looking token out of a string.
#
# str - String to scan.
#
# A token is a run of letters, digits, '.', '+', '-' or '_' on both sides
# of an '@' (case-insensitive). Returns an Array of the matched Strings in
# order of appearance; empty when nothing matches.
def extract_email_address(str)
  address_pattern = /[0-9a-z.\+\-\_]+@[0-9a-z.\+\-\_]+/i
  str.scan(address_pattern)
end
get_if_matched(param, line)
click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 77
# Returns the value(s) of header +param+ when +line+ is that header.
#
# param - header name (e.g. :Subject, :Date, :To); compared
#         case-insensitively and matched literally.
# line  - one header line.
#
# Returns nil when the line does not start with "<param>: ". Otherwise
# returns an Array: the NKF-converted subject text for :Subject, the raw
# text for :Date, and the e-mail addresses found in the value for any
# other header (possibly empty).
def get_if_matched(param, line)
  # Escape the name so regex metacharacters in it ('.', '+', ...) are
  # matched literally instead of being interpreted as a pattern.
  found = /^#{Regexp.escape(param.to_s)}: /i.match(line)
  return nil unless found

  value = found.post_match
  case param
  when :Subject
    # NKF '-mw' flags: presumably MIME-decode and emit UTF-8 — see NKF docs
    [NKF.nkf('-mw', value).chomp]
  when :Date
    [value.chomp]
  else
    extract_email_address(value.chomp)
  end
end
get_target_keys(options)
click to toggle source
Returns the option keys whose value is exactly true. (rubocop:enable Metrics/AbcSize)
# File lib/mbox_header_scraper/mail.rb, line 56
# Lists the option names that are switched on.
#
# options - Hash of name => flag.
#
# Only entries whose value equals true are kept; other truthy values do
# not count. Returns an Array of keys in the Hash's order.
def get_target_keys(options)
  options.keys.select { |name| options[name] == true }
end
initialize_result_hash(targets)
click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 60
# Builds the empty accumulator used while scanning headers.
#
# targets - list of header names.
#
# Returns a Hash mapping every target to its own fresh empty Array.
def initialize_result_hash(targets)
  targets.each_with_object({}) do |name, collected|
    collected[name] = []
  end
end
uniquify_result_hash(hash)
click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 69
# Drops duplicate entries from every value list, in place.
#
# hash - Hash of header name => Array of values; the arrays themselves
#        are modified.
#
# Returns the same Hash object that was passed in.
def uniquify_result_hash(hash)
  hash.each_value(&:uniq!)
  hash
end