class MboxHeaderScraper::Mail

Represents a single mail file and extracts selected header fields from it as TSV.

Public Class Methods

new(mail_fp) click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 7
# Stores the mail source on the instance for later reading.
#
# mail_fp - kept as-is in @mail; header_to_tsv later passes it to foreach
#           for line-by-line reading, so presumably this is the path of a
#           single mail file (TODO: confirm against callers).
def initialize(mail_fp)
  @mail = mail_fp
end

Public Instance Methods

header_to_tsv(options) click to toggle source

rubocop:disable Metrics/AbcSize

# File lib/mbox_header_scraper/mail.rb, line 12
# Reads the header section of the mail and renders the requested headers
# as tab-separated text.
#
# options - Hash of header name => flag; only headers whose flag is exactly
#           true are collected (see get_target_keys).
#
# Returns the String produced by build_tsv for the collected values.
# Raises RuntimeError when the file ends before an empty line is seen,
# i.e. the header section is never terminated.
def header_to_tsv(options)
  # TODO: prevent undefined options

  targets = get_target_keys(options)
  result = initialize_result_hash(targets)

  # Header whose value a following folded line continues; nil when the
  # previous header line was not one of the requested headers.
  last_symbol = nil

  # File.foreach rather than IO.foreach: a name starting with "|" must be
  # treated as a file name and never be executed as a command.
  File.foreach(@mail) do |raw_line|
    line = raw_line.chomp

    # The mail header finishes at the first empty line.
    return build_tsv(uniquify_result_hash(result), options) if line.empty?

    # A line starting with a space or a tab is a folded continuation of the
    # previous header's value.
    if line.start_with?(' ', "\t")
      # NOTE: continuation lines are always scanned for addresses, because
      # Subject and Date values are assumed to fit on a single line.
      result[last_symbol].concat(extract_email_address(line)) if last_symbol

      next
    end

    # A new header line starts here: forget the previous header unless this
    # line turns out to be one of the requested ones.
    last_symbol = nil

    targets.each do |param|
      matched = get_if_matched(param, line)
      next unless matched

      result[param].concat(matched)
      last_symbol = param
      break
    end
  end

  # mail without empty line must be broken
  raise 'this mail is broken'
end

Private Instance Methods

build_tsv(hash, options) click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 90
# Renders the collected header values as tab-separated rows.
#
# hash    - Hash of header key => Array of values.
# options - the same options Hash given to header_to_tsv; it decides which
#           columns appear and in which order.
#
# Row N holds the N-th value of every column, with '' where a column has
# no N-th value. Output stops at the first row whose cells are all ''.
# Each row ends with "\n". Returns the String.
def build_tsv(hash, options)
  columns = get_target_keys(options)
  output = ''.dup
  row_index = 0

  loop do
    cells = columns.map { |key| hash[key][row_index] || '' }
    break if cells.all? { |cell| cell == '' }

    output << cells.join("\t") << "\n"
    row_index += 1
  end

  output
end
extract_email_address(str) click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 110
# Collects every substring of str that looks like an e-mail address.
#
# An address here is a run of letters, digits, '.', '+', '-' or '_' on both
# sides of an '@' (case-insensitive).
#
# Returns an Array of Strings in order of appearance; empty when none match.
def extract_email_address(str)
  address_pattern = /[0-9a-z.\+\-\_]+@[0-9a-z.\+\-\_]+/i
  str.scan(address_pattern)
end
get_if_matched(param, line) click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 77
# Extracts the value(s) of header `param` from `line`, if the line is that
# header.
#
# param - header name (e.g. :Subject, :Date, :From); matched
#         case-insensitively at the very start of the line.
# line  - one header line.
#
# The name must be followed directly by ':'; any spaces or tabs after the
# colon are optional and are not part of the value.
#
# Returns nil when the line is not this header. Otherwise an Array:
#   :Subject - one element, the value passed through NKF.nkf('-mw', ...)
#   :Date    - one element, the raw value
#   others   - every e-mail address found in the value (may be empty)
def get_if_matched(param, line)
  # Escape the name so it is matched literally, and anchor with \A so only
  # the beginning of the string (not of any inner line) can match.
  header = /\A#{Regexp.escape(param.to_s)}:[ \t]*/i.match(line)
  return nil unless header

  value = header.post_match

  case param
  when :Subject
    [NKF.nkf('-mw', value).chomp]
  when :Date
    [value.chomp]
  else
    extract_email_address(value.chomp)
  end
end
get_target_keys(options) click to toggle source

rubocop:enable Metrics/AbcSize

# File lib/mbox_header_scraper/mail.rb, line 56
# Lists the option names that are switched on.
#
# options - Hash of name => flag.
#
# Returns an Array of the keys whose value is exactly true (other truthy
# values do not count), in the Hash's own order.
def get_target_keys(options)
  options.keys.select do |name|
    options[name] == true
  end
end
initialize_result_hash(targets) click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 60
# Builds the empty accumulator used while scanning headers.
#
# targets - list of header keys.
#
# Returns a new Hash mapping every key to its own fresh empty Array.
def initialize_result_hash(targets)
  targets.each_with_object({}) do |key, buckets|
    buckets[key] = []
  end
end
uniquify_result_hash(hash) click to toggle source
# File lib/mbox_header_scraper/mail.rb, line 69
# Removes duplicate entries from every value list, in place.
#
# hash - Hash of key => Array; each Array is modified with uniq!, keeping
#        the first occurrence of each element.
#
# Returns the same Hash object that was passed in.
def uniquify_result_hash(hash)
  hash.each_value(&:uniq!)
  hash
end