class Lite::Address::Parser

Constants

CAPITALIZATION_PARTS
LOOKUPS
STREET_POSITIONS

Attributes

address[R]
country_code[R]

Public Class Methods

new(address, country_code: 'US') click to toggle source
# File lib/lite/address/parser.rb, line 15
# Stores the sanitized inputs on the parser.
#
# address      - raw address value, passed through sanitize_address.
# country_code - country identifier (defaults to 'US'), passed through
#                sanitize_country_code.
def initialize(address, country_code: 'US')
  @country_code = sanitize_country_code(country_code)
  @address = sanitize_address(address)
end

Public Instance Methods

any(args = {}) click to toggle source
# File lib/lite/address/parser.rb, line 31
# Parses the address with whichever strategy applies.
#
# When the corner pattern matches the address, only the intersectional
# parser is used (there is no fallback if it returns nil). Otherwise the
# formal parser is tried first and the informal parser second.
#
# args - options hash forwarded untouched to the chosen parser.
def any(args = {})
  if regexp.corner.match(address)
    intersectional(args)
  else
    formal(args) || informal(args)
  end
end
formal(args = {}) click to toggle source
# File lib/lite/address/parser.rb, line 37
# Parses the address with the formal-address pattern.
#
# args - options hash forwarded to generate_address.
#
# Returns nil when the pattern does not match; otherwise whatever
# generate_address returns for the captured parts.
def formal(args = {})
  match = regexp.formal_address.match(address)
  return if match.nil?

  generate_address(match_map(match), args)
end
informal(args = {}) click to toggle source
# File lib/lite/address/parser.rb, line 44
# Parses the address with the informal-address pattern.
#
# args - options hash forwarded to generate_address.
#
# Returns nil when the pattern does not match; otherwise whatever
# generate_address returns for the captured parts.
def informal(args = {})
  match = regexp.informal_address.match(address)
  return if match.nil?

  generate_address(match_map(match), args)
end
intersectional(args = {}) click to toggle source
# File lib/lite/address/parser.rb, line 51
# Parses the address with the intersectional-address pattern.
#
# After building the capture map, the 'street' and 'street_type' parts are
# re-read per group via intersectional_submatch, and 'street_type' is then
# passed through intersectional_rematch before the result is generated.
#
# args - options hash forwarded to generate_address.
#
# Returns nil when the pattern does not match.
def intersectional(args = {})
  match = regexp.intersectional_address.match(address)
  return unless match

  map = match_map(match)
  %w[street street_type].each do |part|
    intersectional_submatch(match, map, part)
  end
  intersectional_rematch(match, map, 'street_type')

  generate_address(map, args)
end

Protected Instance Methods

country() click to toggle source
# File lib/lite/address/parser.rb, line 64
# Memoized ISO3166::Country built from country_code.
def country
  return @country if @country

  @country = ISO3166::Country.new(country_code)
end
list() click to toggle source
# File lib/lite/address/parser.rb, line 68
# Memoized Lite::Address::List built from the parser's country.
def list
  return @list if @list

  @list = Lite::Address::List.new(country)
end
regexp() click to toggle source
# File lib/lite/address/parser.rb, line 72
# Memoized Lite::Address::Regexp built from the parser's list.
def regexp
  return @regexp if @regexp

  @regexp = Lite::Address::Regexp.new(list)
end

Private Instance Methods

address_abbreviate_unit_prefixes(map) click to toggle source
# File lib/lite/address/parser.rb, line 146
# Replaces map['unit_prefix'] with its abbreviation, in place.
#
# Every (abbreviation, pattern) pair from list.unit_abbr_regexps is tested
# in turn against the current value, so a later pattern sees the value
# written by an earlier one. A missing unit_prefix never matches.
def address_abbreviate_unit_prefixes(map)
  list.unit_abbr_regexps.each do |abbr, regex|
    map['unit_prefix'] = abbr if regex.match(map['unit_prefix'])
  end
end
address_avoid_redundant_street_type(map) click to toggle source
# File lib/lite/address/parser.rb, line 163
# Drops a street type that is already spelled out inside the street name.
#
# For each suffix in STREET_POSITIONS, looks up the pattern registered for
# the (downcased) street type in list.street_type_regexps and deletes the
# "street_type<suffix>" entry when that pattern matches "street<suffix>".
# Mutates map in place.
def address_avoid_redundant_street_type(map)
  STREET_POSITIONS.each do |suffix|
    type_key = "street_type#{suffix}"
    street = map["street#{suffix}"]
    street_type = map[type_key]
    next unless street && street_type

    # A street type with no registered pattern cannot be judged redundant;
    # skip it rather than calling a method on nil.
    type_regexp = list.street_type_regexps[street_type.downcase]
    next unless type_regexp&.match?(street)

    map.delete(type_key)
  end
end
address_capitalize_parts(map) click to toggle source
# File lib/lite/address/parser.rb, line 193
# Capitalizes each whitespace-separated word of the parts named in
# CAPITALIZATION_PARTS, in place. Words are re-joined with single spaces;
# parts that are absent from map are skipped.
def address_capitalize_parts(map)
  CAPITALIZATION_PARTS.each do |part|
    next unless map[part]

    words = map[part].split
    map[part] = words.map(&:capitalize).join(' ')
  end
end
address_expand_cardinals(map) click to toggle source
# File lib/lite/address/parser.rb, line 176
# Expands a leading cardinal code on the city (e.g. "NW Portland") using
# list.cardinal_codes, mutating map['city'] in place.
#
# Returns nil when there is no city or nothing was substituted (gsub!).
def address_expand_cardinals(map)
  return unless map['city']

  # No /o flag: the pattern depends on this parser's regexp object, so it
  # must be re-interpolated on every call instead of being frozen at first use.
  map['city'].gsub!(/^(#{regexp.cardinal_code})\s+(?=\S)/) do |matched|
    # The block argument is the matched String, so indexing it with [0]
    # would only yield its first character; read the captured code instead.
    code = Regexp.last_match(1).upcase
    expansion = list.cardinal_codes[code]

    # Keep the original text when the code has no known expansion.
    expansion ? "#{expansion} " : matched
  end
end
address_fix_dirty_ordinals(map) click to toggle source
# File lib/lite/address/parser.rb, line 184
# Repairs split ordinals in the street name, in place.
#
# Sometimes parcel data will have addresses like "1 1ST ST" as "1 1 ST ST",
# leaving a street of "1 ST". When the whole street is digits, whitespace and
# an ordinal suffix (st/nd/rd/th), the whitespace is removed ("1 ST" -> "1ST").
#
# Returns nil when there is no street or nothing was changed (gsub!).
def address_fix_dirty_ordinals(map)
  street = map['street']
  return unless street

  street.gsub!(/\A(\d+\s+st|\d+\s+nd|\d+\s+rd|\d+\s+th)\z/i) do |ordinal|
    ordinal.gsub(/\s+/, '')
  end
end
address_normalize_values(map) click to toggle source
# File lib/lite/address/parser.rb, line 154
# Rewrites map values to their normalized form, in place.
#
# For every key in normalization_map that is present in map, the downcased
# value is looked up in that key's table; the value is replaced only when
# the table has an entry for it.
def address_normalize_values(map)
  normalization_map.each do |key, lookup|
    current = map[key]
    next unless current

    normalized = lookup[current.downcase]
    next unless normalized

    map[key] = normalized
  end
end
address_redundantize_street_type(map) click to toggle source
# File lib/lite/address/parser.rb, line 137
# Derives the street type from the street name when none was captured.
#
# Always sets map['redundant_street_type'] to false first. When a street is
# present but no street_type, the street is matched against regexp.street;
# on a match the 'street_type' named capture is copied into map, and the
# flag is then set to true.
#
# NOTE(review): the flag becomes true even when the street pattern does not
# match (so no type was extracted) - confirm this is intended.
def address_redundantize_street_type(map)
  map['redundant_street_type'] = false
  return if !map['street'] || map['street_type']

  found = regexp.street.match(map['street'])
  map['street_type'] = found['street_type'] if found
  map['redundant_street_type'] = true
end
address_strip_chars(map) click to toggle source
# File lib/lite/address/parser.rb, line 125
# Trims every value in map and removes disallowed characters, in place.
#
# Word characters, whitespace, '-', '#', '&' and '/' are kept everywhere;
# the 'number' part additionally keeps '.'.
def address_strip_chars(map)
  map.each do |key, value|
    value.strip!

    disallowed = key == 'number' ? %r{[^\w\s\-\#&/.]} : %r{[^\w\s\-\#&/]}
    value.gsub!(disallowed, '')
  end
end
generate_address(map, args = {}) click to toggle source
# File lib/lite/address/parser.rb, line 199
# Runs the cleanup pipeline over map and wraps it in a Format object.
#
# The steps mutate map in this fixed order: strip characters, derive a
# missing street type, abbreviate unit prefixes, normalize values,
# optionally drop redundant street types, expand city cardinals, fix split
# ordinals, capitalize parts. The parser's country, list and regexp are
# then stored under the :country, :list and :regexp keys.
#
# map  - hash of captured address parts (mutated).
# args - options hash; only :avoid_redundant_street_type is read here.
#
# Returns a Lite::Address::Format built from map.
def generate_address(map, args = {})
  address_strip_chars(map)
  address_redundantize_street_type(map)
  address_abbreviate_unit_prefixes(map)
  address_normalize_values(map)

  if args[:avoid_redundant_street_type]
    address_avoid_redundant_street_type(map)
  end

  address_expand_cardinals(map)
  address_fix_dirty_ordinals(map)
  address_capitalize_parts(map)

  context = { country: country, list: list, regexp: regexp }
  map.update(context)

  Lite::Address::Format.new(map)
end
intersectional_rematch(_match, map, part) click to toggle source
# File lib/lite/address/parser.rb, line 116
# Singularizes a pluralized part shared by both sides of an intersection
# (e.g. a street_type of "Sts" covering both streets).
#
# Applies only when map[part] is set and map["<part>2"] is either missing or
# equal to it. A trailing "s" (plus any trailing non-word characters) is
# stripped from a copy of the value; if the remainder fully matches the
# pattern returned by regexp.public_send(part), it is written to both
# map[part] and map["<part>2"].
#
# _match - unused; kept so the signature matches intersectional_submatch.
#
# Returns the singular value when a rewrite happened, nil otherwise.
def intersectional_rematch(_match, map, part)
  second_key = "#{part}2"
  return unless map[part] && (!map[second_key] || (map[part] == map[second_key]))

  type = map[part].dup
  return unless type.gsub!(/s\W*$/i, '')

  # No /o flag: the interpolated pattern depends on both `part` and this
  # parser's regexp object. With /o the first call's pattern would be reused
  # for every later call, whatever was asked for.
  return unless /\A#{regexp.public_send(part)}\z/i.match?(type)

  map[part] = map[second_key] = type
end
intersectional_submatch(match, map, part) click to toggle source

rubocop:enable Metrics/AbcSize

# File lib/lite/address/parser.rb, line 109
# Splits a capture name that occurs more than once in the intersectional
# pattern into map[part] and map["<part>2"].
#
# The group numbers registered for `part` are read from the pattern's
# named_captures; groups that did not participate in the match are dropped,
# and the first two remaining values (if any) are stored.
def intersectional_submatch(match, map, part)
  group_indexes = regexp.intersectional_address.named_captures[part]
  first, second = group_indexes.filter_map { |index| match[index.to_i] }

  map[part] = first if first
  map["#{part}2"] = second if second
end
match_map(match) click to toggle source
# File lib/lite/address/parser.rb, line 86
# Converts a MatchData into a hash of capture name => captured text,
# leaving out named groups that captured nothing.
def match_map(match)
  pairs = match.names.filter_map do |name|
    captured = match[name]
    [name, captured] if captured
  end

  pairs.to_h
end
normalization_map() click to toggle source

rubocop:disable Metrics/AbcSize

# File lib/lite/address/parser.rb, line 93
# Memoized table of map key => lookup used by address_normalize_values.
#
# Prefix and suffix keys use list.cardinal_types, street type keys use
# list.street_types, and 'state' uses list.subdivision_names.
def normalization_map
  @normalization_map ||= begin
    table = {}

    %w[prefix prefix1 prefix2 suffix suffix1 suffix2].each do |key|
      table[key] = list.cardinal_types
    end
    %w[street_type street_type1 street_type2].each do |key|
      table[key] = list.street_types
    end
    table['state'] = list.subdivision_names

    table
  end
end
sanitize_address(value) click to toggle source
# File lib/lite/address/parser.rb, line 78
# Removes one leading "(" and one trailing ")" from the address.
#
# The value is coerced with to_s first (as sanitize_country_code does), so
# nil or a non-String input yields a String instead of raising.
def sanitize_address(value)
  value.to_s.delete_prefix('(').delete_suffix(')')
end
sanitize_country_code(value) click to toggle source
# File lib/lite/address/parser.rb, line 82
# Coerces the country code to a String and upcases it.
def sanitize_country_code(value)
  code = value.to_s
  code.upcase
end