class Lite::Address::Parser
Constants
- CAPITALIZATION_PARTS
- LOOKUPS
- STREET_POSITIONS
Attributes
address[R]
country_code[R]
Public Class Methods
new(address, country_code: 'US')
click to toggle source
# File lib/lite/address/parser.rb, line 15
# Builds a parser around one raw address string.
#
# @param address raw address text; stored after passing through sanitize_address
# @param country_code country identifier; stored after passing through
#   sanitize_country_code (defaults to 'US')
def initialize(address, country_code: 'US')
  @country_code = sanitize_country_code(country_code)
  @address = sanitize_address(address)
end
Public Instance Methods
any(args = {})
click to toggle source
# File lib/lite/address/parser.rb, line 31
# Parses the address using whichever strategy applies.
#
# When the corner regexp matches, only the intersectional parse is attempted
# (its result is returned even if it is nil). Otherwise the formal parse is
# tried first and the informal parse is the fallback.
#
# @param args [Hash] options forwarded to the chosen parse method
# @return the result of the chosen parse method, or nil when nothing matched
def any(args = {})
  if regexp.corner.match(address)
    intersectional(args)
  else
    formal(args) || informal(args)
  end
end
formal(args = {})
click to toggle source
# File lib/lite/address/parser.rb, line 37
# Parses the address with the formal-address regexp.
#
# @param args [Hash] options forwarded to generate_address
# @return the value of generate_address, or nil when the regexp does not match
def formal(args = {})
  match = regexp.formal_address.match(address)
  return unless match

  generate_address(match_map(match), args)
end
informal(args = {})
click to toggle source
# File lib/lite/address/parser.rb, line 44
# Parses the address with the informal-address regexp.
#
# @param args [Hash] options forwarded to generate_address
# @return the value of generate_address, or nil when the regexp does not match
def informal(args = {})
  match = regexp.informal_address.match(address)
  return unless match

  generate_address(match_map(match), args)
end
intersectional(args = {})
click to toggle source
# File lib/lite/address/parser.rb, line 51
# Parses the address with the intersectional-address regexp.
#
# After building the capture map, the 'street' and 'street_type' entries are
# split into first/second values, and 'street_type' is then re-checked by
# intersectional_rematch before the address is generated.
#
# @param args [Hash] options forwarded to generate_address
# @return the value of generate_address, or nil when the regexp does not match
def intersectional(args = {})
  match = regexp.intersectional_address.match(address)
  return unless match

  map = match_map(match)
  %w[street street_type].each do |part|
    intersectional_submatch(match, map, part)
  end
  intersectional_rematch(match, map, 'street_type')

  generate_address(map, args)
end
Protected Instance Methods
country()
click to toggle source
# File lib/lite/address/parser.rb, line 64
# Memoized ISO3166::Country built from country_code.
def country
  return @country if @country

  @country = ISO3166::Country.new(country_code)
end
list()
click to toggle source
# File lib/lite/address/parser.rb, line 68
# Memoized Lite::Address::List built from the parser's country.
def list
  return @list if @list

  @list = Lite::Address::List.new(country)
end
regexp()
click to toggle source
# File lib/lite/address/parser.rb, line 72
# Memoized Lite::Address::Regexp built from the parser's list.
def regexp
  return @regexp if @regexp

  @regexp = Lite::Address::Regexp.new(list)
end
Private Instance Methods
address_abbreviate_unit_prefixes(map)
click to toggle source
# File lib/lite/address/parser.rb, line 146
# Replaces map['unit_prefix'] with the abbreviation of every entry in
# list.unit_abbr_regexps whose regexp matches the current value.
#
# @param map [Hash] address parts keyed by name; mutated in place
def address_abbreviate_unit_prefixes(map)
  list.unit_abbr_regexps.each do |abbreviation, pattern|
    # Re-read the value on every pass: an earlier pass may have replaced it.
    map['unit_prefix'] = abbreviation if pattern.match(map['unit_prefix'])
  end
end
address_avoid_redundant_street_type(map)
click to toggle source
# File lib/lite/address/parser.rb, line 163
# Drops a street type when the street name itself already matches that type.
#
# For every suffix in STREET_POSITIONS, looks up the regexp registered for the
# (downcased) street type in list.street_type_regexps and deletes the
# "street_type<suffix>" entry when that regexp matches "street<suffix>".
#
# @param map [Hash] address parts keyed by name; mutated in place
def address_avoid_redundant_street_type(map)
  STREET_POSITIONS.each do |suffix|
    type_key = "street_type#{suffix}"
    street = map["street#{suffix}"]
    street_type = map[type_key]
    next unless street && street_type

    # A type with no registered regexp cannot be compared against the street;
    # keep it rather than raising NoMethodError on nil.
    type_regexp = list.street_type_regexps[street_type.downcase]
    next unless type_regexp&.match(street)

    map.delete(type_key)
  end
end
address_capitalize_parts(map)
click to toggle source
# File lib/lite/address/parser.rb, line 193
# Capitalizes each whitespace-separated word of every present part listed in
# CAPITALIZATION_PARTS, re-joining the words with single spaces.
#
# @param map [Hash] address parts keyed by name; mutated in place
def address_capitalize_parts(map)
  CAPITALIZATION_PARTS.each do |part|
    value = map[part]
    next unless value

    map[part] = value.split.map(&:capitalize).join(' ')
  end
end
address_expand_cardinals(map)
click to toggle source
# File lib/lite/address/parser.rb, line 176
# Expands a leading cardinal code in the city (e.g. a code followed by
# whitespace and more text) using list.cardinal_codes.
#
# @param map [Hash] address parts keyed by name; map['city'] is mutated in place
# @return [String, nil] the mutated city, or nil when there is no city or no
#   leading cardinal code was found
def address_expand_cardinals(map)
  return unless map['city']

  map['city'].gsub!(/^(#{regexp.cardinal_code})\s+(?=\S)/o) do |matched|
    # The block receives the matched text as a String, so indexing it with [0]
    # would yield only its first character. Use capture group 1 to get the
    # whole cardinal code (important for multi-letter codes).
    code = Regexp.last_match(1)
    expanded = list.cardinal_codes[code.upcase]

    # Leave the text untouched when the code has no known expansion.
    expanded ? "#{expanded} " : matched
  end
end
address_fix_dirty_ordinals(map)
click to toggle source
# File lib/lite/address/parser.rb, line 184
# Rejoins an ordinal street name whose number and suffix were split apart.
#
# Sometimes parcel data will have addresses like "1 1ST ST" as "1 1 ST ST".
# When the whole street is "<digits><whitespace><st|nd|rd|th>", the
# whitespace is removed.
#
# @param map [Hash] address parts keyed by name; map['street'] is mutated in place
# @return [String, nil] the mutated street, or nil when nothing was changed
def address_fix_dirty_ordinals(map)
  street = map['street']
  return unless street

  street.gsub!(/\A(\d+\s+st|\d+\s+nd|\d+\s+rd|\d+\s+th)\z/i) do |ordinal|
    ordinal.gsub(/\s+/, '')
  end
end
address_normalize_values(map)
click to toggle source
# File lib/lite/address/parser.rb, line 154
# Replaces each present part with its normalized form.
#
# For every key in normalization_map, the downcased current value is looked
# up in that key's table; the part is overwritten only when a mapping exists.
#
# @param map [Hash] address parts keyed by name; mutated in place
def address_normalize_values(map)
  normalization_map.each do |key, lookup|
    current = map[key]
    next unless current

    normalized = lookup[current.downcase]
    next unless normalized

    map[key] = normalized
  end
end
address_redundantize_street_type(map)
click to toggle source
# File lib/lite/address/parser.rb, line 137
# Derives a street type from the street name when none was parsed.
#
# Always sets map['redundant_street_type']. It stays false when there is no
# street or a street type is already present. Otherwise the street is matched
# against regexp.street, the 'street_type' capture (if there is a match) is
# stored, and the flag is set to true — including when the match fails.
#
# @param map [Hash] address parts keyed by name; mutated in place
def address_redundantize_street_type(map)
  street = map['street']
  map['redundant_street_type'] = false
  return if !street || map['street_type']

  found = regexp.street.match(street)
  map['street_type'] = found['street_type'] if found
  map['redundant_street_type'] = true
end
address_strip_chars(map)
click to toggle source
# File lib/lite/address/parser.rb, line 125
# Trims every value and removes characters outside the allowed set.
#
# Word characters, whitespace, '-', '#', '&' and '/' are kept for all parts;
# the 'number' part additionally keeps '.'.
#
# @param map [Hash] address parts keyed by name; every value is mutated in place
# @return [Hash] the same map
def address_strip_chars(map)
  map.each do |key, string|
    string.strip!
    disallowed = key == 'number' ? %r{[^\w\s\-\#&/.]} : %r{[^\w\s\-\#&/]}
    string.gsub!(disallowed, '')
  end
end
generate_address(map, args = {})
click to toggle source
# File lib/lite/address/parser.rb, line 199
# Runs the clean-up pipeline over the captured parts and wraps the result.
#
# The steps mutate the map in place and run in a fixed order; the redundant
# street type removal only runs when args[:avoid_redundant_street_type] is
# truthy. The country, list and regexp helpers are then attached under symbol
# keys before the map is handed to Lite::Address::Format.
#
# @param map [Hash] address parts keyed by name; mutated in place
# @param args [Hash] options; only :avoid_redundant_street_type is read here
# @return [Lite::Address::Format]
def generate_address(map, args = {})
  address_strip_chars(map)
  address_redundantize_street_type(map)
  address_abbreviate_unit_prefixes(map)
  address_normalize_values(map)

  if args[:avoid_redundant_street_type]
    address_avoid_redundant_street_type(map)
  end

  address_expand_cardinals(map)
  address_fix_dirty_ordinals(map)
  address_capitalize_parts(map)

  map[:country] = country
  map[:list] = list
  map[:regexp] = regexp

  Lite::Address::Format.new(map)
end
intersectional_rematch(_match, map, part)
click to toggle source
# File lib/lite/address/parser.rb, line 116
# Re-interprets a shared, trailing-"s" value of +part+ as one value applying
# to both sides of the intersection.
#
# Only acts when map[part] is present and the second value is either missing
# or identical. The value is copied, a trailing "s" (plus any trailing
# non-word characters) is stripped, and if the remainder fully matches the
# regexp returned by regexp.public_send(part) it is stored under both keys.
#
# @param _match unused
# @param map [Hash] address parts keyed by name; mutated in place
# @param part [String] name of the part, also the name of the regexp accessor
# @return [String, nil] the stored value, or nil when nothing was changed
def intersectional_rematch(_match, map, part)
  second_key = "#{part}2"
  first = map[part]
  second = map[second_key]
  return unless first
  return if second && first != second

  singular = first.dup
  # gsub! returns nil when there was no trailing "s" to remove.
  return unless singular.gsub!(/s\W*$/i, '')
  return unless /\A#{regexp.public_send(part)}\z/io =~ singular

  map[part] = map[second_key] = singular
end
intersectional_submatch(match, map, part)
click to toggle source
rubocop:enable Metrics/AbcSize
# File lib/lite/address/parser.rb, line 109
# Splits the captures of a repeated named group into first and second values.
#
# Looks up the group numbers registered for +part+ in the intersectional
# regexp, collects the non-nil captures at those positions, and stores the
# first under map[part] and the second under map["<part>2"] when present.
#
# @param match [MatchData] match produced by the intersectional regexp
# @param map [Hash] address parts keyed by name; mutated in place
# @param part [String] name of the capture group
def intersectional_submatch(match, map, part)
  indices = regexp.intersectional_address.named_captures[part]
  first, second = indices.filter_map { |index| match[index.to_i] }

  map[part] = first if first
  map["#{part}2"] = second if second
end
match_map(match)
click to toggle source
# File lib/lite/address/parser.rb, line 86
# Converts a match into a hash of its named captures, leaving out the groups
# that did not participate in the match.
#
# @param match [MatchData]
# @return [Hash{String => String}]
def match_map(match)
  match.named_captures.compact
end
normalization_map()
click to toggle source
rubocop:disable Metrics/AbcSize
# File lib/lite/address/parser.rb, line 93
# Memoized table of part name => lookup table used to normalize that part.
#
# Prefix and suffix parts use list.cardinal_types, street type parts use
# list.street_types, and 'state' uses list.subdivision_names.
#
# @return [Hash{String => Object}]
def normalization_map
  return @normalization_map if @normalization_map

  @normalization_map = {
    'prefix' => list.cardinal_types,
    'prefix1' => list.cardinal_types,
    'prefix2' => list.cardinal_types,
    'suffix' => list.cardinal_types,
    'suffix1' => list.cardinal_types,
    'suffix2' => list.cardinal_types,
    'street_type' => list.street_types,
    'street_type1' => list.street_types,
    'street_type2' => list.street_types,
    'state' => list.subdivision_names
  }
end
sanitize_address(value)
click to toggle source
# File lib/lite/address/parser.rb, line 78
# Normalizes the raw address before parsing by removing one leading "(" and
# one trailing ")".
#
# The value is coerced with to_s first (as sanitize_country_code does), so nil
# or other non-String input yields a String instead of raising NoMethodError.
#
# @param value [#to_s] raw address
# @return [String] a new string; the argument is not mutated
def sanitize_address(value)
  value.to_s.delete_prefix('(').delete_suffix(')')
end
sanitize_country_code(value)
click to toggle source
# File lib/lite/address/parser.rb, line 82
# Normalizes a country code to an upper-case String.
#
# @param value [#to_s] country code in any case, e.g. a String or Symbol
# @return [String]
def sanitize_country_code(value)
  code = value.to_s
  code.upcase
end