class NameTamer::Name
Attributes
References: www.w3.org/International/questions/qa-personal-names github.com/berkmancenter/namae github.com/mericson/people en.wikipedia.org/wiki/Types_of_business_entity en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA) en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom) en.wikipedia.org/wiki/Nobiliary_particle en.wikipedia.org/wiki/Spanish_naming_customs linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
Public Class Methods
# File lib/name_tamer/name.rb, line 298 def initialize(new_name, args = {}) @name = new_name || '' @contact_type = contact_type_from args @tidy_name = nil @nice_name = nil @simple_name = nil @slug = nil @last_name = nil @remainder = nil @adfix_found = false end
Public Instance Methods
# File lib/name_tamer/name.rb, line 65 def array @array ||= slug.split(SLUG_DELIMITER) end
# File lib/name_tamer/name.rb, line 69 def contact_type nice_name # make sure we've done the bit which infers contact_type contact_type_best_effort end
# File lib/name_tamer/name.rb, line 74 def contact_type=(new_contact_type) ct_as_sym = new_contact_type.to_sym unless @contact_type.nil? || @contact_type == ct_as_sym puts "Changing contact type of #{@name} from #{@contact_type} to #{new_contact_type}" end @contact_type = ct_as_sym end
Useful method for iterating through the words in the name
# File lib/name_tamer/name.rb, line 85 def each_word(&block) @words ||= slug.split(SLUG_DELIMITER) @words.each(&block) end
# File lib/name_tamer/name.rb, line 31 def nice_name unless @nice_name @nice_name = tidy_name.dup # Start with the tidied name remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John" fixup_last_name_first # "Smith, John" -> "John Smith" fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)" remove_adfixes # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith" name_wrangle # proper name case and non-breaking spaces use_nonbreaking_spaces_in_compound_names end @nice_name end
# File lib/name_tamer/name.rb, line 46 def simple_name unless @simple_name @simple_name = nice_name.dup # Start with nice name remove_initials # "John Q. Doe" -> "John Doe" remove_middle_names # "Philip Seymour Hoffman" -> "Philip Hoffman" remove_periods_from_initials # "J.P.R. Williams" -> "JPR Williams" standardize_words # "B&Q Intl" -> "B and Q International" @simple_name.whitespace_to!(ASCII_SPACE) end @simple_name end
# File lib/name_tamer/name.rb, line 61 def slug @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe" end
# File lib/name_tamer/name.rb, line 17 def tidy_name unless @tidy_name @tidy_name = name.dup # Start with the name we've received unescape # Unescape percent-encoded characters and fix UTF-8 encoding remove_zero_width # remove zero-width characters tidy_spacing # " John Smith " -> "John Smith" fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes" consolidate_initials # "I. B. M." -> "I.B.M." end @tidy_name end
Private Instance Methods
Remove spaces from groups of initials
# File lib/name_tamer/name.rb, line 134 def consolidate_initials @tidy_name .remove_spaces_from_initials! .ensure_space_after_initials! end
If we don't know the contact type, what's our best guess?
# File lib/name_tamer/name.rb, line 327 def contact_type_best_effort if @contact_type @contact_type else # If it's just one word we'll assume organization. # If more then we'll assume a person @name.include?(ASCII_SPACE) ? :person : :organization end end
# File lib/name_tamer/name.rb, line 313 def contact_type_from(args) args_ct = args[:contact_type] return unless args_ct ct = args_ct.is_a?(Symbol) ? args_ct : args_ct.dup ct = ct.to_s unless [String, Symbol].include? ct.class ct.downcase! if ct.class == String ct = ct.to_sym ct = nil unless %i[person organization].include? ct ct end
# File lib/name_tamer/name.rb, line 352 def find_contact_type_and_parts(adfixes, name_part) ct = contact_type_best_effort parts = name_part.partition adfixes[ct] @adfix_found = !parts[1].empty? return [ct, parts] if @contact_type || @adfix_found # If the contact type is indeterminate and we didn't find a diagnostic adfix # for a person then try again for an organization ct = :organization parts = name_part.partition adfixes[ct] @adfix_found = !parts[1].empty? [ct, parts] end
# File lib/name_tamer/name.rb, line 257 def find_first_usable_name(parts) part = nil parts.each_index do |i| part = parts[i] next if part.gsub(FILTER_COMPAT, '').empty? parts = parts.slice(i + 1, parts.length) # don't use "slice!" break end [part, parts] end
# File lib/name_tamer/name.rb, line 270 def find_last_usable_name(parts) part = nil parts.reverse_each do |p| next if p.gsub(FILTER_COMPAT, '').empty? part = p break end part end
# File lib/name_tamer/name.rb, line 129 def fix_encoding_errors @tidy_name.fix_encoding_errors! end
Names in the form “Smith, John” need to be turned around to “John Smith”
# File lib/name_tamer/name.rb, line 168 def fixup_last_name_first return if @contact_type == :organization parts = @nice_name.split ', ' return unless parts.count == 2 @last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it @remainder = parts[1] end
Sometimes we end up with mismatched braces after adfix stripping e.g. “Ceres (Ceres Holdings LLC)” -> “Ceres (Ceres Holdings”
# File lib/name_tamer/name.rb, line 181 def fixup_mismatched_braces left_brace_count = @nice_name.count '(' right_brace_count = @nice_name.count ')' if left_brace_count > right_brace_count @nice_name += ')' elsif left_brace_count < right_brace_count @nice_name = '(' + @nice_name end end
Original Version of NameCase: Copyright © Mark Summerfield 1998-2008. All Rights Reserved This module may be used/distributed/modified under the same terms as Perl itself dev.perl.org/licenses/ (GPL)
Ruby Version: Copyright © Aaron Patterson 2006 NameCase is distributed under the GPL license.
Substantially modified for Xendata Improved in several areas, also now adds non-breaking spaces for compound names like “van der Pump”
# File lib/name_tamer/name.rb, line 380 def name_case(lowercase) n = lowercase.dup # We assume the name is passed already downcased n .upcase_first_letter! .downcase_after_apostrophe! .fix_mac! .fix_ff! .fix_name_modifiers! .upcase_initials! end
# File lib/name_tamer/name.rb, line 192 def name_wrangle # Fix case if all caps or all lowercase if @last_name.nil? name_wrangle_single_name else name_wrangle_split_name end end
# File lib/name_tamer/name.rb, line 201 def name_wrangle_single_name lowercase = @nice_name.downcase uppercase = @nice_name.upcase fix_case = false if @contact_type == :organization fix_case = true if @nice_name == uppercase && @nice_name.length > 4 elsif [uppercase, lowercase].include?(@nice_name) fix_case = true end @nice_name = name_case(lowercase) if fix_case end
# File lib/name_tamer/name.rb, line 215 def name_wrangle_split_name # It's a person if we've split the name, so no organization logic here lowercase = @last_name.downcase uppercase = @last_name.upcase @last_name = name_case(lowercase) if [uppercase, lowercase].include?(@last_name) @nice_name = +"#{@remainder} #{@last_name}" end
An adfix is either a prefix or a suffix
# File lib/name_tamer/name.rb, line 141 def remove_adfixes if @last_name.nil? # Our name is still in one part, not two loop do @nice_name = remove_outermost_adfix(:suffix, @nice_name) break unless @adfix_found end loop do @nice_name = remove_outermost_adfix(:prefix, @nice_name) break unless @adfix_found end else # Our name is currently in two halves loop do @last_name = remove_outermost_adfix(:suffix, @last_name) break unless @adfix_found end loop do @remainder = remove_outermost_adfix(:prefix, @remainder) break unless @adfix_found end end end
Remove initials from personal names unless they are the only identifier. i.e. only remove initials if there's also a proper name there
# File lib/name_tamer/name.rb, line 236 def remove_initials return unless @contact_type == :person temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '') # If the name still has at least one space we're OK @simple_name = temp_name if temp_name.include?(ASCII_SPACE) end
# File lib/name_tamer/name.rb, line 245 def remove_middle_names return unless @contact_type == :person first_name, parts = find_first_usable_name(@simple_name.split) last_name, = find_last_usable_name(parts) return unless first_name || last_name separator = first_name && last_name ? ' ' : '' @simple_name = +"#{first_name}#{separator}#{last_name}" end
We pass to this routine either prefixes or suffixes
# File lib/name_tamer/name.rb, line 338 def remove_outermost_adfix(adfix_type, name_part) ct, parts = find_contact_type_and_parts(ADFIX_PATTERNS[adfix_type], name_part) return name_part unless @adfix_found # If we've found a diagnostic adfix then set the contact type self.contact_type = ct # The remainder of the name will be in parts[0] or parts[2] depending # on whether this is a prefix or a suffix. # We'll also remove any trailing commas we've exposed. (parts[0] + parts[2]).gsub(/\s*,\s*$/, '') end
# File lib/name_tamer/name.rb, line 282 def remove_periods_from_initials @simple_name.remove_periods_from_initials! end
# File lib/name_tamer/name.rb, line 118 def remove_zero_width @tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER) end
# File lib/name_tamer/name.rb, line 286 def standardize_words @simple_name.gsub!(/ *& */, ' and ') # replace ampersand characters with ' and ' @simple_name.gsub!(/ *\+ */, ' plus ') # replace plus signs with ' plus ' @simple_name.gsub!(/\bintl\b/i, 'International') # replace 'intl' with 'International' @simple_name.gsub!(/[־‐‑‒–—―−﹘﹣-]/, SLUG_DELIMITER) # Replace Unicode dashes with ASCII hyphen @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration end
# File lib/name_tamer/name.rb, line 122 def tidy_spacing @tidy_name .space_around_comma! .strip_or_self! .whitespace_to!(ASCII_SPACE) end
# File lib/name_tamer/name.rb, line 114 def unescape @tidy_name.ensure_safe!.safe_unescape!.unescape_html! end
Conjoin compound names with non-breaking spaces
# File lib/name_tamer/name.rb, line 224 def use_nonbreaking_spaces_in_compound_names @nice_name .nbsp_in_compound_name! .nbsp_in_name_modifier! end