module Charwidth::ClassMethods
Constants
- HALFWIDTH_TO_FULLWIDTH
- TYPES
Public Instance Methods
normalize(string, options = {})
click to toggle source
Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters. Options: {
only: [:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :space], except: [:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :space]
}
# File lib/charwidth.rb, line 49
# Non-destructive variant of normalize!: works on a copy of +string+ so the
# caller's object is left untouched.
#
# @param string [String] text to normalize
# @param options [Hash] forwarded unchanged to normalize!
# @return [String] whatever normalize! returns for the copy
def normalize(string, options = {})
  copy = string.dup
  normalize!(copy, options)
end
normalize!(string, options = {})
click to toggle source
Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters (destructive)
# File lib/charwidth.rb, line 54
# Destructively normalizes fullwidth / halfwidth characters in +string+.
#
# Runs in two passes over the same object:
# 1. unify_voiced_katakana! (always executed; it receives no options), then
# 2. normalize_charwidth!, which is the step that honours +options+.
#
# @param string [String] text to normalize in place
# @param options [Hash] forwarded unchanged to normalize_charwidth!
# @return [String] the value returned by normalize_charwidth!
def normalize!(string, options = {})
  # Pass 1: voiced / semi-voiced katakana unification.
  unify_voiced_katakana!(string)

  # Pass 2: width conversion; its result is this method's return value.
  normalize_charwidth!(string, options)
end
to_full_width(string)
click to toggle source
# File lib/charwidth.rb, line 59
# Non-destructive variant of to_full_width!: operates on a copy of +string+.
#
# @param string [String] text to convert
# @return [String] whatever to_full_width! returns for the copy
def to_full_width(string)
  copy = string.dup
  to_full_width!(copy)
end
to_full_width!(src)
click to toggle source
# File lib/charwidth.rb, line 63
# Destructively converts +src+ using every entry of HALFWIDTH_TO_FULLWIDTH,
# after first running unify_voiced_katakana! on it.
#
# Each table value is destructured as a (from, to) pair of character lists;
# presumably these are [halfwidth, fullwidth] strings, as the constant name
# suggests.
#
# @param src [String] text to convert in place
# @return [String] +src+ itself, whether or not anything was replaced
def to_full_width!(src)
  unify_voiced_katakana!(src)

  from = ""
  to = ""
  HALFWIDTH_TO_FULLWIDTH.each_value do |pair|
    narrow, wide = pair
    from << narrow
    to << wide
  end

  escape_for_tr!(from)
  escape_for_tr!(to)

  # String#tr! yields nil when nothing changed, so return the receiver
  # explicitly instead of relying on its result.
  src.tr!(from, to)
  src
end
Private Instance Methods
escape_for_tr!(s)
click to toggle source
# File lib/charwidth.rb, line 144
# Escapes, in place, the characters that String#tr treats specially
# (backslash, "-", "^", "[" and "]") by prefixing each with a backslash, so
# the result can be passed to tr / tr! as a literal character list.
#
# The replacement is produced by a block rather than a replacement string:
# in a replacement string a doubled backslash is read as one literal
# backslash, so a string-based substitution cannot double backslashes.
# A single pass also guarantees that backslashes inserted as escapes are
# never escaped again.
#
# @param s [String] character list to escape (mutated)
# @return [String] +s+ itself
def escape_for_tr!(s)
  s.gsub!(/[\\\-\^\[\]]/) { |char| "\\#{char}" }
  s # gsub! returns nil when nothing matched, so return the receiver explicitly
end
normalize_charwidth!(src, options = {})
click to toggle source
# File lib/charwidth.rb, line 83
# Destructively converts character widths in +src+ with one String#tr! call.
#
# Starting from all TYPES, the set of types to convert is narrowed by
# +options+:
# * +:only+   - keep only the listed types
# * +:except+ - drop the listed types
#
# For :ascii, :white_parenthesis, :latin_1_punctuation_and_symbols and :space
# the fullwidth forms are replaced by halfwidth ones; for :cjk_punctuation,
# :katakana, :hangul and :mathematical_symbols the halfwidth forms are
# replaced by fullwidth ones. Types matching neither group are skipped.
#
# @param src [String] text to convert in place
# @param options [Hash] may contain :only and/or :except, each an Array of
#   type symbols taken from TYPES
# @return [String] +src+ itself, whether or not anything was replaced
# @raise [RuntimeError] if +options+ has a key other than :only / :except,
#   or if :only / :except lists a type that is not in TYPES
def normalize_charwidth!(src, options = {})
  unexpected_options = options.keys - [:only, :except]
  unless unexpected_options.empty?
    raise "Unexpected normalize option(s): #{unexpected_options}"
  end

  types = TYPES.dup

  if options[:only]
    unexpected_types = options[:only] - TYPES
    unless unexpected_types.empty?
      raise "Unexpected normalize type(s): #{unexpected_types.inspect}"
    end
    types &= options[:only]
  end

  # This branch must read :except, the only exclusion key accepted by the
  # option check above; any other spelling would make the option a no-op.
  if options[:except]
    unexpected_types = options[:except] - TYPES
    unless unexpected_types.empty?
      raise "Unexpected normalize type(s): #{unexpected_types.inspect}"
    end
    types -= options[:except]
  end

  # Explicitly unfrozen accumulators for the tr! source / target lists.
  before = "".dup
  after = "".dup
  types.each do |type|
    case type
    when :ascii, :white_parenthesis, :latin_1_punctuation_and_symbols, :space
      # convert fullwidth to halfwidth
      half, full = HALFWIDTH_TO_FULLWIDTH[type]
      before << full
      after << half
    when :cjk_punctuation, :katakana, :hangul, :mathematical_symbols
      # convert halfwidth to fullwidth
      half, full = HALFWIDTH_TO_FULLWIDTH[type]
      before << half
      after << full
    end
  end

  escape_for_tr!(before)
  escape_for_tr!(after)

  # tr! returns nil when nothing was replaced; always hand back the receiver.
  src.tr!(before, after) || src
end
unify_voiced_katakana!(src)
click to toggle source
Unify halfwidth (semi) voiced katakana to one fullwidth voiced katakana
# File lib/charwidth.rb, line 132
# Destructively replaces each halfwidth voiced / semi-voiced katakana
# sequence in +src+ with the fullwidth character at the same position in the
# corresponding fullwidth list.
#
# The source list is HALFWIDTH_VOICED_KATAKANA followed by
# HALFWIDTH_SEMI_VOICED_KATAKANA; the target list is VOICED_KATAKANA followed
# by SEMI_VOICED_KATAKANA (all from Characters), paired up index by index.
#
# @param src [String] text to modify in place
# @return [String] +src+ itself
def unify_voiced_katakana!(src)
  sources = Characters::HALFWIDTH_VOICED_KATAKANA + Characters::HALFWIDTH_SEMI_VOICED_KATAKANA
  targets = Characters::VOICED_KATAKANA + Characters::SEMI_VOICED_KATAKANA

  sources.zip(targets).each do |narrow, wide|
    # gsub! returns nil when there was no match; its result is not needed
    # because the receiver is mutated in place.
    src.gsub!(narrow, wide)
  end

  src
end