module Charwidth::ClassMethods

Constants

HALFWIDTH_TO_FULLWIDTH
TYPES

Public Instance Methods

normalize(string, options = {}) click to toggle source

Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters. Accepted options: {

only: [:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :space],
except: [:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :space]

}

# File lib/charwidth.rb, line 49
# Non-destructive counterpart of normalize!.
#
# Duplicates +string+ and hands the copy, together with +options+, to
# normalize!, so the caller's string is left untouched. Returns whatever
# normalize! returns for the copy.
def normalize(string, options = {})
  copy = string.dup
  normalize!(copy, options)
end
normalize!(string, options = {}) click to toggle source

Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters (destructive)

# File lib/charwidth.rb, line 54
# Destructive normalization of +string+.
#
# First runs unify_voiced_katakana! on +string+, then passes the result and
# +options+ to normalize_charwidth!. unify_voiced_katakana! hands back the
# very string it was given, so both steps operate on +string+ itself.
# Returns the value of normalize_charwidth!.
def normalize!(string, options = {})
  normalize_charwidth!(unify_voiced_katakana!(string), options)
end
to_full_width(string) click to toggle source
# File lib/charwidth.rb, line 59
# Non-destructive counterpart of to_full_width!.
#
# Duplicates +string+ and converts the copy with to_full_width!, leaving the
# caller's string unchanged. Returns the converted copy.
def to_full_width(string)
  copy = string.dup
  to_full_width!(copy)
end
to_full_width!(src) click to toggle source
# File lib/charwidth.rb, line 63
# Destructively convert +src+ using every entry of HALFWIDTH_TO_FULLWIDTH.
#
# Runs unify_voiced_katakana! on +src+ first. Then the halfwidth and
# fullwidth strings of all HALFWIDTH_TO_FULLWIDTH values are concatenated
# into two translation tables, escaped with escape_for_tr!, and applied with
# String#tr!. Returns +src+ (String#tr! yields nil when nothing changed, hence
# the fallback).
def to_full_width!(src)
  unify_voiced_katakana!(src)

  from_chars = ""
  to_chars = ""
  HALFWIDTH_TO_FULLWIDTH.each_value do |pair|
    half, full = pair
    from_chars << half
    to_chars << full
  end

  # escape_for_tr! mutates and returns its argument, so it can be used inline.
  src.tr!(escape_for_tr!(from_chars), escape_for_tr!(to_chars)) || src
end

Private Instance Methods

escape_for_tr!(s) click to toggle source
# File lib/charwidth.rb, line 144
# Escape, in place, every character of +s+ that String#tr interprets
# specially, so that +s+ can be used as a literal character list.
#
# The escaped characters are backslash, "-", "^", "[" and "]"; each one is
# prefixed with a backslash. Returns +s+ itself (also when nothing needed
# escaping).
def escape_for_tr!(s)
  # One pass with a block replacement. A block's result is inserted verbatim,
  # whereas a replacement *string* of two backslashes is read by gsub as a
  # single literal backslash and would leave backslashes un-doubled.
  # A single pass also keeps already-inserted backslashes from being revisited.
  s.gsub!(/[\\\-\^\[\]]/) { |char| "\\#{char}" }
  s
end
normalize_charwidth!(src, options = {}) click to toggle source
# File lib/charwidth.rb, line 83
# Destructively normalize the character width of +src+.
#
# Starts from all TYPES and narrows them with +options+:
#
# only::   Array of types to process; everything else is skipped.
# except:: Array of types to skip.
#
# For each remaining type the matching HALFWIDTH_TO_FULLWIDTH pair is added
# to a String#tr translation table. The direction depends on the type:
# :ascii, :white_parenthesis, :latin_1_punctuation_and_symbols and :space go
# fullwidth -> halfwidth; :cjk_punctuation, :katakana, :hangul and
# :mathematical_symbols go halfwidth -> fullwidth.
#
# Returns +src+. Raises RuntimeError for option keys other than :only and
# :except, and for types not listed in TYPES.
def normalize_charwidth!(src, options = {})
  types = TYPES.dup

  # Check options
  unless (unexpected_options = options.keys - [:only, :except]).empty?
    raise "Unexpected normalize option(s): #{unexpected_options}"
  end

  if options[:only]
    unless (unexpected_types = options[:only] - TYPES).empty?
      raise "Unexpected normalize type(s): #{unexpected_types.inspect}"
    end

    types &= options[:only]
  end

  # The option key is :except (it was previously looked up under a misspelled
  # key, which made the option a silent no-op).
  if options[:except]
    unless (unexpected_types = options[:except] - TYPES).empty?
      raise "Unexpected normalize type(s): #{unexpected_types.inspect}"
    end

    types -= options[:except]
  end

  before = ""
  after = ""
  types.each do |type|
    case type
    when :ascii, :white_parenthesis, :latin_1_punctuation_and_symbols, :space
      # convert fullwidth to halfwidth
      half, full = HALFWIDTH_TO_FULLWIDTH[type]
      before << full
      after << half
    when :cjk_punctuation, :katakana, :hangul, :mathematical_symbols
      # convert halfwidth to fullwidth
      half, full = HALFWIDTH_TO_FULLWIDTH[type]
      before << half
      after << full
    end
  end

  escape_for_tr!(before)
  escape_for_tr!(after)
  # String#tr! returns nil when nothing changed; always hand back src.
  src.tr!(before, after) || src
end
unify_voiced_katakana!(src) click to toggle source

Unify halfwidth (semi) voiced katakana to one fullwidth voiced katakana

# File lib/charwidth.rb, line 132
# Replace, in place, each halfwidth voiced / semi-voiced katakana sequence in
# +src+ with its fullwidth counterpart.
#
# The halfwidth list (Characters::HALFWIDTH_VOICED_KATAKANA followed by
# Characters::HALFWIDTH_SEMI_VOICED_KATAKANA) is paired element by element
# with the fullwidth list (Characters::VOICED_KATAKANA followed by
# Characters::SEMI_VOICED_KATAKANA), and every pair is applied with
# String#gsub!. Returns +src+ itself.
def unify_voiced_katakana!(src)
  sources = Characters::HALFWIDTH_VOICED_KATAKANA + Characters::HALFWIDTH_SEMI_VOICED_KATAKANA
  targets = Characters::VOICED_KATAKANA + Characters::SEMI_VOICED_KATAKANA

  sources.zip(targets).each do |half, full|
    # gsub! returns nil when there is no match; the result is not needed here.
    src.gsub!(half, full)
  end

  src
end