class PinYin::Backend::Simple

Public Class Methods

new(override_files=[])
# File lib/ruby-pinyin/backend/simple.rb, line 6
# Creates a backend, optionally layering extra data files over the bundled
# table.
#
# override_files - Array of paths to additional data files; nil is treated
#                  as an empty list.
#
# The list is copied so this object never modifies the caller's array in
# place, and so a frozen array can be passed safely.
def initialize(override_files=[])
  @override_files = (override_files || []).dup
end

Public Instance Methods

romanize(str, tone=nil, include_punctuations=false)
# File lib/ruby-pinyin/backend/simple.rb, line 10
# Converts +str+ into a flat list of tokens.
#
# str                  - the text to convert; nil/false/empty yields [].
# tone                 - forwarded unchanged to #format for every character
#                        that has an entry in the code table.
# include_punctuations - when true, characters that are neither in the code
#                        table nor ASCII alphanumerics/whitespace are kept
#                        (mapped through Punctuation when it has an entry);
#                        when false they are dropped.
#
# Returns an Array of the collected phrases, each split on whitespace.
def romanize(str, tone=nil, include_punctuations=false)
  return [] if !str || str.empty?

  phrases = []

  str.unpack('U*').each do |codepoint|
    hex = codepoint.to_s(16).upcase
    readings = codes[hex]

    # Character has an entry in the table: emit its formatted reading.
    if readings
      phrases << Value.new(format(readings, tone), false)
      next
    end

    char = [codepoint].pack('U*')

    if char =~ /^[0-9a-zA-Z\s]*$/
      # Plain ASCII letters, digits and whitespace are kept verbatim;
      # consecutive ones are glued onto the previous English phrase.
      previous = phrases.last
      if previous && previous.english?
        previous << Value.new(char, true)
      elsif char != ' '
        phrases << Value.new(char, true)
      end
    elsif include_punctuations
      # Other characters (e.g. full-width symbols) are kept only on request,
      # translated through Punctuation when a mapping exists.
      char = [Punctuation[hex]].pack('H*') if Punctuation.include?(hex)
      (phrases.last || phrases) << Value.new(char, false)
    end
  end

  phrases.flat_map { |phrase| phrase.split(/\s+/) }
end

Private Instance Methods

codes()
# File lib/ruby-pinyin/backend/simple.rb, line 40
# Lazily builds and memoizes the code => readings table.
#
# The bundled Mandarin.dat is loaded first, followed by each override file
# in the order given, so an override replaces the bundled entry for the
# same code.
#
# Returns the Hash stored in @codes.
def codes
  return @codes if @codes

  @codes = {}
  src = File.expand_path('../../data/Mandarin.dat', __FILE__)
  # Build a fresh list instead of unshifting into @override_files: the
  # in-place version permanently altered the stored (caller-supplied) array
  # and raised when that array was frozen.
  [src, *@override_files].each { |file| load_codes_from(file) }
  @codes
end
format(readings, tone)
# File lib/ruby-pinyin/backend/simple.rb, line 58
# Picks the first reading and renders it according to +tone+.
#
# readings - indexable collection of readings; only element 0 is used.
# tone     - :unicode returns the reading untouched; :ascii or true passes
#            it to PinYin::Util.to_ascii with its default second argument;
#            anything else calls PinYin::Util.to_ascii(reading, false).
def format(readings, tone)
  primary = readings[0]

  if :unicode == tone
    primary
  elsif [:ascii, true].include?(tone)
    PinYin::Util.to_ascii(primary)
  else
    PinYin::Util.to_ascii(primary, false)
  end
end
load_codes_from(file)
# File lib/ruby-pinyin/backend/simple.rb, line 51
# Merges the entries of one data file into @codes.
#
# Each line is expected to hold a code and a comma-separated list of
# readings, separated by whitespace. Lines missing either field (for
# example blank lines) are skipped rather than raising on a nil reading
# list.
#
# file - path of the data file to read.
#
# Returns @codes.
def load_codes_from(file)
  # Stream the file line by line; nothing needs the whole file in memory.
  File.foreach(file) do |line|
    code, readings = line.split(' ')
    next unless code && readings

    @codes[code] = readings.split(',')
  end
  @codes
end