class Japanese

Constants

JAPANESE_PONCTUATION

Public Class Methods

parse(text) click to toggle source
# File lib/lib/japanese.rb, line 41
# Tokenizes +text+ with MeCab and returns one [surface, part_of_speech, reading]
# triple per token.
#
# Each output line of MecabStandalone.parse is expected to look like
# "surface<TAB>feature,feature,...". The part of speech is the first feature;
# the reading is taken from the second-to-last field of the line (presumably
# the reading column of the IPADIC layout -- confirm against the dictionary
# in use).
#
# Post-processing applied to the raw tokens:
# * a token whose reading is the '*' placeholder uses its surface as reading;
# * an auxiliary verb (助動詞) is merged into the preceding token, surface and
#   reading both being appended. When there is no preceding token it is kept
#   as a token of its own instead of raising.
#
# @param text [String] text to analyse
# @return [Array<Array<String>>] list of [surface, part_of_speech, reading]
def self.parse(text)
  lines = MecabStandalone.parse(text).split("\n")
  lines.pop # the last line is the terminator (EOS), not a token

  lines.each_with_object([]) do |line, tokens|
    surface, features = line.split("\t")
    # Lines without a feature column (blank lines, stray markers) carry no token.
    next if features.nil?

    fields = [surface].concat(features.split(','))
    part_of_speech = fields[1]
    reading = fields[-2]
    # '*' means the dictionary has no reading: fall back to the surface form.
    reading = surface if reading == '*'

    if part_of_speech == "助動詞" && (previous = tokens.last)
      # += builds new strings, so the strings produced by split stay untouched.
      previous[0] += surface
      previous[-1] += reading
    else
      tokens << [surface, part_of_speech, reading]
    end
  end
end
t(text) click to toggle source
# File lib/lib/japanese.rb, line 29
# Transliterates +text+: every token returned by parse whose reading contains
# katakana is replaced by a space followed by Romaji.kana2romaji(reading);
# afterwards each key of JAPANESE_PONCTUATION is replaced by its value.
#
# Tokens are located left to right with a moving cursor, so a token is only
# ever replaced at its own position. It can never match inside an earlier
# token that was left unconverted, nor inside romaji that was already inserted.
# Splicing by index also keeps the romaji from being interpreted as a
# substitution pattern.
#
# @param text [String] text to transliterate (not modified)
# @return [String] a new string with the converted text
def self.t(text)
  latin = text.dup
  cursor = 0

  parse(text).each do |token|
    surface = token[0]
    reading = token[-1]

    position = latin.index(surface, cursor)
    # Token not found after the cursor: leave the text alone.
    next if position.nil?

    if reading =~ /\p{Katakana}/
      romaji = ' ' + Romaji.kana2romaji(reading)
      latin[position, surface.length] = romaji
      cursor = position + romaji.length
    else
      cursor = position + surface.length
    end
  end

  JAPANESE_PONCTUATION.each { |from, to| latin.gsub!(from, to) }
  latin
end