module RMMSeg::Chunk

A Chunk holds one or more successive Words.

Public Class Methods

average_length(words) click to toggle source

The average length of words.

# File lib/rmmseg/chunk.rb, line 15
# Mean length of the given words, as a Float.
#
# words - a collection responding to +size+, passed on to +total_length+.
#
# Returns the summed length divided by the number of words.
def self.average_length(words)
  combined = total_length(words)
  word_count = words.size
  combined.to_f / word_count
end
degree_of_morphemic_freedom(words) click to toggle source

The sum of all frequencies of one-character words.

# File lib/rmmseg/chunk.rb, line 31
# Adds up +frequency+ for every word whose +length+ is 1 and whose +type+
# equals Word::TYPES[:cjk_word]; all other words are ignored.
#
# words - a collection of objects responding to +length+, +type+ and
#         +frequency+.
#
# Returns the accumulated frequency (0 when no word qualifies).
def self.degree_of_morphemic_freedom(words)
  words.inject(0) do |acc, word|
    single_cjk = word.length == 1 && word.type == Word::TYPES[:cjk_word]
    single_cjk ? acc + word.frequency : acc
  end
end
total_length(words) click to toggle source

The sum of the lengths of all words.

# File lib/rmmseg/chunk.rb, line 6
# Adds together the +length+ of every word.
#
# words - a collection of objects responding to +length+.
#
# Returns the accumulated length (0 for an empty collection).
def self.total_length(words)
  words.inject(0) { |running, word| running + word.length }
end
variance(words) click to toggle source

The square of the standard deviation of the lengths of all words.

# File lib/rmmseg/chunk.rb, line 20
# Population variance of the word lengths: the mean of the squared
# deviations of each word's +length+ from +average_length(words)+.
#
# words - a collection responding to +size+ whose elements respond to
#         +length+.
#
# Returns a Float; 0.0 when +words+ is empty.
def self.variance(words)
  count = words.size
  # Guard first: with no words the mean is NaN and the division below
  # would propagate it.
  return 0.0 if count.zero?

  mean = average_length(words)
  squared_deviations = words.inject(0.0) do |acc, word|
    deviation = word.length - mean
    acc + deviation * deviation
  end

  # Divide by the count to get the variance. Taking the square root of the
  # raw sum (without dividing) yields neither the variance nor the standard
  # deviation.
  squared_deviations / count
end