class Guillaume::SourceText

Public Class Methods

new(file_name) click to toggle source
# File lib/guillaume/source_text.rb, line 4
# Builds a source text from the file at +file_name+.
#
# The file is read in binary mode ("rb") and kept in @raw_source; @sentences
# holds the non-empty pieces of the normalized text, split at every single
# space that directly follows ".", "?" or "!".
def initialize(file_name)
  @file_name = file_name
  @raw_source = File.open(file_name, "rb", &:read)
  # The lookbehind keeps the terminating punctuation attached to its sentence.
  pieces = normalized.split(/(?<=\.|\?|!) /)
  @sentences = pieces.reject { |piece| piece.empty? }
end

Public Instance Methods

bigrams() click to toggle source
# File lib/guillaume/source_text.rb, line 27
# Returns the 2-grams of the text, as produced by +ngrams(2)+.
# The result is cached in @bigrams after the first call.
def bigrams
  return @bigrams if @bigrams

  @bigrams = ngrams(2)
end
ngrams(n) click to toggle source
# File lib/guillaume/source_text.rb, line 14
# Returns every run of +n+ consecutive words found in the sentences.
#
# Each sentence in @sentences is split on whitespace and its n-grams are
# collected in order, so an n-gram never spans two sentences. A sentence
# with fewer than +n+ words contributes nothing.
#
# n - number of words per gram (passed straight to each_cons).
#
# Logs one info line to $LOGGER on every call. Returns an Array of Arrays
# of Strings.
def ngrams(n)
  $LOGGER.info("Grabbing #{n}-grams from #{@file_name}...")
  # flat_map appends each sentence's grams to one result array instead of
  # rebuilding the whole accumulator with += for every sentence.
  @sentences.flat_map { |sentence| sentence.split(" ").each_cons(n).to_a }
end
normalized() click to toggle source
# File lib/guillaume/source_text.rb, line 10
# Returns the raw source with every run of carriage returns, newlines and
# spaces collapsed to a single space, and surrounding whitespace stripped.
def normalized
  single_spaced = @raw_source.gsub(/[\r\n ]+/, " ")
  single_spaced.strip
end
trigrams() click to toggle source
# File lib/guillaume/source_text.rb, line 31
# Returns the 3-grams of the text, as produced by +ngrams(3)+.
# The result is cached in @trigrams after the first call.
def trigrams
  return @trigrams if @trigrams

  @trigrams = ngrams(3)
end
unigrams() click to toggle source
# File lib/guillaume/source_text.rb, line 23
# Returns the 1-grams of the text, as produced by +ngrams(1)+.
# The result is cached in @unigrams after the first call.
def unigrams
  return @unigrams if @unigrams

  @unigrams = ngrams(1)
end