class Guillaume::SourceText
Public Class Methods
new(file_name)
click to toggle source
# File lib/guillaume/source_text.rb, line 4
# Reads the file at +file_name+ in binary mode and splits its normalized
# text into sentences.
#
# A sentence boundary is a single space preceded by ".", "?" or "!"; the
# lookbehind keeps the punctuation attached to the sentence it ends.
# Empty pieces are discarded.
#
# @param file_name [String] path of the file to read
def initialize(file_name)
  @file_name = file_name
  @raw_source = File.open(file_name, "rb", &:read)

  sentence_break = /(?<=\.|\?|!) /
  pieces = normalized.split(sentence_break)
  @sentences = pieces.reject { |piece| piece.empty? }
end
Public Instance Methods
bigrams()
click to toggle source
# File lib/guillaume/source_text.rb, line 27
# Returns the 2-grams of the source text, computing them through #ngrams on
# first use and reusing the cached value afterwards.
#
# @return [Object] whatever ngrams(2) returned on the first call
def bigrams
  return @bigrams if @bigrams

  @bigrams = ngrams(2)
end
ngrams(n)
click to toggle source
# File lib/guillaume/source_text.rb, line 14
# Collects every run of +n+ consecutive words found inside each sentence.
#
# Words are the whitespace-separated tokens of a sentence. N-grams never span
# two sentences, and a sentence with fewer than +n+ words contributes nothing.
# An info message is sent to $LOGGER on every call.
#
# @param n [Integer] number of words per n-gram
# @return [Array<Array<String>>] the n-grams, in sentence order
def ngrams(n)
  $LOGGER.info("Grabbing #{n}-grams from #{@file_name}...")

  # flat_map builds the result in a single pass; the previous `result += ...`
  # inside the loop re-copied the whole accumulator for every sentence.
  @sentences.flat_map { |sentence| sentence.split(" ").each_cons(n).to_a }
end
normalized()
click to toggle source
# File lib/guillaume/source_text.rb, line 10
# Returns the raw source with every run of carriage returns, newlines and
# spaces collapsed into a single space, and surrounding whitespace removed.
#
# @return [String] the single-line form of @raw_source
def normalized
  single_spaced = @raw_source.gsub(/[\r\n ]+/, " ")
  single_spaced.strip
end
trigrams()
click to toggle source
# File lib/guillaume/source_text.rb, line 31
# Returns the 3-grams of the source text, computing them through #ngrams on
# first use and reusing the cached value afterwards.
#
# @return [Object] whatever ngrams(3) returned on the first call
def trigrams
  return @trigrams if @trigrams

  @trigrams = ngrams(3)
end
unigrams()
click to toggle source
# File lib/guillaume/source_text.rb, line 23
# Returns the 1-grams of the source text, computing them through #ngrams on
# first use and reusing the cached value afterwards.
#
# @return [Object] whatever ngrams(1) returned on the first call
def unigrams
  return @unigrams if @unigrams

  @unigrams = ngrams(1)
end