class RSemantic::Corpus
Attributes
documents[R]
@return [Array<Document>]
Public Class Methods
new(documents = [], options = {})
click to toggle source
@param [Array<Document>] documents The {Document documents} to index
@param [Hash] options Options that are passed on to RSemantic::Search when the index is built (TODO: document the supported options)
# File lib/rsemantic/corpus.rb, line 10
# Sets up a corpus holding the given documents and options. No search
# index exists after construction; @search stays nil until one is built.
#
# @param [Array<Document>] documents the documents to index
# @param [Hash] options stored as-is for later use
def initialize(documents = [], options = {})
  @documents, @options = documents, options
  @search = nil
end
Public Instance Methods
add_document(document)
click to toggle source
Adds a new {Document document} to the index.
@param [Document] document
@return [void]
# File lib/rsemantic/corpus.rb, line 20
# Appends a document to this corpus and records this corpus in the
# document's own list of corpora, so the link exists in both directions.
#
# The search index is not touched here.
#
# @param [Document] document the document to add; must respond to #corpora
# @return [void]
def add_document(document)
  @documents.push(document)
  owners = document.corpora
  owners << self
end
Also aliased as: <<
build_index()
click to toggle source
Build the index. This is required to be able to search for words or compute related documents.
If you add new documents, you have to rebuild the index.
@return [void]
# File lib/rsemantic/corpus.rb, line 32
# Builds the search index from the text of every document currently in
# the corpus, handing the stored options to RSemantic::Search.
#
# Only the documents present at call time are included, so the index has
# to be rebuilt after adding documents.
#
# @return [void]
def build_index
  texts = @documents.map { |doc| doc.text }
  @search = RSemantic::Search.new(texts, @options)
end
find_keywords(document, num = 5)
click to toggle source
# File lib/rsemantic/corpus.rb, line 52
# Placeholder for keyword extraction. Not implemented: both arguments
# are currently ignored and the method always returns nil.
#
# @param document the document to find keywords for (unused for now)
# @param num defaults to 5 (unused for now)
# @return [nil]
def find_keywords(document, num = 5)
  # TODO: allow limiting keywords to words that occur in this document
  nil
end
search(*words)
click to toggle source
# File lib/rsemantic/corpus.rb, line 36
# Searches the built index for the given words.
#
# Each raw result returned by the index is paired with the document at
# the same position in the corpus and wrapped in an
# RSemantic::SearchResult. The wrapped results are returned sorted
# according to SearchResult's own ordering.
#
# @param [Array] words the words to search for
# @raise [RuntimeError] if no index has been built yet
# @return [Array<RSemantic::SearchResult>] the sorted search results
def search(*words)
  # Without this guard a missing index surfaces as an opaque
  # NoMethodError on nil.
  raise "No index built yet; call #build_index before searching" unless @search

  results = @search.search(words)
  wrapped = results.map.with_index do |result, index|
    RSemantic::SearchResult.new(@documents[index], result)
  end
  wrapped.sort
end
to_s()
click to toggle source
# File lib/rsemantic/corpus.rb, line 57
# Short human-readable summary: the class name, the number of documents
# held, and the inspected options hash.
#
# @return [String]
def to_s
  format(
    "#<%s %d documents, @options=%s>",
    self.class.name,
    @documents.size,
    @options.inspect
  )
end