class Plagiarism::Strategies::Engine

Constants

THRESHOLD

Attributes

content[RW]
params[RW]

Public Class Methods

exists?(response) click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 17
# Checks the URIs that +iterate+ yields for +response+ against the
# whitelist regex and returns whatever +iterate+ returns.
#
# With +iterate+'s default second argument (:all?) this presumably means
# "every yielded host is whitelisted" -- confirm against the concrete
# engine's +iterate+ implementation.
#
# @param response [Object] passed through to +iterate+ unchanged
# @return [Object] the result of +iterate+
def exists?(response)
  # Build the regex at most once per call (it is rebuilt from config every
  # time whitelists_regex runs), and only if a URI is actually yielded.
  regex = nil
  iterate(response) { |uri| uri.host =~ (regex ||= whitelists_regex) }
end
fetch(content, params) click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 9
# Placeholder for the search request; this base implementation always raises.
#
# Callers in this class invoke +success?+ and +response_body+ on the return
# value, so an overriding implementation presumably returns a Typhoeus-style
# response -- confirm in the concrete engines.
#
# @param content [String] the query text (callers pass it wrapped in quotes)
# @param params [Object] engine-specific request parameters
# @raise [RuntimeError] always
def fetch(content, params)
  # Same exception class as a bare `raise`, but with a message that says
  # what is missing instead of "unhandled exception".
  raise "#{self}.fetch must be implemented by a subclass"
end
iterate(r, a = :all?) click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 13
# Placeholder for walking the results in a response; this base
# implementation always raises.
#
# +exists?+ calls this with a block that receives an object responding to
# +host+, so an overriding implementation presumably yields one URI per
# result and combines the block results using +a+ -- confirm in the
# concrete engines.
#
# @param r [Object] the response body to walk
# @param a [Symbol] name of the combining predicate (defaults to :all?)
# @raise [RuntimeError] always
def iterate(r, a = :all?)
  # Same exception class as a bare `raise`, but with a message that says
  # what is missing instead of "unhandled exception".
  raise "#{self}.iterate must be implemented by a subclass"
end
new(c, p) click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 34
# Stores the text to check and the request parameters on the instance.
#
# @param c [Object] stored as @content
# @param p [Object] stored as @params
def initialize(c, p)
  @content = c
  @params = p
end
valid_segments(ps, params) click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 21
# Counts the segments of +ps+ whose quoted search both succeeds and passes
# +exists?+.
#
# @param ps [#segment] segmenter whose +segment+ returns the sentences
# @param params [Object] forwarded to +fetch+ for every sentence
# @return [Integer] number of segments that passed both checks
def valid_segments(ps, params)
  ps.segment.count do |sentence|
    # Each sentence is searched wrapped in double quotes.
    response = fetch("\"#{sentence}\"", params)
    next false unless response.success?

    exists?(response.response_body)
  end
end
whitelists_regex() click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 28
# Builds one regex that matches any of the configured whitelist patterns.
# Each entry of Config.whitelists is compiled with Regexp.new, so entries
# are treated as regex source rather than literal strings.
#
# @return [Regexp] union of all whitelist patterns
def whitelists_regex
  Regexp.union(Config.whitelists.map { |pattern| Regexp.new(pattern) })
end

Public Instance Methods

match() click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 45
# Searches for the whole content as one quoted phrase and, when the request
# succeeds, hands the response body to +retrieve_link+.
#
# @return [Object] the falsy result of +success?+ when the request failed,
#   otherwise whatever +retrieve_link+ returns
def match
  quoted_query = "\"#{content}\""
  response = self.class.fetch(quoted_query, params)
  response.success? && retrieve_link(response.response_body)
end
unique?() click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 38
# Segments the content, counts the segments accepted by the class-level
# +valid_segments+, and compares that share against the threshold.
#
# The threshold is Config.threshold, falling back to THRESHOLD when the
# config value is nil or false. If the segmenter returns no segments, a zero
# count makes the ratio 0.0 / 0 (NaN), which compares false.
#
# @return [Boolean] true when accepted / total segments >= threshold
def unique?
  limit = Config.threshold || THRESHOLD
  segmenter = PragmaticSegmenter::Segmenter.new(text: content)
  accepted = self.class.valid_segments(segmenter, params)
  ratio = accepted.to_f / segmenter.segment.size
  ratio >= limit
end