class Plagiarism::Strategies::Engine
Constants
- THRESHOLD
Attributes
content[RW]
params[RW]
Public Class Methods
exists?(response)
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 17
# Checks the URIs found in +response+ against the configured whitelist.
#
# +response+ is handed to +iterate+ untouched; the block receives each
# yielded +uri+ and is truthy when +uri.host+ matches +whitelists_regex+.
# The return value is whatever +iterate+ returns for that block (its
# reducer defaults to +:all?+).
def exists?(response)
  # Build the whitelist pattern once instead of once per yielded URI.
  whitelist = whitelists_regex
  iterate(response) { |uri| uri.host =~ whitelist }
end
fetch(content, params)
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 9
# Placeholder for the engine-specific search request.
#
# This base implementation always raises RuntimeError. Callers in this
# class use the result as an object responding to +success?+ and
# +response_body+, so a concrete engine is presumably expected to
# override it and return such an object (NOTE(review): confirm against
# the concrete strategies).
def fetch(content, params)
  # An explicit message replaces the bare `raise`, which reported only
  # "unhandled exception" (or re-raised an unrelated in-flight error).
  raise "#{self} must implement fetch(content, params)"
end
iterate(r, a = :all?)
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 13
# Placeholder for walking the result URIs of a search response.
#
# This base implementation always raises RuntimeError. Callers in this
# class pass a block taking a +uri+ (they call +host+ and +to_s+ on it)
# and either rely on the default +a+ of +:all?+ or pass +:find+, so +a+
# presumably names the Enumerable method used to reduce the results
# (NOTE(review): confirm against the concrete strategies).
def iterate(r, a = :all?)
  # An explicit message replaces the bare `raise`, which reported only
  # "unhandled exception" (or re-raised an unrelated in-flight error).
  raise "#{self} must implement iterate(r, a = :all?)"
end
new(c, p)
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 34
# Stores the text to check (+c+) and the request parameters (+p+) in
# the +@content+ and +@params+ instance variables.
def initialize(c, p)
  @content = c
  @params = p
end
valid_segments(ps, params)
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 21 def valid_segments(ps, params) ps.segment.count do |sentence| typhoeus = fetch("\"#{sentence}\"", params) typhoeus.success? && exists?(typhoeus.response_body) end end
whitelists_regex()
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 28
# Compiles every entry of +Config.whitelists+ with +Regexp.new+ and
# returns the union of the resulting patterns as a single Regexp.
def whitelists_regex
  Regexp.union(Config.whitelists.map { |pattern| Regexp.new(pattern) })
end
Public Instance Methods
match()
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 45
# Sends the whole +content+, wrapped in double quotes, through the
# class-level +fetch+ together with +params+.
#
# Returns the falsy result of +success?+ when the reply is not
# successful; otherwise returns +retrieve_link+ applied to the reply's
# +response_body+.
def match
  query = %("#{content}")
  reply = self.class.fetch(query, params)
  reply.success? && retrieve_link(reply.response_body)
end
retrieve_link(response)
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 50
# Returns, as a String, the first URI in +response+ whose host does not
# match the whitelist pattern.
#
# The URIs come from the class-level +iterate+ called with +:find+. When
# no URI triggers the early return, the result is whatever +iterate+
# itself returns.
def retrieve_link(response)
  # Build the whitelist pattern once instead of once per yielded URI.
  whitelist = self.class.whitelists_regex
  self.class.iterate(response, :find) do |uri|
    # Return from the method here: only the yielded uri is known to
    # respond to to_s as needed, not iterate's own return value.
    return uri.to_s unless uri.host =~ whitelist
  end
end
unique?()
click to toggle source
# File lib/plagiarism/strategries/engine.rb, line 38
# Reports whether enough segments of +content+ pass the class-level
# +valid_segments+ check.
#
# +content+ is split with PragmaticSegmenter. The number returned by
# +valid_segments+ is divided by the total segment count, and the result
# is true when that ratio is at least +Config.threshold+, or +THRESHOLD+
# when the configured value is nil or false.
def unique?
  minimum_ratio = Config.threshold || THRESHOLD
  segmenter = PragmaticSegmenter::Segmenter.new(text: content)
  passing = self.class.valid_segments(segmenter, params)
  ratio = passing.to_f / segmenter.segment.size
  ratio >= minimum_ratio
end