class ReplacerBot::SeenTweets

Public Class Methods

clean_urls(string) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 45
# Replaces every http:// or https:// URL in +string+ with the literal
# placeholder '__URL__' and returns the resulting string; the argument itself
# is not modified.
#
# A URL runs up to the next whitespace character of any kind, so a link that
# is followed by a newline or tab does not swallow the text after it.
def self.clean_urls string
  string.gsub(%r{https?://\S*}, '__URL__')
end
hashtag_nuker(string:, other_end: false) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 49
# Strips the run of hashtag tokens found at one end of +string+.
#
# string    - text to process; it is split into tokens on whitespace.
# other_end - when false (the default) the leading run of hashtags is
#             removed; when true the trailing run is removed instead.
#
# Tokens are classified by ReplacerBot.is_hashtag. Only the unbroken run at
# the chosen end is dropped: hashtags that appear after the first non-hashtag
# token (counting from that end) are kept.
#
# Returns the surviving tokens joined with single spaces.
def self.hashtag_nuker string:, other_end: false
  words = string.split ' '
  # Work from the far end by reversing, then restore the order afterwards.
  words = words.reverse if other_end

  kept = words.drop_while { |token| ReplacerBot.is_hashtag token }

  kept = kept.reverse if other_end
  kept.join ' '
end
nuke_hashtags(string) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 70
# Removes hashtag runs from both ends of +string+. The trailing run is
# stripped first, then the leading run of what is left; both passes are
# delegated to hashtag_nuker. Returns the result of the second pass.
def self.nuke_hashtags string
  without_trailing = hashtag_nuker(string: string, other_end: true)
  hashtag_nuker(string: without_trailing)
end
retrieve() click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 37
# Loads the archive of seen tweets from the file named by
# Config.instance.config.seen_tweets.
#
# Returns the object that was marshalled into that file (save writes a Set),
# or an empty Set when the file does not exist or holds no data.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so the archive
# file should only ever be produced by this program.
def self.retrieve
  # Block form closes the handle; binary mode keeps the marshalled bytes intact.
  File.open(Config.instance.config.seen_tweets, 'rb') { |file| Marshal.load file }
rescue Errno::ENOENT, EOFError
  # No archive yet, or nothing readable in it (e.g. a zero-length file).
  Set.new
end
sanitise(tweet) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 74
# Normalises +tweet+ for comparison: URLs are replaced via clean_urls first,
# and that result is then passed through nuke_hashtags. Returns the final
# string.
def self.sanitise tweet
  without_urls = clean_urls(tweet)
  nuke_hashtags(without_urls)
end
save(set) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 88
# Persists +set+ to the file named by Config.instance.config.seen_tweets.
#
# The set is first passed through unshift, and the result is serialised with
# Marshal, replacing any previous contents of the file.
def self.save set
  # 'wb': Marshal output is binary, so it must be written without any
  # newline or encoding translation.
  File.open Config.instance.config.seen_tweets, 'wb' do |file|
    Marshal.dump unshift(set), file
  end
end
similar(tweet, other_tweet, weighting: Config.instance.config.similarity_weighting) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 23
# Reports whether any run of +weighting+ consecutive words of +tweet+ occurs
# inside +other_tweet+.
#
# tweet       - candidate text; split into words on whitespace.
# other_tweet - text to search in.
# weighting   - window size in words (defaults to
#               Config.instance.config.similarity_weighting).
#
# Both sides are lower-cased and passed through sanitise before the substring
# comparison. Returns false outright when +tweet+ has fewer than +weighting+
# words; otherwise returns true or false.
def self.similar tweet, other_tweet, weighting: Config.instance.config.similarity_weighting
  tweet_words = tweet.split ' '
  return false if tweet_words.count < weighting

  # The same for every window, so sanitise it once.
  haystack = sanitise(other_tweet.downcase)

  (tweet_words.count - (weighting - 1)).times.any? do |i|
    needle = sanitise(tweet_words[i, weighting].join(' ').downcase)
    # A window that sanitises to nothing (e.g. one made only of hashtags)
    # would be "found" at index 0 of every string, so it must not count.
    !needle.empty? && haystack.include?(needle)
  end
end
similar_to_archive(tweet, archive) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 13
# Reports whether +tweet+ is similar, as judged by the similar method with its
# default weighting, to at least one entry of +archive+.
#
# archive - an Enumerable of previously seen tweets.
#
# Stops at the first similar entry. Returns true or false; an empty archive
# yields false.
def self.similar_to_archive tweet, archive
  archive.any? { |archived_tweet| similar(tweet, archived_tweet) }
end
unshift(set) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 78
# Returns a new Set holding at most Config.instance.config.max_seen_tweets
# elements of +set+. When +set+ is larger than that, the elements that come
# last in its iteration order are the ones kept. +set+ itself is not modified.
def self.unshift set
  max_size = Config.instance.config.max_seen_tweets
  # Array#last copes with a limit of zero (keeps nothing) and with a limit
  # beyond the size (keeps everything); slicing from -max_size does not, since
  # -0 selects the whole array. A negative limit is treated as zero.
  Set.new set.to_a.last([max_size, 0].max)
end
validate(tweet) click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 3
# Checks +tweet+ against the archive of seen tweets and records it.
#
# The tweet counts as valid when its sanitised form is not already in the
# archive and the raw tweet is not similar to any archived entry (per
# similar_to_archive, which is only consulted when the first check passes).
# Whatever the verdict, the sanitised form is added to the archive and the
# archive is saved.
#
# Returns true when the tweet is valid, false otherwise.
def self.validate tweet
  archive = retrieve
  sanitised = sanitise tweet

  already_seen = archive.include?(sanitised) || similar_to_archive(tweet, archive)

  archive.add sanitised
  save archive

  !already_seen
end