class ReplacerBot::SeenTweets
Public Class Methods
clean_urls(string)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 45
# Replaces every http/https URL in +string+ with the literal placeholder
# '__URL__' and returns the result as a new String (+string+ is not mutated).
#
# A URL is taken to run from its scheme up to the next whitespace character,
# so a link followed by a newline or tab does not swallow the text after it.
def self.clean_urls(string)
  string.gsub(%r{https?://\S*}, '__URL__')
end
hashtag_nuker(string:, other_end: false)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 49
# Strips the run of hashtag tokens found at one end of +string+.
#
# string    - text to process; it is split on whitespace and re-joined with
#             single spaces.
# other_end - when false (the default) hashtags are dropped from the start of
#             the text; when true they are dropped from the end instead.
#
# Tokens are classified with ReplacerBot.is_hashtag. Only the leading run
# (counting from the chosen end) is removed; hashtags that come after the
# first non-hashtag token are kept.
#
# Returns a new String.
def self.hashtag_nuker(string:, other_end: false)
  words = string.split(' ')
  # Work from the far end by reversing, then restore the order afterwards.
  words.reverse! if other_end
  kept = words.drop_while { |token| ReplacerBot.is_hashtag(token) }
  kept.reverse! if other_end
  kept.join(' ')
end
retrieve()
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 37
# Loads the archive stored in the file named by
# Config.instance.config.seen_tweets.
#
# Returns the unmarshalled object, or an empty Set when the file does not
# exist (Errno::ENOENT).
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so this is
# only safe while the archive file is written exclusively by this program.
def self.retrieve
  # The block form closes the file handle as soon as loading finishes.
  File.open(Config.instance.config.seen_tweets) { |file| Marshal.load(file) }
rescue Errno::ENOENT
  Set.new
end
sanitise(tweet)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 74
# Normalises +tweet+ by running it through clean_urls and then handing that
# result to nuke_hashtags. Returns whatever nuke_hashtags returns.
def self.sanitise(tweet)
  without_urls = clean_urls(tweet)
  nuke_hashtags(without_urls)
end
save(set)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 88
# Persists +set+ to the file named by Config.instance.config.seen_tweets,
# overwriting any previous contents.
#
# The set is first passed through unshift and the result is serialised with
# Marshal.dump.
def self.save(set)
  archive_path = Config.instance.config.seen_tweets
  File.open(archive_path, 'w') { |io| Marshal.dump(unshift(set), io) }
end
similar(tweet, other_tweet, weighting: Config.instance.config.similarity_weighting)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 23
# Reports whether +tweet+ shares a run of +weighting+ consecutive words with
# +other_tweet+.
#
# tweet       - text whose words are scanned in sliding windows.
# other_tweet - text searched for each window.
# weighting   - number of consecutive words per window; defaults to
#               Config.instance.config.similarity_weighting.
#
# Both sides are lower-cased and passed through sanitise before the substring
# comparison. Returns false straight away when +tweet+ has fewer than
# +weighting+ words; otherwise returns true or false.
def self.similar(tweet, other_tweet, weighting: Config.instance.config.similarity_weighting)
  tweet_words = tweet.split(' ')
  return false if tweet_words.count < weighting

  # Loop-invariant: sanitise the comparison text once, not once per window.
  haystack = sanitise(other_tweet.downcase)

  # Slide a window of +weighting+ words across the tweet and stop at the
  # first window that occurs in the other tweet.
  (0..tweet_words.count - weighting).any? do |start|
    sample = tweet_words[start, weighting].join(' ').downcase
    haystack.include?(sanitise(sample))
  end
end
similar_to_archive(tweet, archive)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 13
# Reports whether +tweet+ is similar (as judged by similar) to any entry in
# +archive+.
#
# tweet   - the text to check.
# archive - an enumerable of previously stored tweets.
#
# Stops at the first similar entry instead of scanning the whole archive.
# Returns true or false; an empty archive yields false.
def self.similar_to_archive(tweet, archive)
  archive.any? { |archived_tweet| similar(tweet, archived_tweet) }
end
unshift(set)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 78
# Returns a new Set holding at most Config.instance.config.max_seen_tweets
# members of +set+.
#
# When +set+ is larger than the limit, only the final entries in the set's
# iteration order are kept. A limit of 0 yields an empty Set. A negative
# limit raises ArgumentError (from Array#last).
def self.unshift(set)
  max_size = Config.instance.config.max_seen_tweets
  Set.new(set.to_a.last(max_size))
end
validate(tweet)
click to toggle source
# File lib/replacer_bot/seen_tweets.rb, line 3
# Decides whether +tweet+ is new, and records it in the archive either way.
#
# The tweet counts as new when its sanitised form is not already in the
# archive and similar_to_archive finds nothing similar to it. The sanitised
# form is then added to the archive and the archive is saved, regardless of
# the outcome.
#
# Returns true when the tweet is new, false otherwise.
def self.validate(tweet)
  archive = retrieve
  sanitised = sanitise(tweet)

  # The similarity scan is skipped when the exact sanitised text is present.
  already_seen = archive.include?(sanitised) || similar_to_archive(tweet, archive)

  archive.add(sanitised)
  save(archive)
  !already_seen
end