module CollectTwitterMedia

Constants

VERSION

Public Instance Methods

access_token(value) click to toggle source
# File lib/collect_twitter_media.rb, line 32
# Remembers the OAuth access token in @access_token.
# #twitter_client copies this value into the client configuration.
#
# value:: the access token to store
#
# Returns the value that was passed in.
def access_token(value)
  @access_token = value
end
access_token_secret(value) click to toggle source
# File lib/collect_twitter_media.rb, line 36
# Remembers the OAuth access token secret in @access_token_secret.
# #twitter_client copies this value into the client configuration.
#
# value:: the access token secret to store
#
# Returns the value that was passed in.
def access_token_secret(value)
  @access_token_secret = value
end
append_csv_row(csv_filename, tweet) click to toggle source

HACK: the call to 'media_uri_and_filename' made here is duplicated in the 'save_image_file' method

# File lib/collect_twitter_media.rb, line 169
# Appends one CSV row to +csv_filename+ for every media entry that
# #media_uri_and_filename reports for +tweet+.
#
# Columns are written in the order of the header produced by
# #create_csv_file: tweet id, screen name, original filename, save
# filename ("@<screen_name>_<tweet_id>_<original filename>"), original-size
# URI, and creation time.
#
# The file is opened once per call rather than once per row, and it is not
# opened at all when the tweet has no media entry.
#
# csv_filename:: path of the CSV file to append to
# tweet::        tweet object accepted by #media_uri_and_filename
#
# Returns the collection returned by #media_uri_and_filename.
def append_csv_row(csv_filename, tweet)
  media_rows = media_uri_and_filename(tweet)
  return media_rows if media_rows.empty?

  CSV.open(csv_filename, 'a') do |csv_file|
    media_rows.each do |media_data|
      csv_file << [
        media_data['tweet_id'],
        media_data['screen_name'],
        media_data['media_filename'],
        "@#{media_data['screen_name']}_#{media_data['tweet_id']}_#{media_data['media_filename']}",
        media_data['media_original_uri'],
        media_data['created_at'],
      ]
    end
  end

  media_rows
end
collect_tweets(until_tweet_id='', count=200) click to toggle source

Returns tweets whose IDs are LESS THAN OR EQUAL TO 'until_tweet_id'

# File lib/collect_twitter_media.rb, line 50
# Fetches one page of the home timeline through @client.
#
# until_tweet_id:: when this is an Integer it is sent as +max_id+; any other
#                  value (such as the '' default) sends no +max_id+ at all
# count::          value sent as +count+
#
# Retweets are included and extended tweet mode is always requested.
#
# Returns whatever @client.home_timeline returns.
#
# On any StandardError the message is written to standard error and the
# process exits with status 1.
def collect_tweets(until_tweet_id='', count=200)
  options = { count: count, include_rts: true, tweet_mode: 'extended' }
  options[:max_id] = until_tweet_id if until_tweet_id.is_a?(Integer)

  @client.home_timeline(**options)
rescue StandardError => e
  warn e.message
  exit(1)
end
collect_tweets_with_loop(loop_count=1, tweet_count=200, start_tweet_id='') click to toggle source
# File lib/collect_twitter_media.rb, line 63
# Calls #collect_tweets up to +loop_count+ times, walking backwards through
# the timeline, and returns every tweet gathered as one flat array.
#
# loop_count::     maximum number of pages to request
# tweet_count::    count passed to each #collect_tweets call
# start_tweet_id:: upper bound passed to the first #collect_tweets call
#
# After each page the next upper bound becomes (smallest id on the page - 1).
# Paging stops early as soon as a page comes back empty.
def collect_tweets_with_loop(loop_count=1, tweet_count=200, start_tweet_id='')
  gathered = []
  upper_id = start_tweet_id

  loop_count.times do
    page = collect_tweets(upper_id, tweet_count)
    break if page.empty?

    gathered.concat(page)
    upper_id = min_tweet_id(tweet_id_collection(page)) - 1
  end

  gathered
end
consumer_key(value) click to toggle source

HACK: TOO LONG…(can I use block?)

# File lib/collect_twitter_media.rb, line 24
# Remembers the API consumer key in @consumer_key.
# #twitter_client copies this value into the client configuration.
#
# value:: the consumer key to store
#
# Returns the value that was passed in.
def consumer_key(value)
  @consumer_key = value
end
consumer_secret(value) click to toggle source
# File lib/collect_twitter_media.rb, line 28
# Remembers the API consumer secret in @consumer_secret.
# #twitter_client copies this value into the client configuration.
#
# value:: the consumer secret to store
#
# Returns the value that was passed in.
def consumer_secret(value)
  @consumer_secret = value
end
create_csv_file(save_directory, base_filename='image_from_twitter') click to toggle source
# File lib/collect_twitter_media.rb, line 150
# Creates (or truncates) a timestamped CSV file inside +save_directory+ and
# writes the header row to it.
#
# save_directory:: directory in which the file is created
# base_filename::  filename prefix; the current local time formatted as
#                  YYYYmmdd_HHMMSS and ".csv" are appended to it
#
# Returns the path of the file that was written.
def create_csv_file(save_directory, base_filename='image_from_twitter')
  timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
  csv_path  = "#{save_directory}/#{base_filename}_#{timestamp}.csv"
  columns   = %w[tweet_id screen_name original_filename save_filename uri created_at]

  CSV.open(csv_path, 'w') { |csv| csv << columns }

  csv_path
end
max_tweet_id(tweet_id_collection) click to toggle source
# File lib/collect_twitter_media.rb, line 90
# Returns the largest element of the given collection of tweet ids
# (nil for an empty Array, per Array#max).
#
# tweet_id_collection:: collection of ids responding to #max. Inside this
#                       method the parameter shadows the #tweet_id_collection
#                       helper method.
def max_tweet_id(tweet_id_collection)
  tweet_id_collection.max
end
media_filename(media_uri) click to toggle source
# File lib/collect_twitter_media.rb, line 110
# Extracts the filename (including its extension) from a Twitter media URI,
# i.e. everything that follows "https://pbs.twimg.com/media/".
#
# media_uri:: URI string such as "https://pbs.twimg.com/media/abc.jpg"
#
# Returns the filename part as a String.
#
# Raises ArgumentError when +media_uri+ does not contain the expected
# prefix, instead of failing with a NoMethodError on nil.
def media_filename(media_uri)
  matched = media_uri.match(%r{https://pbs\.twimg\.com/media/(.*)\z})
  raise ArgumentError, "not a pbs.twimg.com media URI: #{media_uri.inspect}" unless matched

  matched[1] # with extension
end
media_original_uri(media_uri) click to toggle source
# File lib/collect_twitter_media.rb, line 106
# Builds the URI used to request a media file with the ":orig" suffix.
#
# media_uri:: media URI; it is interpolated into the result as-is
#
# Returns +media_uri+ followed by ":orig".
def media_original_uri(media_uri)
  "#{media_uri}:orig"
end
media_uri_and_filename(tweet) click to toggle source

If a tweet has several attached image files, all of them are saved.

# File lib/collect_twitter_media.rb, line 135
# Describes every photo attached to +tweet+ as a Hash, one Hash per URI
# returned by #media_uris.
#
# Each Hash has these String keys:
# 'tweet_id'::           tweet.id
# 'media_original_uri':: result of #media_original_uri for the photo URI
# 'media_filename'::     result of #media_filename for the photo URI
# 'screen_name'::        tweet.attrs[:user][:screen_name]
# 'created_at'::         tweet.created_at
#
# Returns an Array of such Hashes (empty when #media_uris returns nothing).
def media_uri_and_filename(tweet)
  media_uris(tweet).map do |photo_uri|
    {
      'tweet_id'           => tweet.id,
      'media_original_uri' => media_original_uri(photo_uri),
      'media_filename'     => media_filename(photo_uri),
      'screen_name'        => tweet.attrs[:user][:screen_name],
      'created_at'         => tweet.created_at,
    }
  end
end
media_uris(tweet) click to toggle source
# File lib/collect_twitter_media.rb, line 94
# Lists the HTTPS URLs of the photos attached to +tweet+.
#
# Only media entries that are exactly Twitter::Media::Photo instances are
# kept; every other media class is skipped.
#
# Returns an Array of Strings, empty when tweet.media? is false.
def media_uris(tweet)
  return [] unless tweet.media?

  photos = tweet.media.select { |attachment| attachment.instance_of?(Twitter::Media::Photo) }
  photos.map { |photo| photo.media_url_https.to_s }
end
media_uris_and_filenames(tweets) click to toggle source

Deprecated: this method does not work correctly when a tweet is a retweet.

# File lib/collect_twitter_media.rb, line 187
# Collects the #media_uri_and_filename results of every tweet in +tweets+
# into a single flat Array.
#
# tweets:: enumerable of tweet objects accepted by #media_uri_and_filename
def media_uris_and_filenames(tweets)
  tweets.flat_map { |tweet| media_uri_and_filename(tweet) }
end
min_tweet_id(tweet_id_collection) click to toggle source
# File lib/collect_twitter_media.rb, line 86
# Returns the smallest element of the given collection of tweet ids
# (nil for an empty Array, per Array#min).
#
# tweet_id_collection:: collection of ids responding to #min. Inside this
#                       method the parameter shadows the #tweet_id_collection
#                       helper method.
def min_tweet_id(tweet_id_collection)
  tweet_id_collection.min
end
original_tweet(tweet) click to toggle source
# File lib/collect_twitter_media.rb, line 114
# Resolves a retweet to the tweet it points at.
#
# When tweet.retweet? is true, the status whose id is stored under
# tweet.attrs[:retweeted_status][:id] is fetched through @client in extended
# tweet mode and returned. Otherwise +tweet+ itself is returned unchanged.
def original_tweet(tweet)
  return tweet unless tweet.retweet?

  # HACK: this extra request is sometimes slow to respond
  retweeted_id = tweet.attrs[:retweeted_status][:id]
  @client.status(retweeted_id, tweet_mode: "extended")
end
save(directory, tweet_count=200, loop_count=1, start_tweet_id='') click to toggle source
# File lib/collect_twitter_media.rb, line 10
# Downloads the photos found on the home timeline and records them in a CSV
# file.
#
# Steps, in order: build the API client (#twitter_client), collect tweets
# (#collect_tweets_with_loop), prepare the target directory
# (make_directory_if_not_exist, defined outside this section), create the
# CSV file (#create_csv_file), then for every tweet resolve retweets
# (#original_tweet), download its images (#save_image_file) and append its
# CSV rows (#append_csv_row).
#
# directory::      directory handed to make_directory_if_not_exist; that
#                  method's return value is used as the save directory
# tweet_count::    tweets requested per page
# loop_count::     maximum number of pages requested
# start_tweet_id:: upper bound for the first page; the '' default means none
#
# +tweet_count+ and +start_tweet_id+ are forwarded to
# #collect_tweets_with_loop; previously only +loop_count+ was, so the other
# two arguments had no effect.
#
# Returns the collection of tweets that was iterated.
def save(directory, tweet_count=200, loop_count=1, start_tweet_id='') # HACK: too many argv
  twitter_client
  tweet_collection  = collect_tweets_with_loop(loop_count, tweet_count, start_tweet_id)
  save_directory    = make_directory_if_not_exist(directory)
  csv_filename      = create_csv_file(save_directory)

  tweet_collection.each do |tweet|
    tweet = original_tweet(tweet) # retweet is NOT correct data, so need to get from original tweet
    save_image_file(save_directory, tweet)
    append_csv_row(csv_filename, tweet)
  end
end
save_image_file(save_directory, tweet) click to toggle source
# File lib/collect_twitter_media.rb, line 122
# Downloads every media entry that #media_uri_and_filename reports for
# +tweet+ into +save_directory+ using the external +wget+ command.
#
# Each file is saved as "<save_directory>/@<screen_name>_<tweet_id>_<original filename>".
#
# wget is started with an explicit argument list, so no shell is involved:
# spaces or shell metacharacters in the directory, URI or filename are
# passed through literally instead of being interpreted. A download that
# fails (or a wget that cannot be started) is reported on standard error
# and the remaining files are still attempted.
#
# save_directory:: directory the files are written to
# tweet::          tweet object accepted by #media_uri_and_filename
#
# Returns the collection returned by #media_uri_and_filename.
def save_image_file(save_directory, tweet)
  media_uri_and_filename(tweet).each do |media_data|
    tweet_id            = media_data['tweet_id']
    media_original_uri  = media_data['media_original_uri']
    media_filename      = media_data['media_filename']
    screen_name         = media_data['screen_name']

    destination = "#{save_directory}/@#{screen_name}_#{tweet_id}_#{media_filename}"
    downloaded  = system('wget', '-q', media_original_uri, '-O', destination)
    warn "wget failed for #{media_original_uri}" unless downloaded
  end
end
tweet_id_collection(tweets) click to toggle source
# File lib/collect_twitter_media.rb, line 78
# Maps each tweet in +tweets+ to its id.
#
# tweets:: enumerable of objects responding to #id
#
# Returns an Array of ids in the same order as +tweets+.
def tweet_id_collection(tweets)
  tweets.map { |tweet| tweet.id }
end
twitter_client() click to toggle source
# File lib/collect_twitter_media.rb, line 40
# Builds a Twitter::REST::Client from the four stored credentials
# (@consumer_key, @consumer_secret, @access_token, @access_token_secret)
# and keeps it in @client.
#
# Returns the newly built client.
def twitter_client
  credentials = {
    consumer_key:        @consumer_key,
    consumer_secret:     @consumer_secret,
    access_token:        @access_token,
    access_token_secret: @access_token_secret,
  }

  @client = Twitter::REST::Client.new do |config|
    credentials.each { |setting, secret| config.public_send("#{setting}=", secret) }
  end
end
via_client(tweet) click to toggle source

not used

# File lib/collect_twitter_media.rb, line 196
# Extracts the client name from the HTML anchor stored in tweet.source,
# e.g. "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
# yields "Twitter Web Client".
#
# Raises NoMethodError when the source is not a single anchor of that form.
def via_client(tweet)
  anchor_pattern = /\A<a href=".*>(.*)<\/a>\z/
  anchor_pattern.match(tweet.source)[1]
end