class Logfiction::AccessLog
Public Class Methods
new(assumptions={})
click to toggle source
# File lib/logfiction.rb, line 14
# Build an empty generator and apply the given assumptions on top of the
# defaults.
#
# Input:
#   assumptions(Hash): overrides handed to set_assumptions
def initialize(assumptions={})
  # Collections that the generate_* methods fill in later.
  @users, @items, @states = [], [], []
  @start_state, @transitions = [], []
  @auto_transiton = {}
  @assumptions = {}

  # access_log: user_id & sequence of states
  # ex.) {user_id1(int):
  #        [{timestamp: timestamp(str),
  #          state_id: state_id(int),
  #          item [item(int), ...]}, ...], ...}
  @access_log = {}

  set_assumptions(assumptions)
end
Public Instance Methods
choice_next_state(states_with_probability)
click to toggle source
Random choice among states, each of which has its own probability
- TODO
-
more logically correct sampling
Input: states_with_probability(Hash) mapping state_id(int) => probability(number) Output: state_id(int)
# File lib/logfiction.rb, line 215
# Pick one state at random, weighted by its probability.
#
# Input:
#   states_with_probability(Hash): state_id => probability (any non-negative
#   number; the weights do not have to sum to 1)
# Output:
#   state_id: one key of the hash. States whose weight is zero (or negative)
#   are never chosen while at least one positive weight exists. When no
#   weight is positive a key is picked uniformly; an empty hash gives nil.
def choice_next_state(states_with_probability)
  candidates = states_with_probability.select { |_state_id, probability| probability > 0 }
  total = candidates.values.inject(0, :+)
  return states_with_probability.keys.sample unless total > 0

  # Draw a point in [0, total) and walk the cumulative weights. The strict
  # "<" keeps every interval half-open, so each state gets exactly its share.
  pick = rand * total
  cumulative = 0
  candidates.each do |state_id, probability|
    cumulative += probability
    return state_id if pick < cumulative
  end

  # Only reachable through floating point rounding at the upper edge.
  candidates.keys.last
end
export_logfile(n=10000, filetype='CSV', filepath='./fiction_log.csv')
click to toggle source
# File lib/logfiction.rb, line 416
# Generate an access log and write it to a file.
#
# Input:
#   n(int): number of log rows requested from generate_accesslog
#   filetype(str): output format; only 'CSV' is supported
#   filepath(str): destination path
# Output:
#   the array of generated log rows
# Raises:
#   FileTypeError when filetype is not 'CSV'. The check happens before any
#   log is generated, so an unsupported type fails fast.
def export_logfile(n=10000, filetype='CSV', filepath='./fiction_log.csv')
  unless filetype == 'CSV'
    #[TODO] support output type (json, tsv, ...)
    raise FileTypeError, "Your input file type is not support..."
  end

  logs = generate_accesslog(n)

  CSV.open(filepath, "wb") do |output|
    # With no rows there are no headers to derive; leave the file empty
    # instead of failing on logs.first.
    next if logs.empty?

    headers = logs.first.keys
    output << headers
    logs.each do |log|
      output << headers.map { |key| log[key] }
    end
  end

  logs
end
generate_accesslog(n, output_form={})
click to toggle source
Output:
n_actions(int): total number of user's actions n_sessions: total number of user's sessions
# File lib/logfiction.rb, line 325
# Simulate user activity and return the resulting access log rows.
#
# Input:
#   n(int): number of rows wanted in the output
#   output_form(Hash): column selection handed to output_accesslog
# Output:
#   whatever output_accesslog(n, output_form) returns
#
# State transitions, users and items are generated with default settings
# when none were configured beforehand. The number of generated users/items
# comes from the :n_users / :n_items assumptions (100 when unset).
def generate_accesslog(n, output_form={})
  # Generate extra rows because "false" (session exit) logs are not counted
  # in the final output.
  n_max = n < 5000 ? n * 2 : n + (n / 4)

  # initialize access_log
  @access_log = {}

  # set default values unless a manual setting was done
  generate_state_transiton if @transitions.size == 0
  generate_users(@assumptions[:n_users] || 100) if @users.size == 0
  generate_items(@assumptions[:n_items] || 100) if @items.size == 0

  user_max_sessions = @assumptions[:user_max_sessions]
  user_max_actions = @assumptions[:user_max_actions]

  n_row = 1
  while n_row < n_max
    rows_before_pass = n_row

    @users.each do |user|
      user_id = user[:user_id]
      # The counters restart on every pass, so each user receives at least
      # one update per pass even after reaching the caps.
      n_actions = 0
      n_sessions = 0
      while n_actions < user_max_actions && n_sessions < user_max_sessions
        n_actions, n_sessions = update_user_state(user_id)
        n_row += 1
        break if n_row > n_max
      end
      break if n_row > n_max
    end

    # Stop when enough rows exist, or when a whole pass produced nothing
    # (no users, or a cap of zero) and looping again could never finish.
    break if n_row > n_max || n_row == rows_before_pass
  end

  output_accesslog(n, output_form)
end
generate_items(n_items=100, items=[])
click to toggle source
Item generator
Input: n_items(int), items(array of item hashes) Output: array of items item(hash): { item_id: item_id, options}
# File lib/logfiction.rb, line 74
# Populate the item list.
#
# Input:
#   n_items(int): how many items to create when no list is supplied
#   items(Array): ready-made items; used as-is when not empty
# Output:
#   the stored item list; generated items look like { item_id: id } with ids
#   counting up from 0
def generate_items(n_items=100, items=[])
  # [TODO] validate input data
  return @items = items unless items.empty?

  @items = 0.upto(n_items - 1).map { |id| { item_id: id } }
end
generate_state_transiton(state_transtion={})
click to toggle source
States generator
Input: state_transtion(object) Output: none (the result is stored in the instance state)
# File lib/logfiction.rb, line 87
# Configure the states, start states, transitions and auto-transitions.
#
# Input:
#   state_transtion(Hash): {} selects the built-in EC-like site; otherwise
#   the hash is read for :states, :start_state and :transitions
# Output:
#   the stored transition list
#
# The auto-transition table is rebuilt from scratch on every call, so
# entries from a previous configuration never leak into the new one.
def generate_state_transiton(state_transtion={})
  if state_transtion == {}
    # default setting is like EC
    states = [
      {state_id: 0, state_name: 'top_page_view', item_type: :no_item, request: '/'},
      {state_id: 1, state_name: 'list_page_view', item_type: :many, request: '/list'},
      {state_id: 2, state_name: 'detail_page_view', item_type: :one, request: '/item'},
      {state_id: 3, state_name: 'item_purchase', item_type: :one, request: '/purchace'}
    ]
    @start_state = [0]
    @transitions = [
      # probability is 0.6 if user transit from top(id:0) to list(id:1) page.
      # transition restrict by item is none.
      {from: 0, to: 1, probability: 0.6, dependent_item: false},
      # probability is 0.4 if user transit from list(id:1) to detail(id:2) page
      # "to state" item should be chosen from "from state" items.
      {from: 1, to: 2, probability: 0.4, dependent_item: true},
      # probability is 0.2 if user transit from detail(id:2) to purchase(id:3) page
      # "to state" item should be chosen from "from state" items.
      # after transition to state '3', automatically transition to state "0"
      {from: 2, to: 3, probability: 0.2, dependent_item: true, auto_transiton: 0}
    ]
  else
    @start_state = state_transtion[:start_state]
    @transitions = state_transtion[:transitions]
    states = state_transtion[:states]
  end

  # index states by their id
  @states = states.each_with_object({}) do |state, by_id|
    by_id[state[:state_id]] = state
  end

  # rebuild the auto transition table: "to" state => state entered right after
  @auto_transiton = {}
  @transitions.each do |transition|
    next unless transition[:auto_transiton]

    @auto_transiton[transition[:to]] = transition[:auto_transiton]
  end

  @transitions
end
generate_users(n_users=100, users=[])
click to toggle source
User generator
Input: n_users(int), users(array of user_info hashes) Output: array of user_info user_info(hash): {user_id: user_id(int), options}
# File lib/logfiction.rb, line 60
# Populate the user list.
#
# Input:
#   n_users(int): how many users to create when no list is supplied
#   users(Array): ready-made user_info hashes; used as-is when not empty
# Output:
#   the stored user list; generated users look like { user_id: id } with ids
#   counting up from 0
def generate_users(n_users=100, users=[])
  # [TODO] validate input data
  return @users = users unless users.empty?

  @users = 0.upto(n_users - 1).map { |id| { user_id: id } }
end
get_next_items(from_state_id, to_state_id, current_items)
click to toggle source
generate state with items
Input: items Output: items
# File lib/logfiction.rb, line 137
# Decide which items accompany the next state.
#
# Input:
#   from_state_id: id of the current state; nil/false when a session starts
#   to_state_id: id of the state being entered
#   current_items(Array): items attached to the current state
# Output:
#   Array of items for the next state ([] when the target state is unknown,
#   shows no items, or is not connected to the current state)
#
# Items are grouped into fixed pages of 10 (@items.each_slice(10)); a :many
# state shows one such page, a :one state shows a single item.
def get_next_items(from_state_id, to_state_id, current_items)
  to_state = @states[to_state_id]
  return [] unless to_state

  to_item_type = to_state[:item_type]

  # Session start: there is no previous state to depend on, so pick freely.
  unless from_state_id
    case to_item_type
    when :many then return @items.each_slice(10).to_a.sample || []
    when :one then return @items.empty? ? [] : [@items.sample]
    else return []
    end
  end

  from_state = @states[from_state_id]
  return [] unless from_state

  # Items only carry over between states joined by a transition, in either
  # direction (forward, or "back" along the same edge).
  connected = @transitions.any? do |transition|
    (transition[:from] == from_state_id && transition[:to] == to_state_id) ||
      (transition[:from] == to_state_id && transition[:to] == from_state_id)
  end
  return [] unless connected

  shown = current_items || []

  case [from_state[:item_type], to_item_type]
  when [:no_item, :many]
    @items.each_slice(10).to_a.sample || []
  when [:many, :one]
    # pick one of the listed items; nothing to pick from an empty list
    shown.empty? ? [] : [shown.sample]
  when [:one, :many]
    # go (back) to the page that contains the current item
    @items.each_slice(10).select { |page| page.include?(shown[0]) }.last || []
  when [:one, :one]
    current_items
  else
    []
  end
end
output_accesslog(n_max, output_form)
click to toggle source
# File lib/logfiction.rb, line 369
# Flatten the per-user access log into a list of output rows.
#
# Input:
#   n_max(int): maximum number of rows returned
#   output_form(Hash): column selection; any of the keys below may be given
#     and replaces the default for that key
#       basic_log: fields copied from the log entry
#                  (default [:timestamp, :user_id, :state_id, :items])
#       state:     fields copied from the state definition (default [:state_name])
#       user:      fields copied from the user record (default [])
# Output:
#   Array of Hash rows ordered by log timestamp, at most n_max long.
#   Entries without a state_id (session exits) are skipped, and :items is
#   rendered as the item ids joined with ":".
def output_accesslog(n_max, output_form)
  defaults = {
    basic_log: [:timestamp, :user_id, :state_id, :items],
    state: [:state_name],
    user: []
  }
  # Caller settings win over the defaults; the caller's hash is not mutated.
  form = defaults.merge(output_form || {})

  users_by_id = @users.each_with_object({}) do |user, index|
    index[user[:user_id]] = user
  end

  rows = []
  @access_log.each do |user_id, logs|
    user = users_by_id[user_id] || {}

    logs.each do |log|
      next unless log[:state_id]

      row = {}
      Array(form[:basic_log]).each do |field|
        row[field] =
          case field
          when :items then log[:items].map { |item| item[:item_id] }.join(":")
          when :user_id then user_id
          else log[field]
          end
      end

      state = @states[log[:state_id]] || {}
      Array(form[:state]).each { |field| row[field] = state[field] }
      Array(form[:user]).each { |field| row[field] = user[field] }

      # Keep the raw timestamp next to the row so ordering works even when
      # :timestamp is not among the selected columns; the running index
      # keeps equal timestamps in insertion order.
      rows << [log[:timestamp], rows.size, row]
    end
  end

  rows.sort_by { |timestamp, sequence, _row| [timestamp, sequence] }
      .map(&:last)[0, n_max]
end
set_assumptions(assumptions={})
click to toggle source
Set assumptions
assumptions: object(hash) time_access_from(Time): the time from which logs are generated user_max_sessions(int): maximum number of sessions per user user_max_actions(int): maximum number of actions per user n_users(int): how many users n_items(int): how many items
# File lib/logfiction.rb, line 40
# Reset the assumptions to their defaults, then overlay the given values.
#
# Input:
#   assumptions(Hash): any of :time_access_from, :user_max_sessions,
#   :user_max_actions, :n_users, :n_items (other keys are stored as well)
# Output:
#   the given hash when it is not empty, nil otherwise
def set_assumptions(assumptions={})
  @assumptions = {
    time_access_from: Time.parse("2018-06-29 09:00:00"),
    user_max_sessions: 5,
    user_max_actions: 100,
    n_users: 100,
    n_items: 100
  }
  return if assumptions == {}

  # [TODO] validate assumptions
  assumptions.each_pair { |name, setting| @assumptions[name] = setting }
end
update_user_state(user_id)
click to toggle source
random walk update user state
Input: current_states(Hash): before_state(Hash): state_id(int): state_id item(Array): item list states_sequence(Array): states sequence list
# File lib/logfiction.rb, line 238
# Advance one user by a single random-walk step and append it to the log.
#
# Input:
#   user_id: key of the user inside @access_log
# Output:
#   [n_actions, n_sessions]: number of log entries of this user, and how
#   many of them are session exits (entries whose state_id is false/nil)
#
# The next state is chosen as follows:
#   * no history yet          -> a random start state
#   * last state auto-forwards -> its auto-transition target, one time unit later
#   * last entry was an exit  -> a random start state after the session gap
#   * otherwise               -> weighted choice between the outgoing
#                                transitions, going back and exiting
def update_user_state(user_id)
  next_state_interval = USER_ACTION_INTERVAL[0] + rand * USER_ACTION_INTERVAL[1]
  states_sequence = @access_log[user_id] || []

  if states_sequence.empty?
    # first action
    @access_log[user_id] = []
    next_state_id = @start_state.sample
    next_timestamp = @assumptions[:time_access_from] + next_state_interval
    next_items = []
  else
    from_state = states_sequence.last
    from_state_id = from_state[:state_id]
    auto_transiton_state = @auto_transiton[from_state_id]

    if !auto_transiton_state.nil?
      # auto transition
      next_state_id = auto_transiton_state
      next_timestamp = from_state[:timestamp] + 1
    elsif !from_state_id
      # new session
      next_state_id = @start_state.sample
      next_timestamp = from_state[:timestamp] + (next_state_interval + SESSION_OUT_MIN_TIME)
    else
      # Collect the weight of every reachable state. Weights are added up
      # per state, so a state reachable in two ways (e.g. "back" to a state
      # that is also a forward target, or "back" onto an exit) keeps both
      # shares instead of one overwriting the other.
      weights = Hash.new(0)
      total_probability = 0
      @transitions.each do |transition|
        next unless transition[:from] == from_state_id

        weights[transition[:to]] += transition[:probability]
        total_probability += transition[:probability]
      end

      # Whatever is left over is split between exiting and going back;
      # never negative, even if the configured probabilities exceed 1.
      remaining = [1 - total_probability, 0].max

      # exclude auto transition targets from "back"
      back_state_id = states_sequence.last(2)[0][:state_id]
      if states_sequence.size == 1 || @auto_transiton.key?(back_state_id)
        # exit only
        weights[false] += remaining
      else
        # exit and back
        weights[false] += remaining * 0.3
        weights[back_state_id] += remaining * 0.7
      end

      next_state_id = choice_next_state(weights)
      next_timestamp = from_state[:timestamp] + next_state_interval
    end

    next_items = get_next_items(from_state_id, next_state_id, from_state[:items])
  end

  @access_log[user_id] << {
    timestamp: next_timestamp,
    state_id: next_state_id,
    items: next_items
  }

  # return n_actions and n_sessions
  n_actions = @access_log[user_id].size
  n_sessions = @access_log[user_id].count { |state| !state[:state_id] }
  [n_actions, n_sessions]
end