Class: Tango::App
- Inherits:
-
Object
- Object
- Tango::App
- Defined in:
- lib/tango/app.rb
Overview
Tango application
Instance Attribute Summary (collapse)
-
- (Object) config
readonly
Returns the value of attribute config.
-
- (Object) dispatcher
readonly
Returns the value of attribute dispatcher.
-
- (Object) link_stack
readonly
Returns the value of attribute link_stack.
-
- (Object) logger
readonly
Returns the value of attribute logger.
Instance Method Summary (collapse)
-
- (Object) after
Filter run after Tango execution.
-
- (Object) before
Filter run before Tango execution.
- - (Tango::App) initialize(config: {}, link_stack: nil, dispatcher: nil, cache: nil, http_client: nil, parser: nil, db_locker: nil, logger: nil) constructor
-
- (Object) register_model(symbol, model)
Register new resource model.
-
- (Object) register_operator(symbol, operator)
Register new resource operator.
-
- (Integer) run
Run ETL process.
Constructor Details
- (Tango::App) initialize(config: {}, link_stack: nil, dispatcher: nil, cache: nil, http_client: nil, parser: nil, db_locker: nil, logger: nil)
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/tango/app.rb', line 20
#
# Build a Tango application.
#
# Every collaborator can be injected through a keyword argument; any that is
# left as nil (or false) is replaced by the default constructed below.
#
# @param config [Hash] application settings; this method reads the
#   'target_url' key to build the default link stack
# @param link_stack [Object, nil] defaults to LinkStack.new(config['target_url'])
# @param dispatcher [Object, nil] defaults to ETL::Dispatcher.new
# @param cache [Object, nil] defaults to a Resource::Cache wrapping a new Resource::Buffer
# @param http_client [Object, nil] defaults to HTTParty
# @param parser [Object, nil] defaults to Nokogiri::HTML
# @param db_locker [Object, nil] defaults to DatabaseLocker.new(Multidb.databases)
# @param logger [Object, nil] defaults to a Logger writing to STDOUT
def initialize(config: {}, link_stack: nil, dispatcher: nil, cache: nil,
               http_client: nil, parser: nil, db_locker: nil, logger: nil)
  # Registries filled by register_model / register_operator
  @models = {}
  @operators = {}

  @config = config

  # Dependencies: prefer the injected object, otherwise build the default.
  # The right-hand side of each || is only evaluated when nothing was injected.
  @link_stack = link_stack || LinkStack.new(config['target_url'])
  @dispatcher = dispatcher || ETL::Dispatcher.new
  @cache = cache || Resource::Cache.new(Resource::Buffer.new)
  @http_client = http_client || HTTParty
  @parser = parser || Nokogiri::HTML
  @db_locker = db_locker || DatabaseLocker.new(Multidb.databases)
  @logger = logger || Logger.new(STDOUT)
end
Instance Attribute Details
- (Object) config (readonly)
Returns the value of attribute config
10 11 12 |
# File 'lib/tango/app.rb', line 10
#
# Read-only accessor for the application configuration.
#
# @return [Object] the current value of @config
def config
  @config
end
- (Object) dispatcher (readonly)
Returns the value of attribute dispatcher
10 11 12 |
# File 'lib/tango/app.rb', line 10
#
# Read-only accessor for the dispatcher.
#
# @return [Object] the current value of @dispatcher
def dispatcher
  @dispatcher
end
- (Object) link_stack (readonly)
Returns the value of attribute link_stack
10 11 12 |
# File 'lib/tango/app.rb', line 10
#
# Read-only accessor for the link stack.
#
# @return [Object] the current value of @link_stack
def link_stack
  @link_stack
end
- (Object) logger (readonly)
Returns the value of attribute logger
10 11 12 |
# File 'lib/tango/app.rb', line 10
#
# Read-only accessor for the logger.
#
# @return [Object] the current value of @logger
def logger
  @logger
end
Instance Method Details
- (Object) after
Filter run after Tango execution
45 46 |
# File 'lib/tango/app.rb', line 45
#
# Filter run after Tango execution.
#
# The body is empty, so calling it does nothing and returns nil.
# NOTE(review): presumably a hook meant to be overridden by subclasses - confirm.
#
# @return [nil]
def after; end
- (Object) before
Filter run before Tango execution
41 42 |
# File 'lib/tango/app.rb', line 41
#
# Filter run before Tango execution.
#
# The body is empty, so calling it does nothing and returns nil.
# NOTE(review): presumably a hook meant to be overridden by subclasses - confirm.
#
# @return [nil]
def before; end
- (Object) register_model(symbol, model)
Register new resource model
52 53 54 55 56 57 58 59 60 61 |
# File 'lib/tango/app.rb', line 52
#
# Register new resource model.
#
# The model is stored in the @models registry under +symbol+. When the model
# reports that it is not persistent, its table is truncated right away.
#
# @param symbol [Object] registry key for the model
# @param model [#persistent?, #table_name] model to register
# @return [Object, nil] nil for persistent models, otherwise whatever the
#   connection's +execute+ returns
def register_model(symbol, model)
  @models[symbol] = model

  # Persistent models keep their rows between runs.
  return if model.persistent?

  # Quote the identifier so table names that are reserved words or contain
  # special characters still produce valid SQL.
  connection = ActiveRecord::Base.connection
  connection.execute("TRUNCATE #{connection.quote_table_name(model.table_name)}")
end
- (Object) register_operator(symbol, operator)
Register new resource operator
67 68 69 70 71 72 73 74 75 76 |
# File 'lib/tango/app.rb', line 67
#
# Register new resource operator.
#
# The operator is stored in the @operators registry under +symbol+ and a
# block is registered with the cache under the same key; that block passes
# the resource it receives to the operator's +load+ method.
#
# @param symbol [Object] registry key for the operator
# @param operator [#load] operator to register
# @return [Object] whatever @cache.register returns
def register_operator(symbol, operator)
  @operators[symbol] = operator

  # Register operator with resource cache system
  @cache.register(symbol) { |resource| operator.load(resource) }
end
- (Integer) run
Run ETL process
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/tango/app.rb', line 87
#
# Run ETL process.
#
# Steps, in order: select the unlocked database, load the cache, call the
# +before+ filter, crawl every link on the link stack, release the cache
# buffers, call the +after+ filter, lock the database that was used, log a
# summary and close the logger.
#
# A link is skipped (with an error log entry) when no handler is found for
# it, when the download raises a StandardError, or when the response code is
# neither 200 nor 201. A StandardError raised by a handler is logged and the
# crawl continues with the next link.
#
# @return [Object] the result of @logger.close.
#   NOTE(review): the generated docs advertise an Integer return value, but
#   the last expression evaluated here is @logger.close - confirm which one
#   callers rely on.
def run
  # Save beginning time
  start_time = Time.now

  @logger.info "Running Tango v.#{Tango::VERSION} ..."
  @logger.info "Target: #{@link_stack.host}."

  # Resolve the unlocked database once, so the database that is used, logged
  # and locked at the end of the run is guaranteed to be the same one.
  database = @db_locker.unlocked
  Multidb.use(database)
  @logger.info "Using database '#{database}'."

  # Load cache
  @logger.info "Loading cache ..."
  load_cache

  # Run before filter
  @logger.info "Running before callback ..."
  before

  # Counter of links whose handler finished without raising
  links_counter = 0

  @logger.info "Tango starts crawling ..."

  while @link_stack.has_links?
    link = @link_stack.shift

    # Skip iteration if no handler found
    handler_klass = @dispatcher.find_handler(link)
    unless handler_klass
      @logger.error "No handler for link: #{link}."
      next
    end

    # Try to get contents of the link
    url = @link_stack.host + link
    begin
      response = @http_client.get(url)
    rescue StandardError => e
      @logger.error "Could not download contents of #{url} link."
      @logger.error e.message
      next
    end

    # Continue only when response has code 200 or 201
    unless [200, 201].include?(response.code)
      @logger.error "Response code for link #{link} is #{response.code}. Only codes 200 and 201 are accepted."
      next
    end

    # Parse response contents and build the handler for this link
    document = @parser.parse(response.body)
    handler = handler_klass.new(link, document, @cache)

    # Append links fetched from handler
    @link_stack.append(handler.links)

    # Try to fire the handler; a failure is logged but does not stop the crawl
    begin
      handler.trigger
    rescue StandardError => e
      @logger.error "Link: #{link}. Handler had some troubles."
      @logger.error e.message
      # backtrace is nil for exceptions that were never raised; Array() guards that
      @logger.error Array(e.backtrace).join("\n")
    else
      links_counter += 1
      @logger.debug "Link: #{link}. Handler triggered successfully."
    end

    # Sleep to give the crawled server time to breathe (skipped links do not sleep)
    sleep(@config["sleep"] || 0)
  end

  # Release buffers
  @logger.info "Releasing buffers ..."
  @cache.buffer.release_all

  # Run after filter
  @logger.info "Running after callback ..."
  after

  # Lock database used in this Tango iteration
  @db_locker.lock(database)

  # Get time of script execution ending
  end_time = Time.now

  @logger.info "Tango crawled #{links_counter}/#{@link_stack.shifted} links successfully."
  @logger.info "Start time: #{start_time}, end time: #{end_time}, time elapsed: #{end_time - start_time} seconds."

  # Close logger
  @logger.close
end