class TurbotRunner::Runner
Constants
- RC_OK
- RC_SCRAPER_FAILED
- RC_TRANSFORMER_FAILED
Attributes
base_directory[R]
Public Class Methods
new(directory, options={})
click to toggle source
# File lib/turbot_runner/runner.rb, line 13
# Builds a runner for the bot located in +directory+.
#
# directory - path of the bot; must be absolute. Its manifest is loaded
#             through load_config and kept in @config.
# options   - Hash read for :record_handler, :log_to_file, :timeout,
#             :scraper_provided and :output_directory. When
#             :output_directory is not given, "output" inside the bot
#             directory is used instead.
#
# Raises whatever assert_absolute_path raises when +directory+ or the
# supplied :output_directory is not an absolute path.
def initialize(directory, options={})
  assert_absolute_path(directory)

  @base_directory = directory
  @config = load_config(directory)

  @record_handler   = options[:record_handler]
  @log_to_file      = options[:log_to_file]
  @timeout          = options[:timeout]
  @scraper_provided = options[:scraper_provided]

  requested_output = options[:output_directory]
  if requested_output
    assert_absolute_path(requested_output)
    @output_directory = requested_output
  else
    @output_directory = File.join(@base_directory, 'output')
  end
end
Public Instance Methods
process_output(opts={})
click to toggle source
# File lib/turbot_runner/runner.rb, line 71
# Feeds previously written script output through process_script_output:
# first the scraper's, then each transformer's in manifest order.
#
# opts - Hash handed unchanged to process_script_output.
#
# Each transformer config is given the runner's :base_directory before
# being processed.
def process_output(opts={})
  process_script_output(scraper_config, opts)

  transformers.each do |transformer_config|
    with_base = transformer_config.merge(:base_directory => @base_directory)
    process_script_output(with_base, opts)
  end
end
run()
click to toggle source
# File lib/turbot_runner/runner.rb, line 29
# Runs the scraper (unless its output was provided) followed by every
# transformer, and reports the outcome as a return code.
#
# Returns RC_SCRAPER_FAILED if the scraper failed, otherwise
# RC_TRANSFORMER_FAILED if any transformer failed, otherwise RC_OK.
def run
  set_up_output_directory

  # When the scraper output was provided there is nothing to run, so the
  # scraper step counts as successful.
  scraper_succeeded = @scraper_provided ? true : run_script(scraper_config)

  # Run the transformers even if the scraper fails. Results are collected
  # with map (not a short-circuiting all? block) so that one failing
  # transformer never prevents the following ones from running.
  transformer_results = transformers.map do |transformer_config|
    config = transformer_config.merge(
      :base_directory => @base_directory,
      :duplicates_allowed => duplicates_allowed
    )
    # Each transformer reads the scraper's output file as its input.
    run_script(config, scraper_output_file)
  end
  transformers_succeeded = transformer_results.all?

  if !scraper_succeeded
    RC_SCRAPER_FAILED
  elsif !transformers_succeeded
    RC_TRANSFORMER_FAILED
  else
    RC_OK
  end
end
set_up_output_directory()
click to toggle source
# File lib/turbot_runner/runner.rb, line 57
# Creates the output directory if needed and deletes the .out/.err files
# left by earlier runs. The scraper's files are left untouched when the
# scraper output was provided; transformer files are always removed.
def set_up_output_directory
  FileUtils.mkdir_p(@output_directory)

  unless @scraper_provided
    ['.out', '.err'].each do |extension|
      FileUtils.rm_f(output_file('scraper', extension))
    end
  end

  transformers.each do |transformer_config|
    script = transformer_config[:file]
    ['.out', '.err'].each do |extension|
      FileUtils.rm_f(output_file(script, extension))
    end
  end
end
Private Instance Methods
additional_args()
click to toggle source
# File lib/turbot_runner/runner.rb, line 156
# Extra interpreter argument for the manifest's language.
#
# Returns "-r<path>" pointing at prerun.rb in this file's directory for
# ruby, "-u" for python, and nil for any other language.
def additional_args
  case language
  when 'ruby'
    "-r#{File.expand_path('../prerun.rb', __FILE__)}"
  when 'python'
    '-u'
  end
end
assert_absolute_path(path)
click to toggle source
# File lib/turbot_runner/runner.rb, line 201
# Guard used for directory arguments.
#
# path - the path to check.
#
# Returns nil when +path+ is absolute.
# Raises RuntimeError ("<path> must be an absolute path") otherwise.
def assert_absolute_path(path)
  return if Pathname.new(path).absolute?

  raise "#{path} must be an absolute path"
end
build_command(script, input_file=nil)
click to toggle source
# File lib/turbot_runner/runner.rb, line 136
# Assembles the shell command line that runs +script+.
#
# script     - script file name; its extension must equal
#              script_extension for the manifest's language.
# input_file - optional path redirected to the script's stdin.
#
# stdout is always redirected to the script's .out file; stderr is
# redirected to the .err file only when @log_to_file is set.
#
# Returns the command String.
# Raises RuntimeError when the script's extension does not match.
def build_command(script, input_file=nil)
  unless script_extension == File.extname(script)
    raise "Could not run #{script} with #{language}"
  end

  pieces = [full_interpreter_path, additional_args, script, ">#{output_file(script)}"]
  pieces << "2>#{output_file(script, '.err')}" if @log_to_file
  pieces << "<#{input_file}" unless input_file.nil?
  pieces.join(' ')
end
duplicates_allowed()
click to toggle source
# File lib/turbot_runner/runner.rb, line 197
# Returns the manifest's :duplicates_allowed entry exactly as stored
# (nil when the manifest does not define it).
def duplicates_allowed
  manifest = @config
  manifest[:duplicates_allowed]
end
full_interpreter_path()
click to toggle source
# File lib/turbot_runner/runner.rb, line 80
# Returns the interpreter used to launch scripts: the path of the
# currently running ruby for ruby bots, otherwise the bare language name.
def full_interpreter_path
  # Assume the first python in PATH
  return language unless language == "ruby"

  # Ensure we use the same ruby as the current interpreter when
  # creating a subshell. Necessary for OSX packaged version.
  RbConfig.ruby
end
language()
click to toggle source
# File lib/turbot_runner/runner.rb, line 185
# Returns the manifest's :language entry, lower-cased.
def language
  declared = @config[:language]
  declared.downcase
end
load_config(directory)
click to toggle source
# File lib/turbot_runner/runner.rb, line 91
# Reads and parses the bot's manifest.
#
# directory - directory expected to contain manifest.json.
#
# Returns the parsed manifest as a Hash with symbolized keys.
# Raises RuntimeError when manifest.json is missing or is not valid JSON.
def load_config(directory)
  manifest_path = File.join(directory, 'manifest.json')
  raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

  begin
    # File.read rather than Kernel#open: it only ever treats its argument
    # as a file path and closes the handle itself.
    JSON.parse(File.read(manifest_path), :symbolize_names => true)
  rescue JSON::ParserError
    # TODO provide better error message
    raise "Could not parse #{manifest_path} as JSON"
  end
end
output_file(script, extension='.out')
click to toggle source
# File lib/turbot_runner/runner.rb, line 144
# Path of the file that holds a script's captured output.
#
# script    - script name or path; its directory part and the language's
#             script extension are dropped.
# extension - suffix of the output file ('.out' by default).
#
# Returns the path String inside @output_directory.
def output_file(script, extension='.out')
  # script_extension is nil for a language without a known extension;
  # File.basename raises TypeError on a nil suffix, so coerce it to ''
  # (which strips nothing) and leave unsupported-language reporting to
  # build_command.
  basename = File.basename(script, script_extension.to_s)
  File.join(@output_directory, basename) + extension
end
process_script_output(script_config, opts)
click to toggle source
# File lib/turbot_runner/runner.rb, line 118
# Replays one script's saved output, line by line, through a Processor.
#
# script_config - Hash describing the script; :file and :data_type are
#                 read here and the whole Hash is given to the Processor.
# opts          - Hash passed to Processor#process; when its
#                 :skip_data_types includes the script's data type,
#                 nothing is done.
#
# Nothing is processed when the script's output file does not exist.
def process_script_output(script_config, opts)
  skipped_types = opts[:skip_data_types]
  return if skipped_types && skipped_types.include?(script_config[:data_type])

  # The first argument to the Processor constructor is a nil
  # Runner. This is because no running behaviour
  # (e.g. interruptions etc) is required; we just want to do
  # record handling.
  processor = Processor.new(nil, script_config, @record_handler)

  file = output_file(script_config[:file])
  return unless File.exist?(file)

  File.open(file) do |f|
    f.each_line { |line| processor.process(line, opts) }
  end
end
run_script(script_config, input_file=nil)
click to toggle source
# File lib/turbot_runner/runner.rb, line 105
# Runs a single script through a ScriptRunner.
#
# script_config - Hash describing the script; :file names the script.
# input_file    - optional path fed to the script's stdin (see
#                 build_command).
#
# Returns the result of ScriptRunner#run.
def run_script(script_config, input_file=nil)
  script = script_config[:file]
  command = build_command(script, input_file)

  ScriptRunner.new(
    command,
    output_file(script),
    script_config,
    :record_handler => @record_handler,
    :timeout => @timeout
  ).run # returns boolean indicating success
end
scraper_config()
click to toggle source
# File lib/turbot_runner/runner.rb, line 163
# Returns the script configuration Hash for the scraper, with the keys
# :base_directory, :file, :data_type, :identifying_fields and
# :duplicates_allowed.
def scraper_config
  config = {}
  config[:base_directory]     = @base_directory
  config[:file]               = scraper_script
  config[:data_type]          = scraper_data_type
  config[:identifying_fields] = scraper_identifying_fields
  config[:duplicates_allowed] = duplicates_allowed
  config
end
scraper_data_type()
click to toggle source
# File lib/turbot_runner/runner.rb, line 189
# Returns the manifest's :data_type entry (nil when absent).
def scraper_data_type
  manifest = @config
  manifest[:data_type]
end
scraper_identifying_fields()
click to toggle source
# File lib/turbot_runner/runner.rb, line 193
# Returns the manifest's :identifying_fields entry (nil when absent).
def scraper_identifying_fields
  manifest = @config
  manifest[:identifying_fields]
end
scraper_output_file()
click to toggle source
# File lib/turbot_runner/runner.rb, line 181
# Returns the path of "scraper.out" inside the output directory.
def scraper_output_file
  output_name = 'scraper.out'
  File.join(@output_directory, output_name)
end
scraper_script()
click to toggle source
# File lib/turbot_runner/runner.rb, line 173
# Returns the scraper's file name: "scraper" followed by the script
# extension for the manifest's language.
def scraper_script
  'scraper' + script_extension.to_s
end
script_extension()
click to toggle source
# File lib/turbot_runner/runner.rb, line 149
# Returns the file extension of scripts in the manifest's language:
# ".rb" for ruby, ".py" for python, nil for anything else.
def script_extension
  case language
  when 'ruby'
    '.rb'
  when 'python'
    '.py'
  end
end
transformers()
click to toggle source
# File lib/turbot_runner/runner.rb, line 177
# Returns the manifest's :transformers entry, or an empty Array when the
# manifest has none.
def transformers
  configured = @config[:transformers]
  configured || []
end