module Polyphen2::Batch
Constants
- OPTIONS
- REFRESH_OPTIONS
Public Class Methods
chunked_predict(query, max = 1000)
click to toggle source
# File lib/rbbt/mutation/polyphen.rb, line 90
#
# Runs +predict+ over +query+ in several smaller batches and merges the
# per-batch tables into a single result.
#
# query:: String holding one mutation per line.
# max::   Batch sizing factor (default 1000). The number of chunks is the
#         mutation count divided by +max+, rounded up; how the mutations are
#         distributed among the chunks is decided by Misc.divide.
#
# Returns the merged table (passed through TSV.setup whenever two chunk
# results are combined), or nil when no chunk produced a result.
def self.chunked_predict(query, max = 1000)
  mutations = query.split("\n")
  chunks = (mutations.length.to_f / max).ceil

  Log.debug("Polyphen2 ran with #{chunks} chunks of #{ max } mutations") if chunks > 1

  num = 0
  Misc.divide(mutations, chunks).inject(nil) do |acc, list|
    num += 1
    Log.debug("Polyphen2 ran with #{chunks} chunks: chunk #{num}") if chunks > 1

    result = predict(list * "\n")

    if result.nil?
      # predict returns nil for a failed job. Keep what has been gathered so
      # far instead of calling acc.merge(nil), which raises TypeError.
      acc
    elsif acc.nil?
      result
    else
      TSV.setup(acc.merge(result))
    end
  end
end
predict(query)
click to toggle source
# File lib/rbbt/mutation/polyphen.rb, line 34
#
# Submits +query+ as a batch job by POSTing to Polyphen2::URL, polls the same
# URL until the job's row reports either an error or a downloadable result,
# then loads that result into a table.
#
# The job is recognised among the listed rows by its description, which is
# the MD5 hex digest of the submitted options.
#
# query:: String sent as the "_ggi_batch" form value.
#
# Returns a table with key field "Protein Mutation" whose keys have the form
# "<original key>:<o_aa1><o_pos><o_aa2>", or nil when the job row matches
# /Error/.
def self.predict(query)
  # Builds the single-quoted key=value&key=value string handed to wget's
  # --post-data option.
  post_data = Proc.new { |params| "'#{params.collect{|k,v| [k,v] * "="} * "&"}'" }

  options = OPTIONS.merge "_ggi_batch" => query
  # The digest is taken before "description" is added to the options.
  desc = Digest::MD5.hexdigest(options.inspect)
  options["description"] = desc

  doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => post_data.call(options)}, :nocache => true))
  sid = doc.css('input[name=sid]').attr('value')

  options = REFRESH_OPTIONS.merge "sid" => sid
  view_link = nil

  loop do
    doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => post_data.call(options)}, :nocache => true))

    result_table = doc.css('body > table')[1].css('table')[2]

    # Our job is the six-cell row whose last cell carries the description.
    job_row = result_table.css('tr').select do |tr|
      tds = tr.css('td')
      tds.length == 6 && tds.last.content.strip == desc
    end.first

    cells = job_row.css('td')

    # On error view_link is still nil, which makes the method return nil.
    break if cells[2].content =~ /Error/

    if cells[1].content =~ /Short/
      view_link = cells[1].css('a').attr('href')
      break
    end

    # Neither error nor result yet: wait before polling again.
    sleep 5
  end

  return nil if view_link.nil?

  # Removes the spaces surrounding each tab in a line of the result file.
  tidy_line = Proc.new{|l| l.gsub(/ *\t */, "\t")}
  result_stream = Open.open(Polyphen2::URL_BASE + view_link, :nocache => true)
  tsv = TSV.open result_stream, :double, :merge => true, :fix => tidy_line

  tsv.fields = tsv.fields.map { |name| name.strip }
  tsv.key_field = tsv.key_field.strip

  new_tsv = TSV.setup({}, :key_field => "Protein Mutation", :fields => tsv.fields)

  # Re-key every record by "<key>:<wt><pos><mt>".
  tsv.through do |acc, values|
    values.zip_fields.each do |record|
      pos, wt, mt = record.values_at "o_pos", "o_aa1", "o_aa2"
      change = [wt, pos, mt].join("")
      new_tsv[[acc, change].join(":")] = record
    end
  end

  new_tsv
end