module Polyphen2::Batch

Constants

OPTIONS
REFRESH_OPTIONS

Public Class Methods

chunked_predict(query, max = 1000)
# File lib/rbbt/mutation/polyphen.rb, line 90
# Runs Polyphen2 predictions for a large query by splitting it into several
# smaller submissions and merging the per-chunk results.
#
# query - String with one mutation per line (it is split on "\n").
# max   - target chunk size (default 1000); the mutation list is divided
#         into ceil(count / max) chunks through Misc.divide.
#
# Returns the merged results of every chunk for which predict produced a
# value (wrapped with TSV.setup whenever two results are merged), or nil when
# no chunk produced a result.
def self.chunked_predict(query, max = 1000)
  mutations = query.split("\n")
  chunks = (mutations.length.to_f / max).ceil

  Log.debug("Polyphen2 ran with #{chunks} chunks of #{ max } mutations") if chunks > 1

  Misc.divide(mutations, chunks).each_with_index.inject(nil) do |acc, (list, index)|
    Log.debug("Polyphen2 ran with #{chunks} chunks: chunk #{index + 1}") if chunks > 1

    result = predict(list * "\n")

    # predict returns nil when a submission fails. Such chunks are skipped
    # so that a single failure neither raises on merge(nil) nor discards
    # the results already gathered from the other chunks.
    if result.nil?
      acc
    elsif acc.nil?
      result
    else
      TSV.setup(acc.merge(result))
    end
  end
end
predict(query)
# File lib/rbbt/mutation/polyphen.rb, line 34
# Submits a batch query to the Polyphen2 web service, polls the status page
# until the job's row shows a result link or an error, and returns the
# downloaded results re-keyed by protein mutation.
#
# query - batch text sent as the "_ggi_batch" form field.
#
# Returns a TSV with key field "Protein Mutation" and the fields of the
# downloaded table, or nil when no result link was obtained (for instance
# when the job's row reports an error).
def self.predict(query)
  submission = OPTIONS.merge("_ggi_batch" => query)

  # The digest of the submitted options doubles as the job description; it is
  # what identifies this job's row in the status table further down.
  job_id = Digest::MD5.hexdigest(submission.inspect)
  submission["description"] = job_id

  # POSTs the given form fields to the service and parses the HTML response.
  post = lambda do |fields|
    payload = fields.collect { |name, value| [name, value].join("=") }.join("&")
    Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{payload}'"}, :nocache => true))
  end

  sid = post.call(submission).css('input[name=sid]').attr('value')
  refresh = REFRESH_OPTIONS.merge("sid" => sid)

  view_link = nil
  loop do
    page = post.call(refresh)
    status_rows = page.css('body > table')[1].css('table')[2].css('tr')

    # The job's row is the first six-cell row whose last cell holds the description.
    job_row = status_rows.find do |tr|
      tds = tr.css('td')
      tds.length == 6 && tds.last.content.strip == job_id
    end
    cells = job_row.css('td')

    # An error leaves view_link as nil, which makes the method return nil.
    break if cells[2].content =~ /Error/

    if cells[1].content =~ /Short/
      view_link = cells[1].css('a').attr('href')
      break
    end

    sleep 5
  end

  return nil if view_link.nil?

  # Download the results, trimming the spaces that pad the tab separators.
  results = TSV.open(
    Open.open(Polyphen2::URL_BASE + view_link, :nocache => true),
    :double,
    :merge => true,
    :fix => Proc.new { |line| line.gsub(/ *\t */, "\t") }
  )
  results.fields = results.fields.collect { |name| name.strip }
  results.key_field = results.key_field.strip

  by_mutation = TSV.setup({}, :key_field => "Protein Mutation", :fields => results.fields)

  # Each entry may carry several merged rows; every one of them is stored under
  # "<original key>:<o_aa1><o_pos><o_aa2>".
  results.through do |id, values|
    values.zip_fields.each do |entry|
      position, wildtype, mutant = entry.values_at("o_pos", "o_aa1", "o_aa2")
      by_mutation[[id, [wildtype, position, mutant].join("")].join(":")] = entry
    end
  end

  by_mutation
end