module MutationAssessor

Constants

ASTERISK
URL

Public Class Methods

add_predictions(tsv) click to toggle source
# File lib/rbbt/mutation/mutation_assessor.rb, line 117
# Adds a "MutationAssessor:Prediction" field to +tsv+.
#
# The TSV must expose a "UniProt/SwissProt ID" field (which may be the key)
# and a "Protein Mutation" field. Every usable (accession, mutation) pair is
# sent to +predict+ and the "Func. Impact" value of each result is stored
# in the new field. For :double TSVs the new field holds one entry per
# accession/mutation pair; otherwise it holds a single value.
#
# Values written to the new field:
# * "No Prediction" - mutation or accession missing, or nothing was returned
# * "TOLERATED"     - first and last character of the mutation are equal
# * otherwise the "Func. Impact" string returned by +predict+
#
# Returns the same +tsv+ object. Raises RuntimeError when +tsv+ is not a TSV
# or lacks one of the required fields.
def self.add_predictions(tsv)
  raise "Input not TSV" unless TSV === tsv

  raise "Field 'UniProt/SwissProt ID' Not in TSV" unless tsv.all_fields.include? "UniProt/SwissProt ID"

  raise "Field 'Protein Mutation' Not in TSV" unless tsv.fields.include? "Protein Mutation"

  # Mutations that are never submitted: missing values, indels, entries whose
  # first and last character match, and entries starting or ending with
  # ASTERISK. Shared by both TSV layouts so they filter identically.
  skip = lambda do |mutation|
    mutation.nil? || mutation.empty? || mutation =~ /Indel/ ||
      mutation[0] == mutation[-1] || mutation[-1] == ASTERISK || mutation[0] == ASTERISK
  end

  data = []
  if tsv.type == :double
    tsv.through :key, ["UniProt/SwissProt ID", "Protein Mutation"] do |key,values|
      uni_accs, mutations = values
      kept = (mutations || []).reject{|mutation| skip.call(mutation) }
      accs = (uni_accs || []).compact.reject{|v| v.empty? }.uniq
      next if accs.empty? || kept.empty?

      accs.each do |uni_acc|
        data << [uni_acc, kept]
      end
    end
  else
    tsv.through :key, ["UniProt/SwissProt ID", "Protein Mutation"] do |key,values|
      uni_acc, mutation = values
      next if uni_acc.nil? || uni_acc.empty?
      next if skip.call(mutation)
      data << [uni_acc, mutation]
    end
  end

  data.sort!

  # protein => { mutation => "Func. Impact" value }
  predictions = {}
  predict(data).each do |uni_acc, values|
    protein, mutation = uni_acc.split(/\s+/)

    predictions[protein] ||= {}
    predictions[protein][mutation] = values["Func. Impact"]
  end

  # Label for a single accession/mutation pair. The stored prediction is a
  # plain value, so it is returned as is in both layouts.
  classify = lambda do |uni_acc, mutation|
    if mutation.nil? || mutation.empty?
      "No Prediction"
    elsif mutation[0] == mutation[-1]
      "TOLERATED"
    elsif uni_acc.nil? || uni_acc.empty?
      "No Prediction"
    else
      pred = predictions[uni_acc][mutation] if predictions.include? uni_acc
      pred.nil? ? "No Prediction" : pred
    end
  end

  uni_acc_pos = tsv.identify_field "UniProt/SwissProt ID"
  protein_field = tsv.identify_field "Protein Mutation"

  if tsv.type == :double
    tsv.add_field "MutationAssessor:Prediction" do |key,values|
      uni_accs = if uni_acc_pos === :key
                   [key]
                 else
                   values[uni_acc_pos] || []
                 end

      next if uni_accs.compact.reject{|v| v.empty? }.empty?

      # A missing mutation list yields "No Prediction" for every accession
      mutations = values[protein_field] || []

      uni_accs.zip(mutations).collect do |uni_acc,mutation|
        classify.call(uni_acc, mutation)
      end
    end
  else
    tsv.add_field "MutationAssessor:Prediction" do |key,values|
      uni_acc = if uni_acc_pos === :key
                  key
                else
                  values[uni_acc_pos]
                end

      next if uni_acc.nil? || uni_acc.empty?

      classify.call(uni_acc, values[protein_field])
    end
  end

  tsv
end
chunked_predict(mutations, max = 1000) click to toggle source
# File lib/rbbt/mutation/mutation_assessor.rb, line 96
# Runs +predict+ over +mutations+ in several smaller requests.
#
# +mutations+ maps each gene to one mutation or a list of mutations. The
# (gene, mutation) pairs are split with Misc.divide into
# ceil(pairs / max) chunks; each chunk is regrouped by gene, sent to
# +predict+, and the partial results are merged into a single TSV.
# Returns nil when Misc.divide yields no chunks.
def self.chunked_predict(mutations, max = 1000)
  pairs = mutations.collect do |gene, list|
    list = [list] unless Array === list
    list.collect { |mutation| [gene, mutation] }
  end.flatten(1)

  chunks = (pairs.length.to_f / max).ceil
  several = chunks > 1

  Log.debug("Mutation Assessor ran with #{chunks} chunks of #{ max } mutations") if several

  merged = nil
  Misc.divide(pairs, chunks).each_with_index do |chunk, index|
    Log.debug("Mutation Assessor ran with #{chunks} chunks: chunk #{index + 1}") if several

    # Rebuild the gene => [mutations] shape that predict expects,
    # dropping pairs without a gene.
    by_gene = {}
    chunk.each do |gene, mutation|
      next if gene.nil?
      (by_gene[gene] ||= []) << mutation
    end

    partial = predict(by_gene)
    merged = merged.nil? ? partial : TSV.setup(merged.merge(partial))
  end

  merged
end
predict(mutations) click to toggle source

`mutations` is a hash that maps each gene, given as a UniProt protein accession, to a list of amino acid substitutions.

# File lib/rbbt/mutation/mutation_assessor.rb, line 13
# Queries the Mutation Assessor web service for a set of mutations.
#
# +mutations+ pairs each gene (UniProt accession) with one amino acid
# substitution or a list of them. The pairs are posted to URL as
# tab-separated "gene<TAB>mutation" lines, and the content of the last <p>
# element of the returned HTML is parsed as a TSV.
#
# Returns a :list TSV without the "Mapping issue" field; an empty TSV when
# +mutations+ is nil or empty.
#
# Raises RuntimeError when the service cannot parse the variants, when the
# response is empty, or when the job is still unfinished after 10 attempts
# (30 seconds apart).
def self.predict(mutations)
  # nil must be tested first: calling empty? on nil raises NoMethodError
  return TSV.setup({}, :header_hash => "", :type => :list) if mutations.nil? or mutations.empty?

  vars = mutations.collect{|gene, list|
    list = [list] unless Array === list
    list.collect do |mut|
      [gene, mut] * "\t"
    end
  }.flatten.sort.uniq * "\n" + "\n"

  post_data = { :beenQ => "1",
    :info=> "on",
    :tableQ=> "on",
    :chr=> "on",
    :bsites=> "on",
    :timeout => 600,
    :vars => vars}.collect{|k,v| [k,v] * "="} * "&"

  Log.debug "Querying Mutation Assessor for: #{vars.split(/\n/).length}"
  tries = 0
  nocache = false
  begin
    doc = nil
    TmpFile.with_file(post_data) do |post_file|
      Log.medium "Updating cache:" if nocache == :update
      doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-file" => post_file }, :nocache => nocache))
    end

    textareas = doc.css('p')

    if textareas.empty?
      Log.debug "No text area"
      Log.debug doc.to_s
      raise NotDone, "No text aread found in response HTML"
    end

    result = textareas.last.content

    if result =~ /Cannot parse variant/
      # Keep the submitted payload on disk so the failure can be inspected
      tmp = TmpFile.tmp_file
      variants = tmp + ".list"
      Open.write(variants, post_data )
      raise "Cannot parse variants. Variants in file #{ variants }"
    end

    # Rows still marked "[sent]" mean the job has not finished yet
    raise NotDone, "Not done" if result =~ /\t\[sent\]\t/
  rescue NotDone
    tries += 1
    # Later attempts pass :nocache => :update to Open.read
    nocache = :update

    # Give up right away on the last attempt instead of sleeping first
    raise "Error processing Mutation Assessor response" unless tries < 10

    Log.medium "Mutation Assessor not done, waiting:"
    sleep 30

    Log.medium "Retrying mutation assessor"
    retry
  end

  if result.empty? and mutations.any?
    tmp = TmpFile.tmp_file
    html = tmp + ".html"
    variants = tmp + ".list"
    Open.write(html, doc.content)
    Open.write(variants, post_data )
    raise "Result empty. Possible error. html in #{ html }, variants in #{variants}"
  end

  # Strip the leading tab of the header and the numeric row index of every
  # line. No regexp flag is used: in Ruby "s" selects the Windows-31J
  # encoding (not dot-all) and fails on UTF-8 text with non-ASCII bytes.
  result.sub!(/^\t/, '')
  result.gsub!(/\n\s*\d+\s*\t/, "\n")

  Log.medium "Mutation Assessor DONE."

  if result.empty?
    TSV.setup({}, :header_hash => "", :type => :list)
  else
    res = TSV.open(StringIO.new(result), :header_hash => "", :type => :list)
    res = res.slice((res.fields - ["Mapping issue"]))
    res
  end
end