module TransFIC
Constants
- ASTERISK
- URL
Public Class Methods
chunked_predict(mutations, max = 1000)
click to toggle source
# File lib/rbbt/mutation/transFIC.rb, line 69 def self.chunked_predict(mutations, max = 1000) chunks = mutations.length.to_f / max chunks = chunks.ceil Log.debug("TransFIC ran with #{chunks} chunks of #{ max } mutations") if chunks > 1 num = 1 Misc.divide(mutations, chunks).inject(nil) do |acc, list| Log.debug("TransFIC ran with #{chunks} chunks: chunk #{num}") if chunks > 1 begin result = predict(list) rescue if list.length > 2 Log.debug("Error predicting in transFIC. Divinding list of size #{list.length}") result = chunked_predict(list, list.length / 2) else Log.debug("Error predicting in transFIC. Single error detected") next end end if acc.nil? acc = result else acc = TSV.setup(acc.merge(result)) end num += 1 acc end end
predict(mutations)
click to toggle source
mutations is a hash of genes in Uniprot protein accession pointing to lists of aminoacid substitutions
# File lib/rbbt/mutation/transFIC.rb, line 18 def self.predict(mutations) options = {} ensp2uni = Organism.identifiers("Hsa").index :target => "UniProt/SwissProt ID", :fields => "Ensembl Protein ID", :persist => true searchText = mutations.collect{|mutation| protein, change = mutation.split(":"); next if ensp2uni[protein].nil?; [ensp2uni[protein], change] * "\t"}.compact.uniq * "\n" Log.debug "Querying TransFIC for: #{mutations.length} mutations" TmpFile.with_file(searchText) do |file| test_url = CMD.cmd("curl -X PUT -T '#{ file }' '#{ URL }'").read result = nil begin Misc.insist(5) do result = CMD.cmd("curl -X GET '#{ test_url }'").read raise result.split("\n").select{|line| line =~ /Error/}.first if result =~ /Error/ while result =~ /executing/ sleep 10 result = CMD.cmd("curl -X GET '#{ test_url }'").read end raise result.split("\n").select{|line| line =~ /Error/}.first if result =~ /Error/ end rescue if $!.message =~ /validating/ Log.debug(Open.read(file)) end raise $! end Log.medium("TransFIC DONE") tsv = TSV.setup({}, :key_field => "Protein Mutation", :fields => %w(siftTransfic siftTransficLabel pph2Transfic pph2TransficLabel maTransfic maTransficLabel), :type => :list) result.split("\n").each do |line| next if line[0] == "#"[0] id, hgnc, hgncdesc, transcript, ensp, sw, protein_position, amino_acids, sift, polyphen, mass, siftTransfic, siftTransficLabel, pph2Transfic, pph2TransficLabel, maTransfic, maTransficLabel = line.split("\t") change = [amino_acids.split("/").first, protein_position, amino_acids.split("/").last] * "" mutation = [ensp,change] * ":" tsv[mutation] = [siftTransfic, siftTransficLabel, pph2Transfic, pph2TransficLabel, maTransfic, maTransficLabel] end tsv.select(mutations) end end