class Bio::Spoctopus::Result
Public Class Methods
create_from_output(spoctopus_output)
click to toggle source
Given the fasta-ish file output from spoctopus, parse it into a SignalPeptideTransmembraneDomainProtein.
Example without TMD: >wrapperSeq gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg ggggggggggggggggggggggggggggggggggggg
Example with 2 TMD >wrapperSeq iiiiiiiiiiMMMMMMMMMMMMMMMMMMMMMooooooooooooooooooooooooooooo ooooMMMMMMMMMMMMMMMMMMMMMiiiiiMMMMMMMMMMMMMMMMMMMMMo
Example with SP and TMD >wrapperSeq nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnSSSSSSSSSSSSSSSooooooooooooooo ooooooooooooooooooooooooooooooooooooo
# File lib/bio/appl/octopus.rb, line 114 def self.create_from_output(spoctopus_output) # puts spoctopus_output # split the fasta into the real parts lines = spoctopus_output.split("\n") # Error checking unless lines[0].match(/^\>/) and lines.length > 1 raise Exception, "Unexpected OCTOPUS output file: #{spoctopus_output.inspect}. STDERR: #{File.open(err.path).read}" end seq = lines[1..(lines.length-1)].join('') # Taken from http://octopus.cbr.su.se/OCTOPUS_DATA/readme # and supplemented by experiment, as there doesn't seem to be one available for # SPOCTOPUS, only OCTOPUS. # # Currently dips, hairpins, unannotated and reentrants are ignored. unless seq.match(/^[ioMgnSHRrDd\.T]+$/) raise Exception, "Unexpected characters in SPOCTOPUS output sequence: #{seq}" end tmd = Bio::Transmembrane::SignalPeptideTransmembraneDomainProtein.new # deal with nothing proteins return tmd if seq.match(/^g*$/) seq.scan(/S+/) do if tmd.signal? raise Exception, "Only 1 Signal Peptide is expected!. SPOCTOPUS output was #{seq}" end s = Bio::Transmembrane::SignalPeptide.new s.start = $~.offset(0)[0]+1 s.stop = $~.offset(0)[1] tmd.signal_peptide = s end seq.scan(/M+/) do # for each transmembrane domain t = Bio::Transmembrane::OrientedTransmembraneDomain.new t.start = $~.offset(0)[0]+1 t.stop = $~.offset(0)[1] # set orientation # if at the start of the protein it is harder if t.start == 1 if t.stop == seq.length #all TMD, so we don't know t.orientation = Bio::Transmembrane::OrientedTransmembraneDomain::UNKNOWN else char = seq[t.stop-2..t.stop-2] if char == 'o' t.orientation = Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT else t.orientation = Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN end end else # usual - TMD does not start at exactly the beginning char = seq[t.start-2..t.start-2] if char == 'i' t.orientation = Bio::Transmembrane::OrientedTransmembraneDomain::INSIDE_OUT else t.orientation = Bio::Transmembrane::OrientedTransmembraneDomain::OUTSIDE_IN end end tmd.transmembrane_domains.push t end return tmd end