class ORF
Constants
- DEFAULT_CODON_TABLE
Attributes
logger[R]
options[RW]
seq[R]
sequence[R]
Public Class Methods
find(sequence, options = {})
click to toggle source
For a given sequence, find longest ORF
# File lib/orf.rb, line 40
# Convenience entry point: builds an ORF for +sequence+ with +options+ and
# returns whatever the instance-level #find returns. The result is also kept
# in the class-level @result instance variable.
#
# @param sequence [String, Object] sequence handed straight to ORF.new
# @param options [Hash] options handed straight to ORF.new
# @return [Object] the return value of ORF#find
def self.find(sequence, options = {})
  @result = ORF.new(sequence, options).find
end
new(sequence, options = {}, logger_file = nil)
click to toggle source
class initializer that normalizes sequence to Bio::Sequence,
merges given options and creates logger
# File lib/orf.rb, line 18
# Sets up logging, normalises the sequence, merges the options over
# ORFFinder::DEFAULT_OPTIONS and immediately runs #find.
#
# @param sequence [String, Object] a plain String is wrapped in
#   Bio::Sequence::NA; any other object is stored unchanged
# @param options [Hash, nil] overrides for the default options; nil is
#   treated as an empty hash
# @param logger_file [Logger, nil] logger to clone; when nil a logger that
#   writes to STDOUT is created
def initialize(sequence, options = {}, logger_file = nil)
  # Normalise before first use: previously only the merge further down was
  # guarded against nil, so options[:debug] raised NoMethodError first.
  options ||= {}

  # logger for instance
  @logger = logger_file.nil? ? Logger.new(STDOUT) : logger_file.clone
  logger.progname = 'ORFCommon'
  logger.level = (options[:debug] ? Logger::INFO : Logger::ERROR)

  # only exact String instances are converted, as before
  sequence = Bio::Sequence::NA.new(sequence) if sequence.class == String
  @sequence = sequence
  @seq = @sequence.to_s

  self.options = ORFFinder::DEFAULT_OPTIONS.merge(options)
  logger.info 'ORF has been initialized'
  find
end
Public Instance Methods
aa(codon_table = DEFAULT_CODON_TABLE)
click to toggle source
return amino acid sequence
# File lib/orf.rb, line 49
# Returns the translated result stored in @res_aa.
#
# When @res_aa is already set it is returned as is (the codon_table argument
# is not consulted in that case). Otherwise #longest is run; if that leaves
# @res_aa unset, the value returned by #longest is handed back instead.
#
# @param codon_table [Object] codon table forwarded to #longest
# @return [Object] @res_aa, or the return value of #longest when @res_aa
#   is still nil afterwards
def aa(codon_table = DEFAULT_CODON_TABLE)
  if @res_aa.nil?
    from_longest = longest(codon_table)
    # longest may return early without filling @res_aa
    return from_longest if @res_aa.nil?
  end
  @res_aa
end
find()
click to toggle source
finds all possible orfs in sequence
# File lib/orf.rb, line 68
# Runs the ORF search over the three reading frames and stores the outcome
# in @orf.
#
# Preference order for the ranges kept per frame (decided by the helpers):
#   1. range that has both start and stop codons
#   2. range that only has a start or a stop codon
#   3. full sequence
#
# @return [Object] the sequence itself when it is nil or empty (nothing is
#   searched and @orf is left untouched); otherwise the hash keyed by
#   :frame1, :frame2 and :frame3 that is also assigned to @orf
def find
  # nothing to search in a nil or empty sequence
  return sequence if sequence.nil? || sequence.size == 0

  frames = { frame1: {}, frame2: {}, frame3: {} }

  starts = all_codons_indices(:start)
  stops = all_codons_indices(:stop)
  ranges_by_frame = all_sequences(starts, stops, seq.size, [0, 1, 2])

  logger.info "start codons idx: #{starts}"
  logger.info "stop codons idx: #{stops}"
  logger.info ranges_by_frame

  # find_longest fills frames[...][:orfs] and frames[...][:longest]
  ranges_by_frame.each_with_index do |ranges, position|
    find_longest(ranges, position, frames)
  end

  # print ranges if debug is activated
  if options[:debug]
    frames.each do |name, data|
      data[:orfs].each { |range| print_range(name, range) }
    end
  end

  @orf = frames
end
nt(codon_table = DEFAULT_CODON_TABLE)
click to toggle source
return nucleotide sequence
# File lib/orf.rb, line 60
# Returns the nucleotide result: the value cached in @res_nt when present,
# otherwise whatever #longest returns for the given codon table.
#
# @param codon_table [Object] codon table forwarded to #longest
# @return [Object] @res_nt or the return value of #longest
def nt(codon_table = DEFAULT_CODON_TABLE)
  @res_nt.nil? ? longest(codon_table) : @res_nt
end
Private Instance Methods
all_codons_indices(option_name)
click to toggle source
Find all indexes for valid codons
(either for :start or :stop)
# File lib/orf.rb, line 167
# Collects every position at which one of the codons configured under
# options[option_name] occurs in seq.
#
# Each hit is passed through index_normalization before being stored, and
# the combined list is flattened and sorted.
#
# @param option_name [Symbol, String] options key, e.g. :start or :stop
# @return [Array] sorted positions; empty when the option is nil or empty
def all_codons_indices(option_name)
  key = option_name.to_sym
  codons = options[key]
  # no codons configured for this option: nothing to look for
  return [] if codons.nil? || codons.empty?

  hits_per_codon = codons.map do |codon|
    hits = []
    position = seq.index(codon, 0)
    while position
      hits << index_normalization(key, position)
      # resume one character later so overlapping matches are found too
      position = seq.index(codon, position + 1)
    end
    hits
  end

  hits_per_codon.flatten.sort
end
all_sequences(start_idx, stop_idx, seq_size, read_frame = [0, 1, 2])
click to toggle source
# File lib/orf.rb, line 302
# For every requested reading frame, narrows the start and stop indexes to
# that frame and asks valid_sequences_by_frame for the resulting ranges.
#
# @param start_idx [Array] start codon indexes for the whole sequence
# @param stop_idx [Array] stop codon indexes for the whole sequence
# @param seq_size [Integer] sequence length forwarded to valid_sequences_by_frame
# @param read_frame [Array<Integer>] frames to evaluate
# @return [Array] one valid_sequences_by_frame result per entry of read_frame,
#   in the same order
def all_sequences(start_idx, stop_idx, seq_size, read_frame = [0, 1, 2])
  read_frame.map do |frame|
    frame_starts = filter_codons_by_frame(start_idx, frame, true)
    frame_stops = filter_codons_by_frame(stop_idx, frame, false)
    valid_sequences_by_frame(frame_starts, frame_stops, frame, seq_size)
  end
end
filter_codons_by_frame(idxs, frame, start = true)
click to toggle source
get indexes only from a given frame. Because of a bug, the start flag must be given,
indicating whether it is looking for start or stop codons in frame
# File lib/orf.rb, line 195
# Keeps only the indexes that fall in the given reading frame.
#
# A start index i is kept when (i - frame) is a multiple of 3; a stop index
# is kept when (i + 1 - frame) is a multiple of 3.
# NOTE(review): the extra +1 for stops presumably reflects how stop indexes
# are normalised elsewhere in the file - confirm against index_normalization.
#
# @param idxs [Array<Integer>] candidate codon indexes
# @param frame [Integer] reading-frame offset
# @param start [Boolean] truthy when idxs are start codons, falsy for stops
# @return [Array<Integer>] the in-frame subset of idxs, original order kept
def filter_codons_by_frame(idxs, frame, start = true)
  shift = start ? 0 : 1
  idxs.select { |i| ((i + shift - frame) % 3).zero? }
end
find_longest(frame, index, orf)
click to toggle source
iterate over all ranges in frame and find the longest
# File lib/orf.rb, line 102
# Stores, for one frame, the ranges to report and the longest among them.
#
# Ranges without a truthy :fallback flag win; fallback ranges are used only
# when there are none. The chosen list goes to orf[name][:orfs] and every
# range sharing the maximum length (stop - start + 1) goes to
# orf[name][:longest], where name is frame_sym(index).
#
# @param frame [Array<Hash>] ranges with :start, :stop and :fallback keys
# @param index [Integer] frame position, translated by frame_sym
# @param orf [Hash] result hash that is updated in place
# @return [Array<Hash>] the list stored under :longest
def find_longest(frame, index, orf)
  fallbacks, proper = frame.partition { |range| range[:fallback] }

  slot = orf[frame_sym(index)]
  candidates = proper.empty? ? fallbacks : proper
  slot[:orfs] = candidates

  lengths = candidates.map { |range| range[:stop] - range[:start] + 1 }
  top = lengths.max
  # keep every range that ties for the maximum, in original order
  slot[:longest] = candidates.zip(lengths)
                             .select { |_, len| len == top }
                             .map(&:first)
end
longest(codon_table = DEFAULT_CODON_TABLE)
click to toggle source
get the longest sequence in each frame and translate
to aminoacid
# File lib/orf.rb, line 133
# Extracts the longest range(s) of every frame as nucleotide sequences and
# translates them, caching both results in @res_nt and @res_aa.
#
# #find is run first when @orf has not been populated. If @orf is still nil
# or empty afterwards, the default hash of empty strings is returned and
# neither cache is written.
#
# @param codon_table [Object] passed as second argument to each sequence's
#   translate call
# @return [Hash] nucleotide sequences keyed by frame
def longest(codon_table = DEFAULT_CODON_TABLE)
  # run find method if search has not been done
  find if @orf.nil?

  nt_by_frame = { frame1: [''], frame2: [''], frame3: [''] }
  aa_by_frame = nt_by_frame.clone
  # nothing was found, so hand back the defaults
  return nt_by_frame if @orf.nil? || @orf.size == 0

  @orf.each do |frame_key, frame_data|
    seqs = frame_data[:longest].map { |range| get_range(range) }
    # a frame without ranges still gets one empty sequence
    seqs = [Bio::Sequence::NA.new('')] if seqs.empty?
    nt_by_frame[frame_key] = seqs
  end
  @res_nt = nt_by_frame

  # translate to aa sequence
  @res_nt.each do |frame_key, seqs|
    aa_by_frame[frame_key] =
      if seqs.nil? || seqs.empty?
        ['']
      else
        seqs.map { |nt_seq| nt_seq.translate(1, codon_table) }
      end
  end
  @res_aa = aa_by_frame

  # the nucleotide hash is the default return value
  nt_by_frame
end
sequences_in_frame(idxs, arrays, seq_size, frame, added_pos)
click to toggle source
given start and stop codon indexes, decide which are the valid
sequences for an orf
TODO: reject sequences that have a stop codon in them
# File lib/orf.rb, line 252 def sequences_in_frame(idxs, arrays, seq_size, frame, added_pos) start = idxs[:start] stop = idxs[:stop] arr = [] # # # iterate on each start codon start.each do |pos_start| # iterate on each stop codon stop.each do |pos_stop| # add a fallback where starts from begining # note: must check if from beggining to end there # are stop codons, if so do not show it if (pos_stop + 1 - frame) >= options[:min] && !(pos_stop > stop.bsearch { |el| el >= (frame - 1) }) arr << { start: frame, stop: pos_stop, fallback: true } end # ignore if start is bigger than stop index next if pos_start >= pos_stop # ignore if there is a stop codon between pos_start # and pos_stop next if pos_stop > stop.bsearch { |el| el >= (pos_start - 1) } # ignore if size of orf is smaller than minimum next if (pos_stop + 1 - pos_start) < options[:min] # if all conditions hold add as valid orf arr << { start: pos_start, stop: pos_stop, fallback: added_pos } end next unless ((seq_size - 1) - pos_start) >= options[:min] next if !(temp_res = stop.bsearch { |el| el >= (pos_start - 1) }).nil? && (seq_size - 1) > temp_res arr << { start: pos_start, stop: seq_size - 1, fallback: true } end # arr.each do |item| if item[:fallback] arrays[:fallback] << item else arrays[:valid] << item end end end
valid_sequences_by_frame(start_idxs, stop_idxs, frame, seq_size)
click to toggle source
from the combination of start and stop indexes, find
the longest one
# File lib/orf.rb, line 208
# Produces the list of ranges to consider for one reading frame.
#
# seq_size is first trimmed by (seq_size - frame) % 3. When no stop index is
# given, seq_size - 1 is used as the only stop; when no start index is
# given, the frame offset is used as the only start. sequences_in_frame then
# fills the valid and fallback lists. If nothing valid was found, the
# de-duplicated fallbacks are returned, minus any whose get_range_str length
# equals size_of_frame(frame).
#
# @param start_idxs [Array<Integer>] in-frame start indexes (not modified)
# @param stop_idxs [Array<Integer>] in-frame stop indexes (not modified)
# @param frame [Integer] reading-frame offset
# @param seq_size [Integer] length of the sequence
# @return [Array<Hash>] valid ranges, or the filtered fallback ranges
def valid_sequences_by_frame(start_idxs, stop_idxs, frame, seq_size)
  seq_size -= (seq_size - frame) % 3
  no_start = start_idxs.empty?
  no_stop = stop_idxs.empty?

  # work on copies so the caller's arrays stay untouched
  starts = start_idxs.clone
  stops = stop_idxs.clone
  stops.push(seq_size - 1) if no_stop
  starts.push(frame) if no_start

  if options[:debug]
    logger.info "frame: #{frame}"
    logger.info " start: #{starts} | stop :#{stops}"
    logger.info " seq size: #{seq_size}"
    logger.info " #{seq[frame..seq_size]}"
  end

  buckets = { valid: [], fallback: [] }
  sequences_in_frame({ start: starts, stop: stops }, buckets,
                     seq_size, frame, no_start || no_stop)
  return buckets[:valid] unless buckets[:valid].empty?

  # drop fallbacks whose extracted string is as long as the whole frame
  remaining = buckets[:fallback].uniq.reject do |range|
    get_range_str(range[:start], range[:stop], false).size == size_of_frame(frame)
  end
  logger.info 'no ORF with start and stop codons,' \
              ' defaulting to fallback'
  remaining
end