class Transfuse::Transfuse

Public Class Methods

new(threads, verbose) click to toggle source
# File lib/transfuse/transfuse.rb, line 16
def initialize threads, verbose
  @threads = threads
  @verbose = verbose
end

Public Instance Methods

check_dependencies() click to toggle source
# File lib/transfuse/transfuse.rb, line 21
def check_dependencies
  # Check dependencies if they are relevant to the command issued,
  # and handle any commands to install missing ones
  gem_dir = Gem.loaded_specs['transfuse'].full_gem_path
  gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')

  return Bindeps.missing gem_deps

end
check_files(string, option) click to toggle source
# File lib/transfuse/transfuse.rb, line 41
def check_files string, option
  # puts "check file string: #{string}" if @verbose
  abort "Please specify --#{option} option" if string.nil?
  list = []
  string.split(",").each do |file|
    file = File.expand_path(file)
    if File.exist?(file)
      puts "#{File.basename(file)} exists" if @verbose
      list << file
    else
      abort "#{File.basename(file)} not found"
    end
  end
  return list
end
cluster(file, id) click to toggle source
# File lib/transfuse/transfuse.rb, line 88
def cluster file, id
  puts "clustering #{file}" if @verbose
  cluster = Cluster.new @threads, @verbose, id
  return cluster.run file
end
concatenate(assemblies) click to toggle source
# File lib/transfuse/transfuse.rb, line 57
def concatenate assemblies
  catted_fasta = "all-"
  fasta = []
  assemblies.each do |name|
    fasta << File.basename(name, File.extname(name))[0..5]
  end
  catted_fasta << fasta.join("-")
  catted_fasta << ".fa"
  puts "concatenating assemblies into #{catted_fasta}" if @verbose
  cmd = "cat "
  assemblies.each do |file|
    cmd << " #{file} "
  end
  cmd << " > #{catted_fasta}"
  catter = Cmd.new cmd
  catter.run catted_fasta
  return File.expand_path(catted_fasta)
end
consensus(msa, scores, output) click to toggle source
# File lib/transfuse/transfuse.rb, line 94
def consensus msa, scores, output
  cons = Consensus.new(@verbose)
  return cons.run(msa, scores, output)
end
filter(files, scores) click to toggle source
# File lib/transfuse/transfuse.rb, line 115
def filter files, scores
  filtered_files = []
  files.each_with_index do |file, index|
    new_filename = "#{File.basename(file, File.extname(file))}_filtered.fa"
    if !File.exist?(new_filename) or File.stat(new_filename).size < 1
      File.open(new_filename, "wb") do |out|
        puts "filtering #{file}..." if @verbose
        Bio::FastaFormat.open(file).each do |entry|
          contig_name = entry.entry_id
          contig_name = "contig#{index}_#{contig_name}"
          if scores.key?(contig_name) and
             scores[contig_name][:score] > 0.01 and
             scores[contig_name][:coverage] >= 1
            out.write ">#{contig_name}\n"
            out.write "#{entry.seq}\n"
          elsif !scores.key?(contig_name)
            abort "Can't find '#{contig_name}' in scores"
          end
        end
      end
    end
    filtered_files << File.expand_path(new_filename)
  end
  return filtered_files
end
install_dependencies() click to toggle source
# File lib/transfuse/transfuse.rb, line 31
def install_dependencies
  # Check dependencies if they are relevant to the command issued,
  # and handle any commands to install missing ones
  gem_dir = Gem.loaded_specs['transfuse'].full_gem_path
  gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')

  Bindeps.require gem_deps

end
load_fasta(fasta) click to toggle source
# File lib/transfuse/transfuse.rb, line 76
def load_fasta fasta
  print "loading fasta sequence #{fasta}..." if @verbose
  @sequences = {}
  count = 1
  Bio::FastaFormat.open(fasta).each do |entry|
    @sequences[entry.entry_id] = entry.seq.to_s
    print "." if count%10_000==0 and @verbose
    count +=1
  end
  puts " Done" if @verbose
end
load_scores(files) click to toggle source
# File lib/transfuse/transfuse.rb, line 99
def load_scores files
  scores = {}
  files.each do |file|
    CSV.foreach(file, :headers => true,
                      :header_converters => :symbol,
                      :converters => :all) do |row|
      name = row[:contig_name]
      scores[name] = { :score => row[:score].to_f,
                       :p_good => row[:p_good].to_f,
                       :p_bases_covered => row[:p_bases_covered].to_f,
                       :coverage => row[:coverage].to_f }
    end
  end
  return scores
end
transrate(files, left, right) click to toggle source
# File lib/transfuse/transfuse.rb, line 193
def transrate files, left, right
  unless left.is_a?(Array)
    left = [left]
  end
  unless right.is_a?(Array)
    right = [right]
  end
  scores = {}
  shortname = ""
  files.each do |n|
    a = File.basename(n).split("_").first
    if a.length > 5
      a = a[0..4]
    end
    # shortname << File.basename(n, File.extname(n))[0..4]
  end
  scores_file = "#{shortname}_scores.csv"
  if File.exist?(scores_file)
    puts "loading scores from file" if @verbose
    File.open(scores_file).each do |line|
      name, score, p_good, p_bases_covered, coverage = line.chomp.split("\t")
      scores[name] = { :score => score.to_f,
                       :p_good => p_good.to_f,
                       :p_bases_covered => p_bases_covered.to_f,
                       :coverage => coverage.to_f }
    end
  else
    files.each_with_index do |fasta, index|
      puts "transrate on #{fasta}" if @verbose
      dir = "transrate_#{File.basename(fasta, File.extname(fasta))}"
      Dir.mkdir(dir) unless Dir.exist?(dir)
      Dir.chdir(dir) do
        assembly = Transrate::Assembly.new(fasta)
        transrater = Transrate::Transrater.new(assembly, nil, threads:@threads)
        rename = "assembly#{index}_score_optimisation.csv"
        transrater.read_metrics(left.join(','), right.join(','))
        File.rename("assembly_score_optimisation.csv", rename)
        assembly.each do |name, contig|
          name = "contig#{index}_#{name}"
          scores[name] = { :score => contig.score.to_f,
                           :p_good => contig.p_good.to_f,
                           :p_bases_covered => contig.p_bases_covered.to_f,
                           :coverage => contig.coverage.to_f }

        end
        File.open("summary.txt","w") do |out|
          out.write "fasta\tscore\toptimal\tcutoff\n"
          out.write "#{fasta}\t#{transrater.assembly_score}\t#{transrater.assembly_optimal_score("prefix").join("\t")}\n"
        end
      end
    end
    File.open(scores_file, "wb") do |out|
      scores.each do |name, hash|
        out.write "#{name}\t#{hash[:score]}\t#{hash[:p_good]}\t"
        out.write "#{hash[:p_bases_covered]}\t#{hash[:coverage]}\n"
      end
    end
  end
  return scores
end
transrate_consensus(file, output, left, right) click to toggle source
# File lib/transfuse/transfuse.rb, line 141
def transrate_consensus file, output, left, right
  output = File.expand_path(output)
  puts "transrate on #{file}" if @verbose
  file = File.expand_path(file)
  name = File.basename(file, File.extname(file))
  dir = "transrate_#{name}"
  Dir.mkdir(dir) unless Dir.exist?(dir)
  Dir.chdir(dir) do
    assembly = Transrate::Assembly.new(file)
    transrater = Transrate::Transrater.new(assembly, nil, threads:@threads)
    rename = "assembly_#{name}_score_optimisation.csv"
    rm = transrater.read_metrics(left.join(','), right.join(','))
    stats = rm.read_stats
    File.rename("assembly_score_optimisation.csv", rename)
    scores={}
    assembly.each do |name, contig|
      scores[name] = { :score => contig.score.to_f,
                       :p_good => contig.p_good.to_f,
                       :p_bases_covered => contig.p_bases_covered.to_f,
                       :coverage => contig.coverage.to_f }
    end
    scores_file = "#{name}_scores.csv"
    stats_file = "../#{name}_stats.txt"
    puts "  writing scores" if @verbose
    File.open(scores_file, "wb") do |out|
      scores.each do |name, hash|
        out.write "#{name}\t#{hash[:score]}\t#{hash[:p_good]}\t"
        out.write "#{hash[:p_bases_covered]}\t#{hash[:coverage]}\n"
      end
    end
    puts "  writing filtered fasta file" if @verbose
    File.open(output, "wb") do |out|
      assembly.each do |name, contig|
        if contig.score.to_f > 0.01 and contig.coverage.to_f >= 1
          out.write ">#{name}\n"
          out.write "#{contig.seq.seq}\n"
        end
      end
    end
    puts "  writing stats" if @verbose
    File.open(stats_file, "wb") do |out|
      stats.each do |key, value|
        out.write "#{key}\t#{value}\n"
      end
      out.write "assembly score:\t#{transrater.assembly_score}\n"
      optimal = transrater.assembly_optimal_score("prefix")
      out.write "optimal score :\t#{optimal[0]}\n"
      out.write "cutoff        :\t#{optimal[1]}\n"
    end
  end
end