class Transfuse::Cluster

Public Class Methods

new(threads, verbose, id)
# File lib/transfuse/cluster.rb, line 8
# Sets up a clusterer around the vsearch executable.
#
# The executable is looked up with Which.which('vsearch'); the first
# result is kept in @vsearch.
#
# threads - stored as-is in @threads
# verbose - stored as-is in @verbose; truthy enables progress output
# id      - stored in @id after conversion with to_s
#
# Raises RuntimeError when the lookup yields no executable.
def initialize(threads, verbose, id)
  @vsearch = Which.which('vsearch').first
  unless @vsearch
    raise "vsearch was not in the PATH - please install it"
  end
  @id, @threads, @verbose = id.to_s, threads, verbose
end

Public Instance Methods

generate_vsearch_command(fasta, out, msa)
# File lib/transfuse/cluster.rb, line 32
# Assembles the vsearch command line as a single String.
#
# fasta - value given to --cluster_fast
# out   - value given to --uc
# msa   - value given to --msaout
#
# @vsearch, @id and @threads supply the executable, the --id value and
# the --threads value respectively.
#
# Returns a new String; @vsearch itself is not modified.
def generate_vsearch_command(fasta, out, msa)
  options = [
    "--cluster_fast #{fasta}",
    "--id #{@id}",
    "--iddef 0",      # cd-hit definition of sequence id
    "--qmask none",   # no masking
    "--strand both",
    "--uc #{out}",
    "--msaout #{msa}",
    "--threads #{@threads}"
  ]
  "#{@vsearch} #{options.join(' ')}"
end
parse_vsearch_output(cluster_output, msa_output)
# File lib/transfuse/cluster.rb, line 45
# Groups the sequences of a vsearch alignment file by the cluster each
# sequence was assigned to in the cluster table.
#
# cluster_output - path of a tab-separated cluster table; for lines that
#                  start with "S" or "H", column 2 is read as the cluster
#                  id and column 9 as the sequence name
# msa_output     - path of a FASTA alignment, read via Bio::FastaFormat
#
# Returns a Hash mapping cluster id (String) to an Array of
# { :name => ..., :seq => ... } hashes in the order they appear in the
# alignment. Entries whose id is "consensus" are skipped, and a leading
# "*" on an entry id is removed before the cluster lookup. Names absent
# from the cluster table end up under the nil key.
#
# Progress is printed to stdout when @verbose is truthy.
def parse_vsearch_output(cluster_output, msa_output)
  print "parsing vsearch output" if @verbose
  lookup = {} # sequence name => cluster id
  count = 0
  # File.foreach closes the file once iteration finishes, so no handle is
  # left open waiting for garbage collection.
  File.foreach(cluster_output) do |line|
    count += 1
    if line.start_with?("S", "H")
      cols = line.chomp.split("\t")
      lookup[cols[8]] = cols[1]
    end
    print "." if @verbose && (count % 10_000).zero?
  end
  puts " Done" if @verbose

  print "parsing msa output    " if @verbose
  count = 0
  msa = {}
  # NOTE(review): the reader is opened without a block, so whether its
  # file handle gets closed depends on BioRuby - confirm.
  Bio::FastaFormat.open(msa_output).each do |entry|
    count += 1
    name = entry.entry_id
    if name != "consensus"
      name = name[1..-1] if name.start_with?("*")
      # what cluster is name in?
      cluster = lookup[name]
      (msa[cluster] ||= []) << { :name => name, :seq => entry.seq.seq }
    end
    print "." if @verbose && (count % 10_000).zero?
  end
  puts " Done" if @verbose
  msa
end
run(fasta)
# File lib/transfuse/cluster.rb, line 16
# Runs vsearch on +fasta+ and parses the two files it reports.
#
# Returns whatever parse_vsearch_output returns for those files.
def run(fasta)
  clust_path, aln_path = vsearch(fasta)
  parse_vsearch_output(clust_path, aln_path)
end
vsearch(fasta)
# File lib/transfuse/cluster.rb, line 21
# Builds the vsearch command for +fasta+ and executes it through Cmd.
#
# Both output names are derived from the basename of +fasta+ plus @id,
# so they are relative paths: "<basename>-<id>.clust" and
# "<basename>-<id>.aln".
#
# Returns [cluster_path, alignment_path].
def vsearch(fasta)
  print "running vsearch..." if @verbose
  stem = "#{File.basename(fasta)}-#{@id}"
  clust_path = "#{stem}.clust"
  aln_path = "#{stem}.aln"
  command = generate_vsearch_command(fasta, clust_path, aln_path)
  # NOTE(review): Cmd#run receives the cluster file path; what it does
  # with it is defined outside this file - confirm.
  Cmd.new(command).run(clust_path)
  puts " Done. Created #{clust_path}" if @verbose
  [clust_path, aln_path]
end