class Bio::NeXML::Parser

# Convenience entry point: builds a Parser for +nexml+ and immediately
# parses it.
#
# nexml    - source forwarded unchanged to Parser.new.
# validate - flag forwarded unchanged to Parser.new (defaults to false).
#
# Returns whatever Parser#parse returns.
def self.parse( nexml, validate = false )
  parser = Parser.new( nexml, validate )
  parser.parse
end

Public Class Methods

new( nexml, validate = false ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 14
# Set up a parser around +nexml+.
#
# nexml    - source handed to #read to obtain the reader.
# validate - when truthy, #validate_nexml is invoked once the reader exists.
def initialize( nexml, validate = false )
  # lookup table that #cache reads from and writes to
  @cache = Hash.new

  # cursor over the document, as returned by #read
  @reader = read( nexml )

  # schema validation is opt-in
  return unless validate
  validate_nexml
end

Public Instance Methods

close() click to toggle source

Close the associated XML::Reader object and try to free other resources like @nexml

# File lib/bio/db/nexml/parser.rb, line 63
# Close the reader held in @reader.
#
# Returns whatever the reader's own #close returns. Only the reader is
# touched here; @nexml and @cache are left as they are.
def close
  reader = @reader
  reader.close
end
parse() click to toggle source

Is a factory method that returns an object of class Bio::NeXML::Nexml

# File lib/bio/db/nexml/parser.rb, line 28
# Parse the document and return the resulting NeXML::Nexml object.
#
# The result is memoised in @nexml, so repeated calls return the same
# object without touching the reader again. The reader is not closed here.
def parse
  # memoised result from an earlier call
  return @nexml if @nexml

  # position the cursor on the root element
  skip_leader

  # the root element carries 'version' and 'generator'
  @nexml = NeXML::Nexml.new( attribute( 'version' ), attribute( 'generator' ) )

  # dispatch on every top-level child; anything else is ignored
  while next_node
    name = local_name
    if name == "otus"
      @nexml.add_otus( parse_otus )
    elsif name == "trees"
      @nexml.add_trees( parse_trees )
    elsif name == "characters"
      @nexml.add_characters( parse_characters )
    end
  end

  @nexml
end

Private Instance Methods

attribute( name ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 100
# Fetch the attribute called +name+ by delegating to @reader[ name ].
#
# Returns whatever the reader returns for that name.
def attribute( name )
  reader = @reader
  reader[ name ]
end
cache( object = nil ) click to toggle source

Cache otus, otu, states, state, char, node

# File lib/bio/db/nexml/parser.rb, line 70
# Dual-purpose accessor for the id => object lookup table in @cache.
#
# With no argument (or a falsy one) it returns the table itself, so callers
# can write <tt>cache[ some_id ]</tt>. With an object it stores that object
# under <tt>object.id</tt> and returns the object.
def cache( object = nil )
  if object
    @cache[ object.id ] = object
  else
    @cache
  end
end
element_end?() click to toggle source

Check whether the node the reader is currently positioned on is the end of an element.

# File lib/bio/db/nexml/parser.rb, line 133
# True when the reader's current node type is XML::Reader::TYPE_END_ELEMENT.
def element_end?
  kind = @reader.node_type
  kind == XML::Reader::TYPE_END_ELEMENT
end
element_start?() click to toggle source

Check whether the node the reader is currently positioned on is the start of an element.

# File lib/bio/db/nexml/parser.rb, line 128
# True when the reader's current node type is XML::Reader::TYPE_ELEMENT.
def element_start?
  kind = @reader.node_type
  kind == XML::Reader::TYPE_ELEMENT
end
empty_element?() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 141
# Delegates to the reader's #empty_element? and returns its answer.
# The parse_* methods use this to return early instead of scanning for a
# matching end element.
def empty_element?
  reader = @reader
  reader.empty_element?
end
local_name() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 92
# Delegates to the reader's #local_name and returns its value; the parse_*
# methods dispatch on this string.
def local_name
  reader = @reader
  reader.local_name
end
next_node() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 104
# Advance the reader to the next node that is an element start, an element
# end or a text node, skipping every other node type.
#
# Returns true when such a node was reached, false once @reader.read
# reports that nothing is left.
def next_node
  loop do
    return false unless @reader.read
    return true if element_start? || element_end? || text_node?
  end
end
parse_cell( type ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 589
# Build a Cell from the 'cell' element the cursor is on.
#
# type - row type name; its last three characters are dropped before it is
#        checked for "Continuous".
#
# The 'char' attribute is resolved through the cache. The 'state' attribute
# is resolved through the cache too, unless the type contains "Continuous",
# in which case the raw attribute value is stored as the state.
#
# Returns the Cell; for a non-empty element the cursor is advanced until a
# node named 'cell' is reached.
def parse_cell( type )
  kind = type[ 0..-4 ]

  cell = Cell.new

  char_ref = attribute( 'char' )
  state_ref = attribute( 'state' )

  char = cache[ char_ref ]
  state = if kind =~ /Continuous/
            state_ref
          else
            cache[ state_ref ]
          end

  cell.state = state
  cell.char = char

  unless empty_element?
    # skip anything nested until the closing 'cell'
    while next_node
      break if local_name == 'cell'
    end
  end

  cell
end
parse_char( type ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 499
# Build a Char from the 'char' element the cursor is on and register it in
# the cache.
#
# type - accepted for signature compatibility with the other parse_*
#        methods; it does not influence the result.
#
# The 'states' attribute is resolved through the cache. When the Char
# responds to #codon= and a 'codon' attribute is present, it is copied over.
#
# Returns the Char; for a non-empty element the cursor is advanced until a
# node named 'char' is reached.
def parse_char( type )
  id = attribute( 'id' )
  label = attribute( 'label' )
  states = cache[ attribute( 'states' ) ]

  char = Char.new( id, states, :label => label )

  # 'codon' is only looked up when the object can actually store it
  if char.respond_to?( :codon= )
    codon = attribute( 'codon' )
    char.codon = codon if codon
  end

  cache char

  return char if empty_element?

  # skip anything nested until the closing 'char'
  while next_node
    break if local_name == 'char'
  end

  char
end
parse_characters() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 389
# Build a characters object from the 'characters' element the cursor is on.
#
# The concrete class is taken from the 'xsi:type' attribute: the local part
# of the QName (everything after the last ':') names a constant under NeXML.
# Any namespace prefix, or none at all, is accepted.
#
# Child 'format' and 'matrix' elements are parsed and attached; parsing
# stops at the next node named 'characters'.
#
# Returns the characters object.
# Raises ArgumentError when the element has no usable 'xsi:type'.
def parse_characters
  # get the taxon linkage
  otus = cache[ attribute( 'otus' ) ]

  # other attributes
  id = attribute( 'id' )
  label = attribute( 'label' )

  # determine the type; the prefix is document-defined, so strip whatever
  # precedes the last colon instead of assuming a fixed width
  type = attribute( 'xsi:type' ).to_s[ /[^:]+\z/ ]
  raise ArgumentError, "'characters' element #{id.inspect} has no xsi:type" unless type
  klass = NeXML.const_get( type )

  # type names containing "Cells" store one cell per state
  verbose = type =~ /Cells/ ? true : false

  characters = klass.new( id, :otus => otus, :label => label )

  while next_node
    case local_name
    when 'format'
      characters.add_format( parse_format( type ) )
    when 'matrix'
      characters.add_matrix( parse_matrix( type, verbose ) )
    when 'characters'
      break
    end
  end

  characters
end
parse_edge( type ) click to toggle source

When this function is called the cursor is at an ‘edge’ element. Return - an ‘edge’ object.

# File lib/bio/db/nexml/parser.rb, line 342
# Build an edge from the 'edge' element the cursor is on.
#
# type - type name of the enclosing tree or network (e.g. "FloatTree").
#        The first "Tree" or "Network" in it is replaced by "Edge" to get
#        the edge class name under NeXML. The argument itself is left
#        untouched, so frozen strings and reuse across calls are safe.
#
# 'source' and 'target' are resolved through the cache; 'length' is passed
# on as read.
#
# Returns the edge; for a non-empty element the cursor is advanced until a
# node named 'edge' is reached.
def parse_edge( type )
  id = attribute( 'id' )
  source = cache[ attribute( 'source' ) ]
  target = cache[ attribute( 'target' ) ]
  length = attribute( 'length' )

  # non-destructive substitution: never modify the caller's string
  edge_type = type.sub( /Tree|Network/, "Edge" )
  klass = NeXML.const_get( edge_type )
  edge = klass.new( id, :source => source, :target => target, :length => length )

  # according to the schema an 'edge' may have no child element.
  return edge if empty_element?

  while next_node
    # end of current 'edge' element has been reached
    break if local_name == 'edge'
  end

  edge
end
parse_format( type ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 421
# Build a Format from the 'format' element the cursor is on.
#
# type - type name passed through unchanged to #parse_states and
#        #parse_char.
#
# Every 'states' and 'char' child is parsed and added; parsing stops at the
# next node named 'format'.
#
# Returns the Format.
def parse_format( type )
  format = Format.new

  while next_node
    name = local_name
    break if name == 'format'

    if name == 'states'
      format.add_states( parse_states( type ) )
    elsif name == 'char'
      format.add_char( parse_char( type ) )
    end
  end

  format
end
parse_matrix( type, verbose ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 525
# Build a Matrix from the 'matrix' element the cursor is on.
#
# type    - characters type name; its last character is dropped and
#           "Matrix" appended to form the name handed to #parse_row.
# verbose - passed through unchanged to #parse_row.
#
# Every 'row' child is parsed and added; parsing stops at the next node
# named 'matrix'.
#
# Returns the Matrix.
def parse_matrix( type, verbose )
  matrix_type = type[ 0..-2 ] + "Matrix"

  matrix = Matrix.new

  while next_node
    name = local_name
    break if name == 'matrix'
    matrix.add_row( parse_row( matrix_type, verbose ) ) if name == 'row'
  end

  matrix
end
parse_member() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 494
# Resolve the 'state' attribute of the current element through the cache
# and return the cached object (nil when nothing is stored under that id).
def parse_member
  cache[ attribute( 'state' ) ]
end
parse_network() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 272
# Build a network from the 'network' element the cursor is on.
#
# The concrete class is taken from the 'xsi:type' attribute: the local part
# of the QName (everything after the last ':') names a constant under NeXML.
# Any namespace prefix, or none at all, is accepted.
#
# Child 'node' and 'edge' elements are parsed and added; a node answering
# true to #root? is also assigned as the network's root. Parsing stops at
# the next node named 'network'.
#
# Returns the network.
# Raises ArgumentError when the element has no usable 'xsi:type'.
def parse_network
  id = attribute( 'id' )
  label = attribute( 'label' )

  # the prefix is document-defined, so strip whatever precedes the last
  # colon instead of assuming a fixed width
  type = attribute( 'xsi:type' ).to_s[ /[^:]+\z/ ]
  raise ArgumentError, "'network' element #{id.inspect} has no xsi:type" unless type
  klass = NeXML.const_get( type )
  network = klass.new( id, :label => label )

  # a 'network' element *will* have child nodes.
  while next_node
    case local_name
    when "node"
      node = parse_node
      network.add_node node

      # root?
      network.root = node if node.root?
    when "edge"
      network.add_edge parse_edge( type )
    when "network"
      # end of current 'network' element has been reached
      break
    end
  end

  network
end
parse_node() click to toggle source

When this function is called the cursor is at a ‘node’ element. Return - a ‘node’ object.

# File lib/bio/db/nexml/parser.rb, line 311
# Build a NeXML::Node from the 'node' element the cursor is on and register
# it in the cache.
#
# The 'root' attribute is read as a boolean: only "true" and "1" mark the
# node as a root, so an explicit root="false" or root="0" (and a missing
# attribute) yields false. When an 'otu' attribute is present it is
# resolved through the cache.
#
# Returns the node; for a non-empty element the cursor is advanced until a
# node named 'node' is reached.
def parse_node
  id = attribute( 'id' )
  label = attribute( 'label' )

  # presence alone is not enough: the attribute may explicitly say "false"
  root = %w[ true 1 ].include?( attribute( 'root' ).to_s.strip )

  # is this node taxon linked
  otu_id = attribute( 'otu' )
  otu = otu_id ? cache[ otu_id ] : nil

  node = NeXML::Node.new( id, :otu => otu, :root => root, :label => label )
  cache node

  # according to the schema a 'node' may have no child element.
  return node if empty_element?

  # else, skip child elements until the closing 'node'
  while next_node
    break if local_name == 'node'
  end

  node
end
parse_opts() click to toggle source

Define XML parsing options for the libxml parser.

  1. remove blank nodes

  2. substitute entities

  3. forbid network access

# File lib/bio/db/nexml/parser.rb, line 115
# Option bitmask handed to the XML::Reader constructors in #read: the
# bitwise OR of the NOBLANKS, NOENT and NONET constants from
# XML::Parser::Options.
def parse_opts
  flags = XML::Parser::Options
  flags::NOBLANKS | flags::NOENT | flags::NONET
end
parse_otu() click to toggle source

When this function is called the cursor is at an ‘otu’ element. Return - an ‘otu’ object.

# File lib/bio/db/nexml/parser.rb, line 176
# Build a NeXML::Otu from the 'otu' element the cursor is on and register
# it in the cache.
#
# Returns the otu; for a non-empty element the cursor is advanced until a
# node named 'otu' is reached.
def parse_otu
  otu = NeXML::Otu.new( attribute( 'id' ), :label => attribute( 'label' ) )
  cache otu

  # an 'otu' may have no child element, in which case there is nothing to skip
  unless empty_element?
    while next_node
      # end of current 'otu' element has been reached
      break if local_name == 'otu'
    end
  end

  otu
end
parse_otus() click to toggle source

When this function is called the cursor is at an ‘otus’ element. Return - an ‘otus’ object

# File lib/bio/db/nexml/parser.rb, line 147
# Build a NeXML::Otus from the 'otus' element the cursor is on and register
# it in the cache.
#
# Every 'otu' child is parsed with #parse_otu and appended with <<.
#
# Returns the otus object; for a non-empty element the cursor is advanced
# until a node named 'otus' is reached.
def parse_otus
  otus = NeXML::Otus.new( attribute( 'id' ), :label => attribute( 'label' ) )
  cache otus

  # an 'otus' may have no child element
  unless empty_element?
    while next_node
      name = local_name
      # end of current 'otus' element has been reached
      break if name == "otus"
      otus << parse_otu if name == "otu"
    end
  end

  otus
end
parse_rootedge() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 367
# Build a RootEdge from the 'rootedge' element the cursor is on.
#
# 'target' is resolved through the cache; 'length' is passed on as read.
#
# Returns the RootEdge; for a non-empty element the cursor is advanced
# until a node named 'rootedge' is reached.
def parse_rootedge
  id = attribute( 'id' )
  target = cache[ attribute( 'target' ) ]
  length = attribute( 'length' )

  rootedge = RootEdge.new( id, :target => target, :length => length )

  # a 'rootedge' may have no child element, in which case there is nothing to skip
  unless empty_element?
    while next_node
      # end of current 'rootedge' element has been reached
      break if local_name == 'rootedge'
    end
  end

  rootedge
end
parse_row( type, verbose ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 544
# Build a row from the 'row' element the cursor is on.
#
# type    - matrix type name; the first "Matrix" in it is replaced by "Row"
#           and the result is handed to #parse_seq / #parse_cell.
# verbose - truthy selects CellRow, otherwise SeqRow.
#
# Every 'seq' child is added with #add_sequence and every 'cell' child with
# #add_cell; parsing stops at the next node named 'row'.
#
# Returns the row.
def parse_row( type, verbose )
  id = attribute( 'id' )
  label = attribute( 'label' )

  # NOTE(review): the 'otu' attribute is resolved through the cache but the
  # result is never handed to the row - confirm whether the row classes
  # expect an :otu option.
  _otu = cache[ attribute( 'otu' ) ]

  row_type = type.sub( /Matrix/, "Row" )
  row = ( verbose ? CellRow : SeqRow ).new( id, :label => label )

  while next_node
    name = local_name
    break if name == 'row'

    if name == 'seq'
      row.add_sequence( parse_seq( row_type ) )
    elsif name == 'cell'
      row.add_cell( parse_cell( row_type ) )
    end
  end

  row
end
parse_seq( type ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 569
# Build a Sequence from the 'seq' element the cursor is on.
#
# type - accepted for signature compatibility with #parse_cell; it does not
#        influence the result.
#
# For a non-empty element, the value of each node named '#text' is assigned
# to the sequence (a later one overwrites an earlier one) and parsing stops
# at the next node named 'seq'.
#
# Returns the Sequence.
def parse_seq( type )
  seq = Sequence.new

  return seq if empty_element?

  while next_node
    case local_name
    when '#text' then seq.value = value
    when 'seq'   then break
    end
  end

  seq
end
parse_state( type ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 470
# Build a State from the state-like element the cursor is on and register
# it in the cache.
#
# type - accepted for signature compatibility with the other parse_*
#        methods; it does not influence the result.
#
# For a non-empty element every 'member' child is resolved with
# #parse_member and added; parsing stops at the next node named 'state',
# 'polymorphic_state_set' or 'uncertain_state_set'.
#
# Returns the State.
def parse_state( type )
  state = State.new( attribute( 'id' ), attribute( 'symbol' ), :label => attribute( 'label' ) )

  cache state

  return state if empty_element?

  while next_node
    case local_name
    when 'state', 'polymorphic_state_set', 'uncertain_state_set'
      break
    when 'member'
      state.add_member( parse_member )
    end
  end

  state
end
parse_states( type ) click to toggle source
# File lib/bio/db/nexml/parser.rb, line 442
# Build a States object from the 'states' element the cursor is on.
#
# type - passed through unchanged to #parse_state.
#
# 'state', 'polymorphic_state_set' and 'uncertain_state_set' children are
# all parsed with #parse_state; the two set flavours additionally get their
# ambiguity set to :polymorphic or :uncertain. Parsing stops at the next
# node named 'states', after which the States object is registered in the
# cache.
#
# Returns the States object.
def parse_states( type )
  states = States.new( attribute( 'id' ), :label => attribute( 'label' ) )

  # element name => ambiguity marker for the two state-set flavours
  ambiguity_of = {
    'polymorphic_state_set' => :polymorphic,
    'uncertain_state_set'   => :uncertain
  }

  while next_node
    # remember the name: #parse_state moves the cursor
    name = local_name
    break if name == 'states'
    next unless name == 'state' || ambiguity_of.key?( name )

    state = parse_state( type )
    state.ambiguity = ambiguity_of[ name ] if ambiguity_of.key?( name )
    states.add_state( state )
  end

  cache states

  states
end
parse_tree() click to toggle source

When this function is called the cursor is at a ‘tree’ element. Return - a ‘tree’ object.

# File lib/bio/db/nexml/parser.rb, line 229
# Build a tree from the 'tree' element the cursor is on.
#
# The concrete class is taken from the 'xsi:type' attribute: the local part
# of the QName (everything after the last ':') names a constant under NeXML.
# Any namespace prefix, or none at all, is accepted.
#
# Child 'node' and 'edge' elements are parsed and added; a node answering
# true to #root? is also appended to the tree's roots. A 'rootedge' child is
# parsed, so the cursor moves past it, but it is not attached to the tree.
# Parsing stops at the next node named 'tree'.
#
# Returns the tree.
# Raises ArgumentError when the element has no usable 'xsi:type'.
def parse_tree
  id = attribute( 'id' )
  label = attribute( 'label' )

  # the prefix is document-defined, so strip whatever precedes the last
  # colon instead of assuming a fixed width
  type = attribute( 'xsi:type' ).to_s[ /[^:]+\z/ ]
  raise ArgumentError, "'tree' element #{id.inspect} has no xsi:type" unless type
  klass = NeXML.const_get( type )
  tree = klass.new( id, :label => label )

  # a 'tree' element *will* have child nodes.
  while next_node
    case local_name
    when "node"
      node = parse_node
      tree.add_node node

      # root?
      tree.roots << node if node.root?
    when "rootedge"
      # consume the 'rootedge' element; the result is deliberately dropped
      # XXX it looks like the super class(es)
      # can only deal with edges that have source and target
      parse_rootedge
    when "edge"
      tree.add_edge parse_edge( type )
    when "tree"
      # end of current 'tree' element has been reached
      break
    end
  end

  tree
end
parse_trees() click to toggle source

When this function is called the cursor is at a ‘trees’ element. Return - a ‘trees’ object.

# File lib/bio/db/nexml/parser.rb, line 201
# Build a NeXML::Trees from the 'trees' element the cursor is on.
#
# The 'otus' attribute is resolved through the cache. Every 'tree' child is
# parsed with #parse_tree and every 'network' child with #parse_network;
# both are appended with <<. Parsing stops at the next node named 'trees'.
#
# Returns the trees object.
def parse_trees
  otus = cache[ attribute( 'otus' ) ]
  id = attribute( 'id' )
  label = attribute( 'label' )

  trees = NeXML::Trees.new( id, :otus => otus, :label => label )

  # a 'trees' element *will* have child nodes.
  while next_node
    name = local_name
    # end of current 'trees' element has been reached
    break if name == "trees"

    if name == "tree"
      trees << parse_tree
    elsif name == "network"
      trees << parse_network
    end
  end

  trees
end
read( nexml ) click to toggle source

Determine if the ‘nexml’ is a file, string, or an io and accordingly return a XML::Reader object.

# File lib/bio/db/nexml/parser.rb, line 77
# Create the XML::Reader matching the kind of +nexml+ given.
#
# nexml - one of:
#         * a string ending in ".xml": treated as a file path;
#         * an IO: read from directly;
#         * any other String: treated as the XML document itself.
#
# The file test is anchored at the very end of the string (\z), so a
# multi-line XML string that merely has a line ending in ".xml" is still
# parsed as a document rather than opened as a path.
#
# Returns the XML::Reader, created with the options from #parse_opts.
# Raises ArgumentError for any other kind of input, instead of silently
# returning nil and failing later on the first reader call.
def read( nexml )
  case nexml
  when /\.xml\z/
    XML::Reader.file( nexml, :options => parse_opts )
  when IO
    XML::Reader.io( nexml, :options => parse_opts )
  when String
    XML::Reader.string( nexml, :options => parse_opts )
  else
    raise ArgumentError,
          "cannot read NeXML from #{nexml.class}: expected a path ending in .xml, an IO or a String"
  end
end
skip_leader() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 88
# Advance the reader until it sits on a node whose local name is "nexml".
#
# Returns nil once the element has been reached.
# Raises ArgumentError when the reader runs out of input first; without
# that check the loop would never terminate on a document that has no
# 'nexml' element.
def skip_leader
  until local_name == "nexml"
    # a falsy result from @reader.read means there is nothing left to read
    unless @reader.read
      raise ArgumentError, "no 'nexml' element found in the document"
    end
  end
end
text_node?() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 137
# True when the reader's current node type is XML::Reader::TYPE_TEXT.
def text_node?
  kind = @reader.node_type
  kind == XML::Reader::TYPE_TEXT
end
validate_nexml() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 121
# Ask the reader to validate against "schema/nexml.xsd", looked up relative
# to the directory of this source file.
#
# Returns true when the reader's #schema_validate result equals 0, nil
# otherwise.
#
# NOTE(review): the only visible caller (#initialize) ignores this return
# value, and the result is compared with 0 - confirm what #schema_validate
# actually returns for the XML::Reader version in use.
def validate_nexml
  schema = File.join( File.dirname( __FILE__ ), "schema/nexml.xsd" )
  status = @reader.schema_validate( schema )
  status == 0 ? true : nil
end
value() click to toggle source
# File lib/bio/db/nexml/parser.rb, line 96
# Delegates to the reader's #value and returns it; #parse_seq uses this to
# pick up the content of '#text' nodes.
def value
  reader = @reader
  reader.value
end