class Arboretum::Scandent::Parser

Parser interprets the tokens formed from a Scandent string and forms ScandentRules that represent the interpreted form of the string. ScandentRules can then be matched to Elements in a DocTree.

Public Class Methods

parse_arg(arg_tokens) click to toggle source

Parse an argument given to a pseudo-class

# File lib/arboretum/scandent.rb, line 609
# Parse an argument given to a pseudo-class.
#
# arg_tokens:: array of tokens, each indexable as [type, text, ...]
#
# Returns, depending on the shape of the tokens:
# * the enclosed text when a single token is wrapped in matching quotes
# * a Regexp when a single token is wrapped in T_VBARSLASH / T_SLASHVBAR
# * a Formula when the argument is a lone T_KEY_EVEN / T_KEY_ODD token or
#   contains a T_FORM_N token
# * an Integer or Float for a lone LITERAL_INT / LITERAL_FLOAT token
# * nil when there are no tokens at all
#
# Raises InvalidExpressionException for any other token sequence.
def self.parse_arg(arg_tokens)
  arg_token_types = arg_tokens.map { |token| token[0] }
  first_type = arg_token_types[0]
  single = arg_tokens.length == 1
  wrapped = arg_tokens.length == 3
  # Opening and closing quote must be of the same kind
  quoted = (first_type == :T_SQUOTE || first_type == :T_DQUOTE) && arg_token_types[2] == first_type

  if wrapped && quoted
    arg_tokens[1][1]
  elsif wrapped && first_type == :T_VBARSLASH && arg_token_types[2] == :T_SLASHVBAR
    Regexp.new(arg_tokens[1][1])
  elsif (single && (first_type == :T_KEY_EVEN || first_type == :T_KEY_ODD)) || arg_token_types.include?(:T_FORM_N)
    Formula.new(arg_tokens)
  elsif single && first_type == :LITERAL_INT
    arg_tokens[0][1].to_i
  elsif single && first_type == :LITERAL_FLOAT
    arg_tokens[0][1].to_f
  elsif arg_tokens.empty?
    nil
  else
    # Rebuild the argument text without mutating a string literal
    arg_str = arg_tokens.map { |token| token[1] }.join
    raise InvalidExpressionException.new("Invalid argument '#{arg_str}'")
  end
end
parse_path_tokens(path_tokens, type) click to toggle source

Parse an individual path of a rule

# File lib/arboretum/scandent.rb, line 418
# Parse an individual path of a rule.
#
# path_tokens:: tokens of one path, each indexable as [type, text, state]
# type::        :PATH_LISTENER or :PATH_LOCATOR
#
# The path is cut into steps at every delimiter token whose state is
# :STATE_ROOT_PATH. A listener path is cut after each delimiter and a
# locator path before it.
#
# Returns a ListenerPath or LocatorPath holding the parsed steps.
# Raises InvalidExpressionException when the path is empty or does not end
# in :STATE_ROOT_PATH, and ParseException when type is unknown.
def self.parse_path_tokens(path_tokens, type)
  # An empty path has no last token whose end state could be inspected
  raise InvalidExpressionException.new("Path contains no tokens") if path_tokens.empty?

  # Double check that the end state of the path is valid
  end_state = path_tokens.last[2]
  raise InvalidExpressionException.new("End state of path is '#{end_state}' instead of :STATE_ROOT_PATH") if end_state != :STATE_ROOT_PATH

  step_delimiters = [:T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2]
  at_boundary = ->(token) { step_delimiters.include?(token[0]) && token[2] == :STATE_ROOT_PATH }

  # Separate the path into its steps and pick the matching path class
  if type == :PATH_LISTENER
    path_class = ListenerPath
    path_steps_tokens = path_tokens.slice_after { |token| at_boundary.call(token) }.to_a
  elsif type == :PATH_LOCATOR
    path_class = LocatorPath
    path_steps_tokens = path_tokens.slice_before { |token| at_boundary.call(token) }.to_a
  else
    raise ParseException.new("Unknown step type")
  end

  # Parse each step individually
  path_steps = path_steps_tokens.map { |step| Parser.parse_step_tokens(step, type) }
  path_class.new(path_steps)
end
parse_rule_string(rule_string, type) click to toggle source

Parse a Scandent string by giving it to the Tokenizer and then parsing the results

# File lib/arboretum/scandent.rb, line 395
# Parse a Scandent string: run it through Tokenizer.tokenize, then pass the
# resulting tokens and the given type on to Parser.parse_rule_tokens.
def self.parse_rule_string(rule_string, type)
  rule_tokens = Tokenizer.tokenize(rule_string)
  Parser.parse_rule_tokens(rule_tokens, type)
end
parse_rule_tokens(rule_tokens, type) click to toggle source

Directly parse Scandent string tokens

# File lib/arboretum/scandent.rb, line 400
# Directly parse the tokens of a Scandent string.
#
# The tokens are cut into paths after every T_COMMA token whose state is
# :STATE_ROOT_PATH, and a trailing T_COMMA is dropped from each path. Every
# path is then parsed with Parser.parse_path_tokens.
#
# Returns a ListenerRule for :PATH_LISTENER and a LocatorRule for
# :PATH_LOCATOR. Raises ParseException for any other type.
def self.parse_rule_tokens(rule_tokens, type)
  segments = rule_tokens.slice_after do |tok|
    tok[0] == :T_COMMA && tok[2] == :STATE_ROOT_PATH
  end.to_a

  # Strip the delimiter that slice_after left at the end of a segment
  segments.each do |segment|
    segment.pop if segment.last[0] == :T_COMMA
  end

  parsed_paths = segments.map { |segment| Parser.parse_path_tokens(segment, type) }

  return ListenerRule.new(parsed_paths) if type == :PATH_LISTENER
  return LocatorRule.new(parsed_paths) if type == :PATH_LOCATOR

  raise ParseException.new("Unknown step type")
end
parse_step_tokens(step_tokens, type) click to toggle source

Parse an individual step of a path

# File lib/arboretum/scandent.rb, line 444
# Parse an individual step of a path.
#
# step_tokens:: tokens of one step, each indexable as [type, text, ...];
#               the array itself is left unmodified
# type::        :PATH_LISTENER or :PATH_LOCATOR
#
# Returns a ListenerStep or LocatorStep built from the parsed components.
#
# Raises ParseException for an unknown type or an unexpected token.
# Raises InvalidExpressionException when the step has no tokens, ends in
# the middle of a construct, or otherwise breaks the step grammar.
def self.parse_step_tokens(step_tokens, type)
  if type != :PATH_LISTENER && type != :PATH_LOCATOR
    raise ParseException.new("Unknown step type")
  end

  # Work on a whitespace-free copy so the caller's array is not mutated
  tokens = step_tokens.reject { |token| token[0] == :T_WHITESPACE }
  raise InvalidExpressionException.new("Step contains no tokens") if tokens.empty?

  # The action is looked up from the last token of a listener step and
  # from the first token of a locator step
  action_token = type == :PATH_LISTENER ? tokens.last : tokens.first
  action = @@actions.fetch(action_token[0], :ACTION_SELF)

  # Initialize all of the components that a step uses to match an element
  element_ref = []
  tag = []
  namespace = []
  id = []
  attrs = {
    has: [],
    contains: Hash.new { |hash, key| hash[key] = [] },
    equals: Hash.new { |hash, key| hash[key] = [] },
    matches: Hash.new { |hash, key| hash[key] = [] }
  }
  pseudo_exps = []
  valid_rules = []

  literal_types = [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT]
  index = -1

  # Consume the next token; raise instead of returning nil past the end
  take = lambda do |message = "Unexpected end of step"|
    token = tokens[index += 1]
    raise InvalidExpressionException.new(message) if token.nil?
    token
  end

  # Consume the next token and require its type to be one of +allowed+
  expect = lambda do |allowed, message = nil|
    token = take.call
    unless allowed.include?(token[0])
      raise InvalidExpressionException.new(message || "Expected #{allowed.join(' or ')} but found #{token[0]}")
    end
    token
  end

  # Consume <quote> <literal> <same quote> T_RBRAK and return the literal text
  quoted_value = lambda do |message = nil|
    quote = expect.call([:T_DQUOTE, :T_SQUOTE], message)
    text = expect.call(literal_types)[1]
    expect.call([quote[0]])
    expect.call([:T_RBRAK])
    text
  end

  # Walk through the tokens of this step one at a time to parse grammar
  while index + 1 < tokens.length
    index_token = take.call

    case index_token[0]
    when :T_PCT
      tag << expect.call([:LITERAL_IDENT])[1]
    when :T_AT
      namespace << expect.call([:LITERAL_IDENT])[1]
    when :T_PND
      id << expect.call([:LITERAL_IDENT])[1]
    when :T_COLON
      pseudo_name = expect.call([:LITERAL_IDENT])[1].to_sym
      arg_tokens = []

      # Only peek at the following token: when it is not an opening
      # parenthesis it belongs to the rest of the step and must stay
      # available to the main loop
      following = tokens[index + 1]
      if following && following[0] == :T_LPAREN
        index += 1
        loop do
          arg_token = take.call("Could not find matching R_PAREN for :#{pseudo_name}")
          break if arg_token[0] == :T_RPAREN
          arg_tokens << arg_token
        end
      end
      pseudo_exps << [pseudo_name, Parser.parse_arg(arg_tokens)]
    when :T_ASTERISK
      # Adds no restrictions, so do nothing
    when :T_TILDE
      element_ref << :ELEMENT_ROOT
    when :T_DOT
      element_ref << :ELEMENT_SELF
    when :T_LBRAK
      attr_name = expect.call([:LITERAL_IDENT])[1].to_sym
      operator = take.call

      case operator[0]
      when :T_EQL
        attrs[:contains][attr_name] << quoted_value.call("Expected a string after '='")
      when :T_EQL2
        # The value is stored as its whitespace-separated words
        attrs[:equals][attr_name] << quoted_value.call.split
      when :T_TILDE_EQL
        expect.call([:T_VBARSLASH])
        pattern = Regexp.new(expect.call(literal_types)[1])
        expect.call([:T_SLASHVBAR])
        expect.call([:T_RBRAK])
        attrs[:matches][attr_name] << pattern
      when :T_RBRAK
        attrs[:has] << attr_name
      else
        raise InvalidExpressionException.new("Unexpected #{operator[0]} after attribute name '#{attr_name}'")
      end
    when :T_LBRACE
      # Collect the text between this brace and its matching closing brace;
      # depth counts every brace, including one directly after the opener
      depth = 1
      inner_text = []
      loop do
        inner = tokens[index += 1]
        if inner.nil?
          raise InvalidExpressionException.new("Could not find matching R_BRACE in #{inner_text.join}")
        end
        depth += 1 if inner[0] == :T_LBRACE
        depth -= 1 if inner[0] == :T_RBRACE
        break if depth.zero?
        inner_text << inner[1]
      end
      valid_rules << Parser.parse_rule_string(inner_text.join, :PATH_LOCATOR)
    when :T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2
      # Do nothing since the action has already been determined
    else
      raise ParseException.new("Consumed unexpected token: #{index_token}")
    end
  end # All tokens consumed

  # Validate results
  raise InvalidExpressionException.new("A step may name at most one tag") if tag.length > 1
  raise InvalidExpressionException.new("A step may name at most one namespace") if namespace.length > 1
  raise InvalidExpressionException.new("A step may have at most one element reference") if element_ref.length > 1

  step_class = type == :PATH_LISTENER ? ListenerStep : LocatorStep
  step_class.new(action, element_ref[0], tag[0], namespace[0], id[0], attrs, pseudo_exps, valid_rules)
end