class Arboretum::Scandent::Parser
Parser
interprets tokens formed by a Scandent
string and forms ScandentRules that represent the interpreted form of the string. ScandentRules can then be matched to Elements in a DocTree.
Public Class Methods
parse_arg(arg_tokens)
click to toggle source
Parse an argument given to a pseudo-class
# File lib/arboretum/scandent.rb, line 609
# Parse an argument given to a pseudo-class.
#
# arg_tokens - Array of tokens found between the parentheses of a pseudo-class.
#              Each token is read as token[0] (type symbol) and token[1] (text).
#
# Returns, in order of precedence:
#   * the inner text (token[1] of the middle token) for a three-token
#     single- or double-quoted argument,
#   * a Regexp for a three-token |/ ... /| argument,
#   * a Formula built from the tokens for a lone even/odd keyword or anything
#     containing a :T_FORM_N token (Formula is defined elsewhere in the project),
#   * an Integer or Float for a lone numeric literal,
#   * nil when there are no tokens.
#
# Raises InvalidExpressionException for any other token sequence.
def self.parse_arg(arg_tokens)
  arg_token_types = arg_tokens.map { |token| token[0] }
  token_count = arg_tokens.length
  first_type = arg_token_types[0]
  third_type = arg_token_types[2]

  single_quoted = first_type == :T_SQUOTE && third_type == :T_SQUOTE
  double_quoted = first_type == :T_DQUOTE && third_type == :T_DQUOTE
  regex_wrapped = first_type == :T_VBARSLASH && third_type == :T_SLASHVBAR
  lone_keyword = !(arg_token_types & [:T_KEY_EVEN, :T_KEY_ODD]).empty? && token_count == 1

  if (single_quoted || double_quoted) && token_count == 3
    arg_tokens[1][1]
  elsif regex_wrapped && token_count == 3
    Regexp.new(arg_tokens[1][1])
  elsif lone_keyword || arg_token_types.include?(:T_FORM_N)
    Formula.new(arg_tokens)
  elsif first_type == :LITERAL_INT && token_count == 1
    arg_tokens[0][1].to_i
  elsif first_type == :LITERAL_FLOAT && token_count == 1
    arg_tokens[0][1].to_f
  elsif token_count.zero?
    nil
  else
    # Join the token texts instead of appending to a string literal, so this
    # keeps working when string literals are frozen.
    arg_str = arg_tokens.map { |token| token[1] }.join
    raise InvalidExpressionException.new("Invalid argument '#{arg_str}'")
  end
end
parse_path_tokens(path_tokens, type)
click to toggle source
Parse an individual path of a rule
# File lib/arboretum/scandent.rb, line 418
# Parse an individual path of a rule.
#
# path_tokens - Array of tokens for one path. Each token is read as token[0]
#               (type symbol) and token[2] (tokenizer state symbol).
# type        - :PATH_LISTENER or :PATH_LOCATOR.
#
# Returns a ListenerPath or LocatorPath wrapping the parsed steps.
#
# Raises InvalidExpressionException if the path has no tokens or its last
# token is not in :STATE_ROOT_PATH.
# Raises ParseException if type is neither :PATH_LISTENER nor :PATH_LOCATOR.
def self.parse_path_tokens(path_tokens, type)
  # An empty path (e.g. nothing between two commas) would otherwise fail with
  # a NoMethodError on nil in the end-state check below
  raise InvalidExpressionException.new("Path contains no tokens") if path_tokens.empty?

  # Double check that the end state of the path is valid
  end_state = path_tokens.last[2]
  if end_state != :STATE_ROOT_PATH
    raise InvalidExpressionException.new("End state of path is '#{end_state}' instead of :STATE_ROOT_PATH")
  end

  step_delimiters = [:T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2]

  # Separate the path into its steps: a listener step ends with its delimiter,
  # a locator step begins with it
  if type == :PATH_LISTENER
    path_class = ListenerPath
    path_steps_tokens = path_tokens.slice_after do |token|
      step_delimiters.include?(token[0]) && token[2] == :STATE_ROOT_PATH
    end.to_a
  elsif type == :PATH_LOCATOR
    path_class = LocatorPath
    path_steps_tokens = path_tokens.slice_before do |token|
      step_delimiters.include?(token[0]) && token[2] == :STATE_ROOT_PATH
    end.to_a
  else
    raise ParseException.new("Unknown step type")
  end

  # Parse each step individually
  path_steps = path_steps_tokens.map { |step| Parser.parse_step_tokens(step, type) }
  path_class.new(path_steps)
end
parse_rule_string(rule_string, type)
click to toggle source
parse_rule_tokens(rule_tokens, type)
click to toggle source
Directly parse Scandent
string tokens
# File lib/arboretum/scandent.rb, line 400
# Directly parse Scandent string tokens into a rule.
#
# rule_tokens - Array of tokens for a whole rule. Each token is read as
#               token[0] (type symbol) and token[2] (tokenizer state symbol).
# type        - :PATH_LISTENER or :PATH_LOCATOR.
#
# Returns a ListenerRule or LocatorRule wrapping the parsed paths.
# Raises ParseException if type is neither of the two known values.
def self.parse_rule_tokens(rule_tokens, type)
  # Cut the token stream after every comma seen in the root path state, so
  # each group holds the tokens of one path
  grouped = rule_tokens.slice_after do |token|
    token[0] == :T_COMMA && token[2] == :STATE_ROOT_PATH
  end
  token_groups = grouped.to_a

  # Drop the trailing comma delimiter from each group that has one
  token_groups.each do |group|
    group.pop if group.last[0] == :T_COMMA
  end

  # Parse each path individually
  parsed_paths = token_groups.map { |group| Parser.parse_path_tokens(group, type) }

  if type == :PATH_LISTENER
    ListenerRule.new(parsed_paths)
  elsif type == :PATH_LOCATOR
    LocatorRule.new(parsed_paths)
  else
    raise ParseException.new("Unknown step type")
  end
end
parse_step_tokens(step_tokens, type)
click to toggle source
Parse an individual step of a path
# File lib/arboretum/scandent.rb, line 444
# Parse an individual step of a path.
#
# step_tokens - Array of tokens for one step. Each token is read as token[0]
#               (type symbol) and token[1] (source text). Whitespace tokens
#               are deleted from this array in place.
# type        - :PATH_LISTENER or :PATH_LOCATOR.
#
# Returns a ListenerStep or LocatorStep built from the action, element
# reference, tag, namespace, id, attribute constraints, pseudo-class
# expressions and nested validation rules found in the step.
#
# Raises ParseException for an unknown type or an unexpected token.
# Raises InvalidExpressionException for malformed input, including a step that
# ends in the middle of a construct (unterminated '(', '[' or '{').
#
# NOTE(review): more than one tag, namespace or element reference is rejected,
# but more than one id is not (only the first is used) - confirm this is intended.
def self.parse_step_tokens(step_tokens, type)
  # Remove whitespace tokens
  step_tokens.delete_if { |token| token[0] == :T_WHITESPACE }

  # The action is looked up from the last token of a listener step and from
  # the first token of a locator step
  if type == :PATH_LISTENER
    action_token = step_tokens.last
  elsif type == :PATH_LOCATOR
    action_token = step_tokens.first
  else
    raise ParseException.new("Unknown step type")
  end
  raise InvalidExpressionException.new("Step contains no tokens") if action_token.nil?
  action = @@actions.has_key?(action_token[0]) ? @@actions[action_token[0]] : :ACTION_SELF

  # Initialize all of the components that a step uses to match an element
  element_ref = []
  tag = []
  namespace = []
  id = []
  attrs = {
    :has => [],
    :contains => Hash.new { |hash, key| hash[key] = [] },
    :equals => Hash.new { |hash, key| hash[key] = [] },
    :matches => Hash.new { |hash, key| hash[key] = [] }
  }
  pseudo_exps = []
  valid_rules = []

  quote_types = [:T_DQUOTE, :T_SQUOTE]
  literal_types = [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT]

  index = -1

  # Consume and return the next token; running off the end of the step is an
  # invalid expression rather than a nil dereference
  advance = lambda do
    index += 1
    token = step_tokens[index]
    raise InvalidExpressionException.new("Unexpected end of step") if token.nil?
    token
  end

  # Consume the next token and require its type to be one of the given types
  expect = lambda do |*types|
    token = advance.call
    raise InvalidExpressionException.new unless types.include?(token[0])
    token
  end

  # Walk through the tokens of this step one at a time to parse grammar
  while (index + 1) < step_tokens.length
    index_token = advance.call
    case index_token[0]
    when :T_PCT
      tag << expect.call(:LITERAL_IDENT)[1]
    when :T_AT
      namespace << expect.call(:LITERAL_IDENT)[1]
    when :T_PND
      id << expect.call(:LITERAL_IDENT)[1]
    when :T_COLON
      pseudo_name = expect.call(:LITERAL_IDENT)[1].to_sym
      arg_tokens = []
      # Only peek at the token after the name: if it is not '(' it belongs to
      # whatever follows the pseudo-class and must not be consumed here
      lookahead = step_tokens[index + 1]
      if lookahead && lookahead[0] == :T_LPAREN
        index += 1 # consume '('
        loop do
          next_token = advance.call # raises if ')' is never found
          break if next_token[0] == :T_RPAREN
          arg_tokens << next_token
        end
      end
      pseudo_exps << [pseudo_name, Parser.parse_arg(arg_tokens)]
    when :T_ASTERISK
      # Adds no restrictions, so do nothing
    when :T_TILDE
      element_ref << :ELEMENT_ROOT
    when :T_DOT
      element_ref << :ELEMENT_SELF
    when :T_LBRAK
      attr_name = expect.call(:LITERAL_IDENT)[1].to_sym
      operator = advance.call
      case operator[0]
      when :T_EQL
        opening_quote = advance.call
        raise InvalidExpressionException.new("Expected a string after '='") unless quote_types.include?(opening_quote[0])
        attr_value = expect.call(*literal_types)[1]
        expect.call(opening_quote[0]) # closing quote must match the opening one
        expect.call(:T_RBRAK)
        attrs[:contains][attr_name] << attr_value
      when :T_EQL2
        opening_quote = expect.call(*quote_types)
        # The value is stored as its whitespace-separated words
        attr_value = expect.call(*literal_types)[1].split
        expect.call(opening_quote[0])
        expect.call(:T_RBRAK)
        attrs[:equals][attr_name] << attr_value
      when :T_TILDE_EQL
        expect.call(:T_VBARSLASH)
        attr_value = Regexp.new(expect.call(*literal_types)[1])
        expect.call(:T_SLASHVBAR)
        expect.call(:T_RBRAK)
        attrs[:matches][attr_name] << attr_value
      when :T_RBRAK
        # Bare [name]: the attribute only has to be present
        attrs[:has] << attr_name
      else
        raise InvalidExpressionException.new
      end
    when :T_LBRACE
      # Collect the text of every token up to the matching '}', counting
      # nested braces from the very first inner token
      depth = 1
      nested_parts = []
      loop do
        next_token = step_tokens[index += 1]
        if next_token.nil?
          raise InvalidExpressionException.new("Could not find matching R_BRACE in #{nested_parts.join}")
        end
        depth += 1 if next_token[0] == :T_LBRACE
        depth -= 1 if next_token[0] == :T_RBRACE
        break if depth.zero?
        nested_parts << next_token[1]
      end
      valid_rules << Parser.parse_rule_string(nested_parts.join, :PATH_LOCATOR)
    when :T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2
      # Do nothing since the action has already been determined
    else
      raise ParseException.new("Consumed unexpected token: #{index_token}")
    end
  end
  # All tokens consumed

  # Validate results
  raise InvalidExpressionException.new if tag.length > 1
  raise InvalidExpressionException.new if namespace.length > 1
  raise InvalidExpressionException.new if element_ref.length > 1

  if type == :PATH_LISTENER
    ListenerStep.new(action, element_ref[0], tag[0], namespace[0], id[0], attrs, pseudo_exps, valid_rules)
  else
    LocatorStep.new(action, element_ref[0], tag[0], namespace[0], id[0], attrs, pseudo_exps, valid_rules)
  end
end