class ChefZero::Solr::SolrParser

Constants

DEFAULT_FIELD

Public Class Methods

new(query_string) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 13
# Sets up a parser positioned at the start of +query_string+.
#
# query_string:: the query text; the tokenizer methods read it by index.
#
# State kept on the instance:
# @index::      offset of the next unread character in @query_string
# @next_token:: one-token lookahead filled by peek_token and cleared by
#               next_token; set here so eof? never reads an unassigned
#               instance variable
def initialize(query_string)
  @query_string = query_string
  @index = 0
  @next_token = nil
end

Public Instance Methods

binary_operator?(token) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 188
# Tells whether +token+ is one of the infix operators this parser combines
# expressions with: AND, OR, ^ and :.
#
# Returns true or false.
def binary_operator?(token)
  %w[AND OR ^ :].include?(token)
end
binary_operator_precedence(token) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 192
# Binding strength of a binary operator; a larger number binds tighter.
#
#   "^" => 4, ":" => 3, "AND" => 2, "OR" => 1
#
# Returns nil for any token that is not one of those four.
def binary_operator_precedence(token)
  { "^" => 4, ":" => 3, "AND" => 2, "OR" => 1 }[token]
end
eof?() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 87
# True once nothing is left to hand out: no token is buffered in @next_token
# and @index has reached (or passed) the end of @query_string.
def eof?
  # A buffered lookahead token still has to be consumed
  return false if @next_token

  @index >= @query_string.length
end
next_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 29
# Consumes and returns the next token (nil when peek_token has none).
#
# The token is obtained through peek_token, then the lookahead slot is
# emptied so the following peek_token call asks for a new one.
def next_token
  peek_token.tap { @next_token = nil }
end
parse() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 18
# Parses the whole query string and returns the expression built by
# read_expression.
#
# Raises (through parse_error) when read_expression itself rejects the
# input, or when input is left over afterwards.
def parse
  tree = read_expression
  # read_expression returns on eof *or* on a ")" it did not open. At the top
  # level there is no open "(" to close, so anything left here would
  # otherwise be dropped without notice (e.g. "a) AND b" parsing as "a").
  parse_error(peek_token, "Unexpected end paren") unless eof?
  tree
end
parse_error(token, str) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 121
# Aborts parsing by raising ChefZero::Solr::ParseError.
#
# token:: the offending token (may be nil); interpolated into the message
# str::   description of what went wrong
#
# The message also carries the current @index and the full query string.
def parse_error(token, str)
  location = "Error on token '#{token}' at #{@index} of '#{@query_string}'"
  raise ChefZero::Solr::ParseError, "#{location}: #{str}"
end
parse_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 35
# Scans one token starting at @index and advances @index past it.
#
# Returns nil at end of input, the operator text when peek_operator_token
# recognises one at the current position, and otherwise a term: the run of
# characters up to the next whitespace or operator (peek_term_token decides
# where the run stops).
def parse_token
  skip_whitespace
  return nil if eof?

  if (symbol = peek_operator_token)
    @index += symbol.length
    return symbol
  end

  # Not whitespace and not an operator, so a term starts here. A backslash
  # makes the character after it part of the term without inspecting it.
  term_start = @index
  loop do
    @index += 1 if @query_string[@index] == '\\'
    # A trailing backslash may already have moved us to the end of input
    @index += 1 unless eof?
    break if eof? || !peek_term_token
  end
  @query_string[term_start...@index]
end
peek_operator_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 74
# Looks at the text at @index, without moving @index, and returns the
# operator found there:
#
# * one of the single characters " + - ! ( ) { } [ ] ^ :
# * or one of the two-character sequences && and ||
#
# Returns nil when neither is present.
def peek_operator_token
  current = @query_string[@index]
  return current if ['"', "+", "-", "!", "(", ")", "{", "}", "[", "]", "^", ":"].include?(current)

  pair = @query_string[@index, 2]
  ["&&", "||"].include?(pair) ? pair : nil
end
peek_term_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 67
# Decides whether the character at @index can continue a term.
#
# Returns nil for whitespace, true when no operator starts at this position
# or the operator is "-" (which is allowed inside a term), false for any
# other operator.
def peek_term_token
  return nil if @query_string[@index] =~ /\s/

  operator = peek_operator_token
  return true unless operator

  operator == "-"
end
peek_token() click to toggle source

Tokenization

# File lib/chef_zero/solr/solr_parser.rb, line 25
# Returns the next token without consuming it.
#
# The token is scanned by parse_token on first use and kept in @next_token,
# so repeated calls return the same value until next_token clears the slot.
def peek_token
  @next_token = parse_token unless @next_token
  @next_token
end
read_expression() click to toggle source

Parse tree creation

# File lib/chef_zero/solr/solr_parser.rb, line 92
# Reads a sequence of single expressions joined by binary operators and
# returns the resulting tree (a bare single expression when there is only
# one operand).
#
# Reading stops at end of input or at a ")", which is left unconsumed for
# the caller. Two operands with no operator between them are joined with
# "OR". Operators of equal precedence group to the left.
def read_expression
  result = read_single_expression
  # Expression is over when we hit a close paren or eof
  # (peek_token has the side effect of skipping whitespace for us, so we
  # really know if we're at eof or not)
  until peek_token == ")" || eof?
    operator = peek_token
    if binary_operator?(operator)
      next_token
    else
      # If 2 terms are next to each other, the default operator is OR
      operator = "OR"
    end
    next_expression = read_single_expression

    # Find where the new operator belongs: walk down the right edge of the
    # tree for as long as the operator there binds more loosely than the new
    # one. Looking only at the root would mis-group three precedence levels,
    # e.g. "a OR b AND c ^ 2" must give a OR (b AND (c ^ 2)).
    precedence = binary_operator_precedence(operator)
    looser_parents = []
    node = result
    while node.is_a?(Query::BinaryOperator) &&
        precedence > binary_operator_precedence(node.operator)
      looser_parents << node
      node = node.right
    end

    # a*b+c -> (a*b)+c when nothing was walked past;
    # a+b*c -> a+(b*c) when one or more looser operators were.
    node = Query::BinaryOperator.new(node, operator, next_expression)
    looser_parents.reverse_each do |parent|
      node = Query::BinaryOperator.new(parent.left, parent.operator, node)
    end
    result = node
  end
  result
end
read_single_expression() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 125
# Reads one operand of an expression and returns the matching Query node:
#
# * a unary operator followed by its operand -> Query::UnaryOperator
# * "term term ..."                          -> Query::Phrase
# * [a TO b] / {a TO b}                       -> Query::RangeQuery ("[" and
#   "]" are passed as true for the third and fourth constructor arguments)
# * ( expression )                            -> Query::Subquery
# * term, or term:operand                     -> Query::Term, or a ":"
#   Query::BinaryOperator
#
# Every malformed case is reported through parse_error: end of input where
# an operand is required, an unterminated phrase, an incomplete range, a
# missing ")", a stray closing bracket, or a binary operator with nothing
# on its left.
def read_single_expression
  token = next_token
  # Out of input where an operand is required
  if !token
    parse_error(nil, "Expected expression!")

  # If it's an unary operand, build that
  elsif unary_operator?(token)
    operand = read_single_expression
    # TODO We rely on all unary operators having higher precedence than all
    # binary operators.  Check if this is the case.
    Query::UnaryOperator.new(token, operand)

  # If it's the start of a phrase, read the terms in the phrase
  elsif token == '"'
    # Read terms until close "
    phrase_terms = []
    until (term = next_token) == '"'
      # next_token keeps returning nil once the input is used up, so without
      # this check an unterminated phrase would loop forever
      parse_error(nil, "Expected '\"' to close phrase") unless term
      phrase_terms << Query::Term.new(term)
    end
    Query::Phrase.new(phrase_terms)

  # If it's the start of a range query, build that
  elsif token == "{" || token == "["
    left = next_token
    parse_error(left, "Expected left term in range query") unless left
    to = next_token
    parse_error(to, "Expected TO in range query") if to != "TO"
    right = next_token
    parse_error(right, "Expected right term in range query") unless right
    end_range = next_token
    parse_error(end_range, "Expected '}' or ']' to end range query") unless ["}", "]"].include?(end_range)
    Query::RangeQuery.new(left, right, token == "[", end_range == "]")

  elsif token == "("
    subquery = read_expression
    close_paren = next_token
    parse_error(close_paren, "Expected ')'") if close_paren != ")"
    Query::Subquery.new(subquery)

  # If it's the end of a closure, raise an exception
  elsif ["}", "]", ")"].include?(token)
    parse_error(token, "Unexpected end paren")

  # If it's a binary operator, raise an exception
  elsif binary_operator?(token)
    parse_error(token, "Unexpected binary operator")

  # Otherwise it's a term.
  else
    term = Query::Term.new(token)
    if peek_token == ":"
      # field:value - consume the ":" and read the value as the right side
      Query::BinaryOperator.new(term, next_token, read_single_expression)
    else
      term
    end
  end
end
skip_whitespace() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 60
# Advances @index past the run of whitespace starting at the current
# position. Does nothing when the current character is not whitespace or
# @index is already at or past the end of the string.
def skip_whitespace
  return unless @query_string[@index] =~ /\s/

  # The guard above guarantees this match succeeds and starts at @index, so
  # no fallback is needed
  @index += /\s+/.match(@query_string, @index)[0].length
end
unary_operator?(token) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 184
# Tells whether +token+ is one of the prefix operators NOT, +, - and !.
#
# Returns true or false.
def unary_operator?(token)
  %w[NOT + - !].include?(token)
end