class Dentaku::Tokenizer

Constants

LPAREN: the opening-parenthesis grouping marker; scan increments the nesting count when a matching token is seen.
RPAREN: the closing-parenthesis grouping marker; scan decrements the nesting count when a matching token is seen.

Attributes

aliases [R]: the function alias map in effect for the current tokenize call (read-only).

Public Instance Methods

alias_regex()
# File lib/dentaku/tokenizer.rb, line 84
def alias_regex
  values = @aliases.values.flatten.join('|')
  /(?<=\p{Punct}|[[:space:]]|\A)(#{values})(?=\()/i
end
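Builds a case-insensitive regex that matches any configured alias, but only when it is used as a function call (followed by "(") and preceded by punctuation, whitespace, or the start of the string. A sketch, assuming @aliases = { 'roundup' => ['ceil'] }:

  /(?<=\p{Punct}|[[:space:]]|\A)(ceil)(?=\()/i =~ 'ceil(1.2)'  # => 0 (match at start of string)
  /(?<=\p{Punct}|[[:space:]]|\A)(ceil)(?=\()/i =~ 'ceiling'    # => nil (no "(" follows)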
last_token()
# File lib/dentaku/tokenizer.rb, line 40
def last_token
  @tokens.last
end
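Returns the most recently collected token, or nil before anything has been scanned. tokenize passes it to each scanner via scan so scanners can make context-sensitive decisions about the next match.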
replace_aliases(string)
# File lib/dentaku/tokenizer.rb, line 70
def replace_aliases(string)
  return string unless @aliases.any?

  string.gsub!(alias_regex) do |match|
    match_regex = /^#{Regexp.escape(match)}$/i

    @aliases.detect do |(_key, aliases)|
      !aliases.grep(match_regex).empty?
    end.first
  end

  string
end
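Rewrites any aliased function call in the input to its canonical name (the alias map's key) before scanning; the string is returned unchanged when no aliases are configured. A sketch, assuming the :aliases option maps a canonical Dentaku function name to its alternate spellings:

  tokenizer = Dentaku::Tokenizer.new
  tokenizer.tokenize('ceil(1.2)', aliases: { 'roundup' => ['ceil'] })
  # internally rewrites the input to 'roundup(1.2)' before scanning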
scan(string, scanner)
# File lib/dentaku/tokenizer.rb, line 44
def scan(string, scanner)
  if tokens = scanner.scan(string, last_token)
    tokens.each do |token|
      if token.empty?
        fail! :unexpected_zero_width_match,
              token_category: token.category, at: string
      end

      @nesting += 1 if LPAREN == token
      @nesting -= 1 if RPAREN == token
      fail! :too_many_closing_parentheses if @nesting < 0

      @tokens << token unless token.is?(:whitespace)
    end

    match_length = tokens.map(&:length).reduce(:+)
    [true, string[match_length..-1]]
  else
    [false, string]
  end
end
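Attempts to match a single scanner against the head of the input. On a match it validates and collects the resulting tokens (tracking parenthesis nesting and dropping whitespace) and returns [true, remaining_input]; otherwise it returns [false, string] unchanged. A sketch of that contract, where numeric_scanner is a hypothetical scanner and @tokens/@nesting are assumed to have been initialized by tokenize:

  matched, rest = tokenizer.scan('42 + 1', numeric_scanner)
  # matched => true, rest => ' + 1' when the scanner recognizes '42'
  matched, rest = tokenizer.scan('+ 1', numeric_scanner)
  # matched => false, rest => '+ 1' when nothing at the head of the input matches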
strip_comments(input)
# File lib/dentaku/tokenizer.rb, line 66
def strip_comments(input)
  input.gsub(/\/\*[^*]*\*+(?:[^*\/][^*]*\*+)*\//, '')
end
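Removes C-style /* ... */ block comments from the input before scanning:

  Dentaku::Tokenizer.new.strip_comments('1 + /* note */ 2')  # => "1 +  2"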
tokenize(string, options = {})
# File lib/dentaku/tokenizer.rb, line 12
def tokenize(string, options = {})
  @nesting = 0
  @tokens  = []
  @aliases = options.fetch(:aliases, global_aliases)
  input    = strip_comments(string.to_s.dup)
  input    = replace_aliases(input)

  scanner_options = {
    case_sensitive: options.fetch(:case_sensitive, false),
    raw_date_literals: options.fetch(:raw_date_literals, true)
  }

  until input.empty?
    scanned = TokenScanner.scanners(scanner_options).any? do |scanner|
      scanned, input = scan(input, scanner)
      scanned
    end

    unless scanned
      fail! :parse_error, at: input
    end
  end

  fail! :too_many_opening_parentheses if @nesting > 0

  @tokens
end
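Tokenizes an expression string into an array of token objects, raising on malformed input. The :aliases option supplies the function alias map (defaulting to the global aliases); :case_sensitive and :raw_date_literals are forwarded to the token scanners. A minimal usage sketch; the categories shown are illustrative:

  tokens = Dentaku::Tokenizer.new.tokenize('(5 + x) * 2')
  tokens.map(&:category)
  # => e.g. [:grouping, :numeric, :operator, :identifier, :grouping, :operator, :numeric]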

Private Instance Methods

fail!(reason, **meta)
# File lib/dentaku/tokenizer.rb, line 96
def fail!(reason, **meta)
  message =
    case reason
    when :parse_error
      "parse error at: '#{meta.fetch(:at)}'"
    when :too_many_opening_parentheses
      "too many opening parentheses"
    when :too_many_closing_parentheses
      "too many closing parentheses"
    when :unexpected_zero_width_match
      "unexpected zero-width match (:#{meta.fetch(:category)}) at '#{meta.fetch(:at)}'"
    else
      raise ::ArgumentError, "Unhandled #{reason}"
    end

  raise TokenizerError.for(reason, **meta), message
end
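Raises a TokenizerError (built via TokenizerError.for) whose message is derived from the failure reason; an unhandled reason raises ArgumentError instead. A sketch, assuming TokenizerError resolves to Dentaku::TokenizerError:

  begin
    Dentaku::Tokenizer.new.tokenize('(1 + 2')
  rescue Dentaku::TokenizerError => e
    e.message  # => "too many opening parentheses"
  end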
global_aliases()
# File lib/dentaku/tokenizer.rb, line 91
def global_aliases
  return {} unless Dentaku.respond_to?(:aliases)
  Dentaku.aliases
end