class Aspen::Lexer

Constants

CONTENT_REGEX
LABELED_NODE
LABEL_PASCAL_CASE
NUMBER_CAPTURE

From stackoverflow.com/questions/171480/regex-grabbing-values-between-quotation-marks

PASCAL_CASE
STRING_CAPTURE

Public Class Methods

tokenize(code, env={}) click to toggle source
# File lib/aspen/lexer.rb, line 17
def self.tokenize(code, env={})
  new.tokenize(code, env)
end

Public Instance Methods

pop_state() click to toggle source
# File lib/aspen/lexer.rb, line 183
def pop_state
  stack.pop
end
push_state(state) click to toggle source
# File lib/aspen/lexer.rb, line 179
def push_state(state)
  stack.push(state)
end
stack() click to toggle source
# File lib/aspen/lexer.rb, line 171
def stack
  @stack ||= []
end
state() click to toggle source
# File lib/aspen/lexer.rb, line 175
def state
  stack.last || :default
end
tokenize(code, env={}) click to toggle source
# File lib/aspen/lexer.rb, line 21
def tokenize(code, env={})
  scanner = StringScanner.new(code)
  tokens = []

  environment = Discourse.assert(env)
  grammar = environment.grammar

  until scanner.eos?
    # puts "tokens (6): #{tokens.last(6).inspect}"
    # puts "\n(#{state}) stack: #{stack}"
    # puts "grammar: #{grammar.inspect}"

    # Match custom grammars
    if grammar && scanner.beginning_of_line?
      line = scanner.scan(/^.*$/)
      if grammar.match?(line)
        tokens << [:CUSTOM_GRAMMAR_STATEMENT, line]
        next
      else
        scanner.unscan # reset pointer to beginning of line
      end
    end

    # If the line ends with :, it's starting a list.
    if scanner.beginning_of_line?
      line = scanner.scan(/^.*$/)
      if line.match? /:$/
        tokens << [:PREPARE_START_LIST]
      end
      scanner.unscan
    end

    # Standard Aspen syntax
    case state
    when :default then
      if scanner.scan(/\(/)
        tokens << [:OPEN_PARENS]
        push_state :node
      elsif scanner.scan(/\[/)
        tokens << [:OPEN_BRACKETS]
        push_state :edge
      elsif scanner.scan(/(:\s*\n)/) # Colon, any whitespace, newline
        tokens << [:START_LIST, scanner.matched]
        push_state :list
      elsif scanner.scan(/\./)
        tokens << [:END_STATEMENT, scanner.matched]
      elsif scanner.scan(/\s/)
        # NO OP
      elsif scanner.scan(/#.*$/)
        tokens << [:COMMENT, scanner.matched.gsub(/#\s*/, '')]
      else
        no_match(scanner, state)
      end

    when :node then
      # Removed Cypher form for now. Un   comment the next 3 lines
      # to start working on it.
      #
      # if scanner.scan(LABEL_PASCAL_CASE)
      #   tokens << [:LABEL, scanner.matched]
      #   push_state :hash
      if scanner.match?(LABELED_NODE)
        push_state :node_labeled
      elsif scanner.scan(/\n/) && stack == [:list, :node]
        # If we're inside a list node and we encounter a newline,
        # pop :node so we can return to the :list state.
        scanner.unscan
        pop_state
      elsif scanner.scan(CONTENT_REGEX)
        tokens << [:CONTENT, scanner.matched.strip]
      elsif scanner.scan(/[\:]/)
        tokens << [:SEPARATOR, scanner.matched]
      elsif scanner.scan(/\(/)
        tokens << [:OPEN_PARENS]
        push_state :label
      elsif scanner.scan(/\)/)
        tokens << [:CLOSE_PARENS]
        pop_state
      else
        no_match(scanner, state)
      end

    when :node_labeled
      if scanner.scan(PASCAL_CASE)
        tokens << [:LABEL, scanner.matched]
        pop_state # Back to Node
      else
        no_match(scanner, state)
      end

    when :edge then
      if scanner.scan(/[[[:alpha:]]\s]+/)
        tokens << [:CONTENT, scanner.matched.strip]
      elsif scanner.scan(/\]/)
        tokens << [:CLOSE_BRACKETS]
        pop_state
      else
        no_match(scanner, state)
      end

    when :hash then
      if scanner.scan(/\{/)
        tokens << [:OPEN_BRACES]
      elsif scanner.scan(/[[[:alpha:]]_]+/)
        tokens << [:IDENTIFIER, scanner.matched]
      elsif scanner.scan(/[\,\:]/)
        tokens << [:SEPARATOR, scanner.matched]
      elsif scanner.scan(STRING_CAPTURE)
        tokens << [:STRING, scanner.matched]
      elsif scanner.scan(NUMBER_CAPTURE)
        tokens << [:NUMBER, scanner.matched]
      elsif scanner.scan(/\}/)
        tokens << [:CLOSE_BRACES]
        pop_state
      elsif scanner.scan(/\s+/)
        # NO OP
      else
        no_match(scanner, state)
      end

    when :list then
      if scanner.scan(/([\-\*\+])/) # -, *, or + (any allowed by Markdown)
        tokens << [:BULLET, scanner.matched]
        push_state :node
      elsif scanner.scan(/\n\n/)
        tokens << [:END_LIST]
        pop_state
      elsif scanner.scan(/\s/)
        # NO OP
      else
        no_match(scanner, state)
      end

    when :label
      if scanner.scan(PASCAL_CASE)
        tokens << [:CONTENT, scanner.matched]
      elsif scanner.peek(1).match?(/\)/)
        pop_state # Go back to :node and let :node pop state
      else
        no_match(scanner, state)
      end

    else # No state match
      raise Aspen::LexError, "There is no matcher for state #{state.inspect}."
    end
  end

  tokens
end

Private Instance Methods

no_match(scanner, state) click to toggle source
# File lib/aspen/lexer.rb, line 189
def no_match(scanner, state)
  raise Aspen::LexError,
          Aspen::Errors.messages(:unexpected_token, scanner, state)
end