class BaseTokenizer
Simplistic tokenizer used mostly for testing purposes
Attributes
line_start — @return [Integer] position of the start of the current line in the source text
lineno — @return [Integer] current line number
scanner — @return [StringScanner]
Public Class Methods
Source
# File lib/support/base_tokenizer.rb, line 21
# Constructor. Builds a tokenizer ready to scan +source+ from its beginning.
# @param source [String] text to tokenize.
def initialize(source)
  # The scanner is created over an empty string first; #restart then
  # installs the real text and sets the line counters.
  @scanner = StringScanner.new('')
  restart(source)
end
Constructor. Initialize a tokenizer. @param source [String] Skeem text to tokenize.
Public Instance Methods
Source
# File lib/support/base_tokenizer.rb, line 27
# Point the tokenizer at a new text and reset its line bookkeeping.
# @param source [String] input text to tokenize.
def restart(source)
  # Assigning a new string to the scanner replaces the text being scanned.
  @scanner.string = source

  # Line numbers are 1-based; the first line starts at offset 0.
  @lineno = 1
  @line_start = 0
end
Reset the tokenizer so that it scans the given text from its beginning. @param source [String] input text to tokenize.
Source
# File lib/support/base_tokenizer.rb, line 34
# Scan the remaining input and collect every token found.
# @return [Array] the tokens in the order they were read; nil results
#   from _next_token are left out.
def tokens
  collected = []
  until @scanner.eos?
    tok = _next_token
    # _next_token answers nil when only whitespace was left to consume.
    next if tok.nil?

    collected << tok
  end
  collected
end
@return [Array<SkmToken>] the sequence of tokens read from the input text
Protected Instance Methods
Source
# File lib/support/base_tokenizer.rb, line 50
# Skip leading whitespace and read the next token from the scanner.
# @return [Object, nil] whatever recognize_token produced, or nil when
#   no input is left after the whitespace.
# @raise [ScanError] when recognize_token answers nil, i.e. the text at
#   the current position is not a known token.
def _next_token
  skip_whitespaces
  lookahead = scanner.peek(1)
  return nil if lookahead.nil? || lookahead.empty?

  token = recognize_token
  return token unless token.nil?

  # Unknown token: consume the offending character plus up to 20 more
  # characters of the same line to give the error message some context.
  offending = scanner.peek(1).nil? ? '' : scanner.scan(/./)
  trailing = scanner.scan(/.{1,20}/)
  offending += trailing unless trailing.nil?
  raise ScanError, "Unknown token #{offending} on line #{lineno}"
end
Patterns: unambiguous single character; conditional single character
(e.g. '+' operator, '+' prefix for positive numbers).
Source
# File lib/support/base_tokenizer.rb, line 71
# Create a token object for a lexeme that was just read.
# @param aSymbolName [Object] name of the terminal symbol of the token.
# @param aLexeme [String] text of the token as found in the input.
# @param aFormat [Symbol] format hint handed over to convert_to.
# @return [Rley::Lexical::Token] token carrying the converted value, the
#   symbol name and its line/column position.
# @raise [StandardError] any error raised while building the token is
#   re-raised after a diagnostic line has been printed.
def build_token(aSymbolName, aLexeme, aFormat = :default)
  value = convert_to(aLexeme, aSymbolName, aFormat)

  # The column is 1-based and derived from the scanner position, so this
  # assumes the scanner sits right after the lexeme -- TODO confirm callers.
  lexeme_start = scanner.pos - aLexeme.size
  column = lexeme_start - @line_start + 1
  position = Rley::Lexical::Position.new(@lineno, column)
  Rley::Lexical::Token.new(value, aSymbolName, position)
rescue StandardError
  puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
  raise
end
Source
# File lib/support/base_tokenizer.rb, line 85
# Turn a lexeme into the value stored in its token. This implementation
# performs no conversion and hands the lexeme back untouched.
# @param aLexeme [String] text of the token.
# @param _symbol_name [Object] terminal symbol name (ignored here).
# @param _format [Object] format hint (ignored here).
# @return [String] the very same object passed as +aLexeme+.
def convert_to(aLexeme, _symbol_name, _format)
  aLexeme
end
Source
# File lib/support/base_tokenizer.rb, line 110
# Record that a new line begins at the current scanner position.
# @return [Integer] the scanner position stored as the new line start.
def next_line
  @lineno = @lineno.succ
  # Remembered so that column numbers can be computed relative to it.
  @line_start = scanner.pos
end
Source
# File lib/support/base_tokenizer.rb, line 67
# Hook that reads one token at the current scanner position.
# _next_token calls it after skipping whitespace and treats a nil result
# as an unknown token. This base version only signals that the hook is
# missing.
# @raise [NotImplementedError] always; the message names the class and
#   the method that has to be provided.
def recognize_token
  raise NotImplementedError,
        "#{self.class}#recognize_token must be implemented by a subclass"
end
Source
# File lib/support/base_tokenizer.rb, line 89
# Advance the scanner past blanks (space, tab, form feed) and line breaks
# (CR LF, lone CR, lone LF). next_line is called once for every line
# break that is consumed.
# @return [nil]
def skip_whitespaces
  loop do
    blanks = scanner.skip(/[ \t\f]+/)
    # At most one line break per pass so that each one is counted.
    line_break = scanner.skip(/(?:\r\n)|\r|\n/)
    next_line if line_break

    # Stop as soon as a pass consumed nothing at all.
    break unless blanks || line_break
  end
  nil
end