chore(tazjin/rlox): Move other modules under treewalk::

It's unclear if the second part of the book can reuse anything from
the first part (I'm guessing probably the scanner, but I'll move that
back if it turns out to be the case).

Change-Id: I9411355929e31ac6e953599e51665406b1f48d55
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2415
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
Vincent Ambo 2021-01-17 21:13:57 +03:00 committed by tazjin
parent c26915d012
commit b1d0e22b1f
8 changed files with 22 additions and 22 deletions

View file

@ -1,283 +0,0 @@
use crate::errors::{Error, ErrorKind};
/// The kinds of token produced by the scanner.
///
/// Variants that wrap a value carry the scanned payload: the identifier
/// name, the string contents, or the parsed number.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // ---- Single-character tokens ----
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `-`
    Minus,
    /// `+`
    Plus,
    /// `;`
    Semicolon,
    /// `/`
    Slash,
    /// `*`
    Star,

    // ---- One or two character tokens ----
    /// `!`
    Bang,
    /// `!=`
    BangEqual,
    /// `=`
    Equal,
    /// `==`
    EqualEqual,
    /// `>`
    Greater,
    /// `>=`
    GreaterEqual,
    /// `<`
    Less,
    /// `<=`
    LessEqual,

    // ---- Literals ----
    /// An identifier, carrying its name.
    Identifier(String),
    /// A string literal, carrying its contents.
    String(String),
    /// A number literal, carrying its value.
    Number(f64),
    /// `true`
    True,
    /// `false`
    False,
    /// `nil`
    Nil,

    // ---- Keywords ----
    /// `and`
    And,
    /// `class`
    Class,
    /// `else`
    Else,
    /// `fun`
    Fun,
    /// `for`
    For,
    /// `if`
    If,
    /// `or`
    Or,
    /// `print`
    Print,
    /// `return`
    Return,
    /// `super`
    Super,
    /// `this`
    This,
    /// `var`
    Var,
    /// `while`
    While,

    // ---- Special things ----
    /// Marks the end of the token stream.
    Eof,
}
/// A single scanned token: its kind, the source text it was built from,
/// and the line it was recorded on.
#[derive(Debug, Clone)]
pub struct Token {
    /// What kind of token this is, including any literal payload.
    pub kind: TokenKind,

    /// The source characters that make up this token.
    pub lexeme: String,

    /// The scanner's line counter at the moment the token was emitted.
    pub line: usize,
}
/// Scanning state over a borrowed slice of source characters.
struct Scanner<'a> {
    /// The complete input being scanned.
    source: &'a [char],

    /// Tokens emitted so far.
    tokens: Vec<Token>,

    /// Errors recorded so far.
    errors: Vec<Error>,

    /// Offset of the first character in the current lexeme.
    start: usize,

    /// Current offset into `source` (the next character to consume).
    current: usize,

    /// Current line in the source.
    line: usize,
}
impl<'a> Scanner<'a> {
    /// Returns `true` once every character of the source has been consumed.
    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

    /// Consumes the next character and returns it.
    ///
    /// # Panics
    ///
    /// Panics when called at the end of the source.
    fn advance(&mut self) -> char {
        let chr = self.source[self.current];
        self.current += 1;
        chr
    }

    /// Emits a token of the given kind. Its lexeme is the source text
    /// between `start` and `current`, and its line is the current line.
    fn add_token(&mut self, kind: TokenKind) {
        let lexeme: String = self.source[self.start..self.current].iter().collect();
        self.tokens.push(Token {
            kind,
            lexeme,
            line: self.line,
        });
    }

    /// Scans a single token starting at `current`.
    ///
    /// Whitespace and `//` comments produce no token. A character that
    /// cannot start any token is recorded as an `UnexpectedChar` error.
    fn scan_token(&mut self) {
        match self.advance() {
            // simple single-character tokens
            '(' => self.add_token(TokenKind::LeftParen),
            ')' => self.add_token(TokenKind::RightParen),
            '{' => self.add_token(TokenKind::LeftBrace),
            '}' => self.add_token(TokenKind::RightBrace),
            ',' => self.add_token(TokenKind::Comma),
            '.' => self.add_token(TokenKind::Dot),
            '-' => self.add_token(TokenKind::Minus),
            '+' => self.add_token(TokenKind::Plus),
            ';' => self.add_token(TokenKind::Semicolon),
            '*' => self.add_token(TokenKind::Star),

            // possible multi-character tokens
            '!' => self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang),
            '=' => self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal),
            '<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less),
            '>' => self.add_if_next('=', TokenKind::GreaterEqual, TokenKind::Greater),

            '/' => {
                // support comments until EOL by discarding characters; the
                // newline itself is left for the whitespace arm so that the
                // line counter is still incremented
                if self.match_next('/') {
                    while self.peek() != '\n' && !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.add_token(TokenKind::Slash);
                }
            }

            // ignore whitespace, but keep track of line breaks
            ws if ws.is_whitespace() => {
                if ws == '\n' {
                    self.line += 1;
                }
            }

            '"' => self.scan_string(),

            digit if digit.is_ascii_digit() => self.scan_number(),

            chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(),

            unexpected => self.errors.push(Error {
                line: self.line,
                kind: ErrorKind::UnexpectedChar(unexpected),
            }),
        };
    }

    /// Consumes the next character only if it equals `expected`.
    ///
    /// Returns whether a character was consumed.
    fn match_next(&mut self, expected: char) -> bool {
        if self.is_at_end() || self.source[self.current] != expected {
            return false;
        }

        self.current += 1;
        true
    }

    /// Emits `then` if the next character is `expected` (consuming it),
    /// and `or` otherwise.
    fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) {
        let kind = if self.match_next(expected) { then } else { or };
        self.add_token(kind);
    }

    /// Returns the next character without consuming it, or `'\0'` at the
    /// end of the source.
    fn peek(&self) -> char {
        self.source.get(self.current).copied().unwrap_or('\0')
    }

    /// Returns the character after the next one without consuming
    /// anything, or `'\0'` if there is no such character.
    fn peek_next(&self) -> char {
        self.source.get(self.current + 1).copied().unwrap_or('\0')
    }

    /// Scans a string literal whose opening `"` has already been consumed.
    ///
    /// Strings may span multiple lines. If the source ends before the
    /// closing `"`, an `UnterminatedString` error is recorded and no token
    /// is emitted.
    fn scan_string(&mut self) {
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
                self.line += 1;
            }
            self.advance();
        }

        if self.is_at_end() {
            self.errors.push(Error {
                line: self.line,
                kind: ErrorKind::UnterminatedString,
            });
            return;
        }

        // closing '"'
        self.advance();

        // the token value excludes the surrounding quotes (the lexeme
        // still includes them)
        let string: String = self.source[(self.start + 1)..(self.current - 1)]
            .iter()
            .collect();
        self.add_token(TokenKind::String(string));
    }

    /// Scans a number literal whose first digit has already been consumed.
    ///
    /// A fractional part is only consumed if the `.` is directly followed
    /// by a digit.
    fn scan_number(&mut self) {
        while self.peek().is_ascii_digit() {
            self.advance();
        }

        // Look for a fractional part
        if self.peek() == '.' && self.peek_next().is_ascii_digit() {
            // consume '.'
            self.advance();

            while self.peek().is_ascii_digit() {
                self.advance();
            }
        }

        // The lexeme consists only of ASCII digits with at most one
        // interior '.', which is always a valid float.
        let num: f64 = self.source[self.start..self.current]
            .iter()
            .collect::<String>()
            .parse()
            .expect("float parsing should always work");

        self.add_token(TokenKind::Number(num));
    }

    /// Scans an identifier or keyword whose first character has already
    /// been consumed.
    fn scan_identifier(&mut self) {
        while self.peek().is_alphanumeric() || self.peek() == '_' {
            self.advance();
        }

        let ident: String = self.source[self.start..self.current].iter().collect();

        // Determine whether this is an identifier, or a keyword:
        let token_kind = match ident.as_str() {
            "and" => TokenKind::And,
            "class" => TokenKind::Class,
            "else" => TokenKind::Else,
            "false" => TokenKind::False,
            "for" => TokenKind::For,
            "fun" => TokenKind::Fun,
            "if" => TokenKind::If,
            "nil" => TokenKind::Nil,
            "or" => TokenKind::Or,
            "print" => TokenKind::Print,
            "return" => TokenKind::Return,
            "super" => TokenKind::Super,
            "this" => TokenKind::This,
            "true" => TokenKind::True,
            "var" => TokenKind::Var,
            "while" => TokenKind::While,
            _ => TokenKind::Identifier(ident),
        };

        self.add_token(token_kind);
    }

    /// Scans the whole source, then appends a final `Eof` token with an
    /// empty lexeme.
    fn scan_tokens(&mut self) {
        while !self.is_at_end() {
            self.start = self.current;
            self.scan_token();
        }

        // Reset `start` so that the Eof token gets an empty lexeme instead
        // of repeating the text of whatever was scanned last.
        self.start = self.current;
        self.add_token(TokenKind::Eof);
    }
}
/// Scans `input` into a list of tokens.
///
/// On success the returned tokens end with an `Eof` token. If any errors
/// were recorded while scanning, all of them are returned instead and the
/// tokens are discarded.
// NOTE(review): the line counter starts at 0, so lines recorded on tokens
// and errors are zero-based unless a consumer adjusts them; confirm this
// is intended.
pub fn scan<'a>(input: &'a [char]) -> Result<Vec<Token>, Vec<Error>> {
    let mut scanner = Scanner {
        source: input,
        tokens: Vec::new(),
        errors: Vec::new(),
        start: 0,
        current: 0,
        line: 0,
    };

    scanner.scan_tokens();

    if scanner.errors.is_empty() {
        Ok(scanner.tokens)
    } else {
        Err(scanner.errors)
    }
}