feat(tazjin/rlox): Implement single-character scanning
... still not that interesting, but at this point slightly divergent from the book: The book embraces mutability for interpreter state, initially for tracking whether an error condition has occurred. I avoid this by instead defining an error type and collecting the error values, to be handled later on. Notes: So far nothing special, but this is just the beginning of the book. I like the style it is written in and it has pointed to some interesting resources, such as a 1965 paper titled "The Next 700 Languages". Change-Id: I030b38438fec9eb55372bf547af225138908230a Reviewed-on: https://cl.tvl.fyi/c/depot/+/2144 Reviewed-by: tazjin <mail@tazj.in> Tested-by: BuildkiteCI
This commit is contained in:
		
							parent
							
								
									9d2b001c4c
								
							
						
					
					
						commit
						3d1b116f7f
					
				
					 3 changed files with 139 additions and 0 deletions
				
			
		
							
								
								
									
										14
									
								
								users/tazjin/rlox/src/errors.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								users/tazjin/rlox/src/errors.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,14 @@
 | 
				
			||||||
 | 
/// The specific kind of error encountered by the interpreter.
#[derive(Debug)]
pub enum ErrorKind {
    /// The scanner hit a character that does not begin any known token.
    UnexpectedChar(char),
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
/// An error value collected during interpretation, tagged with the
/// source line on which it was encountered. Errors are accumulated
/// rather than aborting, so that multiple problems can be reported
/// in one pass.
#[derive(Debug)]
pub struct Error {
    // Source line on which the error occurred.
    pub line: usize,
    // What actually went wrong.
    pub kind: ErrorKind,
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn report(loc: &str, err: &Error) {
 | 
				
			||||||
 | 
					    eprintln!("[line {}] Error {}: {:?}", err.line, loc, err.kind);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -4,7 +4,9 @@ use std::io;
 | 
				
			||||||
use std::io::Write;
 | 
					use std::io::Write;
 | 
				
			||||||
use std::process;
 | 
					use std::process;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mod errors;
 | 
				
			||||||
mod interpreter;
 | 
					mod interpreter;
 | 
				
			||||||
 | 
					mod scanner;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn main() {
 | 
					fn main() {
 | 
				
			||||||
    let mut args = env::args();
 | 
					    let mut args = env::args();
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										123
									
								
								users/tazjin/rlox/src/scanner.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								users/tazjin/rlox/src/scanner.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,123 @@
 | 
				
			||||||
 | 
					use crate::errors::{Error, ErrorKind};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
/// Every kind of token that can occur in Lox source code.
#[derive(Debug)]
pub enum TokenKind {
    // Single-character tokens.
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    Comma,
    Dot,
    Minus,
    Plus,
    Semicolon,
    Slash,
    Star,

    // One or two character tokens.
    Bang,
    BangEqual,
    Equal,
    EqualEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,

    // Literals.
    Identifier,
    String,
    Number,

    // Keywords.
    And,
    Class,
    Else,
    False,
    Fun,
    For,
    If,
    Nil,
    Or,
    Print,
    Return,
    Super,
    This,
    True,
    Var,
    While,

    // Special things
    Eof,
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
/// A single scanned token, borrowing its lexeme from the source
/// string (lifetime `'a`).
#[derive(Debug)]
pub struct Token<'a> {
    // Which kind of token this is.
    kind: TokenKind,
    // The exact slice of source text this token was scanned from.
    lexeme: &'a str,
    // literal: Object, // TODO(tazjin): Uhh?
    // Source line the token appeared on.
    line: usize,
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
/// Scanner state: walks the borrowed source string and accumulates
/// tokens and errors, instead of failing on the first problem.
struct Scanner<'a> {
    source: &'a str,
    tokens: Vec<Token<'a>>,
    // Errors are collected here rather than aborting the scan.
    errors: Vec<Error>,
    start: usize,   // offset of first character in current lexeme
    current: usize, // current offset into source
    line: usize,    // current line in source
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl<'a> Scanner<'a> {
 | 
				
			||||||
 | 
					    fn is_at_end(&self) -> bool {
 | 
				
			||||||
 | 
					        return self.current >= self.source.len();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn advance(&mut self) -> char {
 | 
				
			||||||
 | 
					        self.current += 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // TODO(tazjin): Due to utf8-safety, this is a bit annoying.
 | 
				
			||||||
 | 
					        // Since string iteration is not the point here I'm just
 | 
				
			||||||
 | 
					        // leaving this as is for now.
 | 
				
			||||||
 | 
					        self.source.chars().nth(self.current - 1).unwrap()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn add_token(&mut self, kind: TokenKind) {
 | 
				
			||||||
 | 
					        let lexeme = &self.source[self.start..self.current];
 | 
				
			||||||
 | 
					        self.tokens.push(Token {
 | 
				
			||||||
 | 
					            kind,
 | 
				
			||||||
 | 
					            lexeme,
 | 
				
			||||||
 | 
					            line: self.line,
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn scan_token(&mut self) {
 | 
				
			||||||
 | 
					        match self.advance() {
 | 
				
			||||||
 | 
					            '(' => self.add_token(TokenKind::LeftParen),
 | 
				
			||||||
 | 
					            ')' => self.add_token(TokenKind::RightParen),
 | 
				
			||||||
 | 
					            '{' => self.add_token(TokenKind::LeftBrace),
 | 
				
			||||||
 | 
					            '}' => self.add_token(TokenKind::RightBrace),
 | 
				
			||||||
 | 
					            ',' => self.add_token(TokenKind::Comma),
 | 
				
			||||||
 | 
					            '.' => self.add_token(TokenKind::Dot),
 | 
				
			||||||
 | 
					            '-' => self.add_token(TokenKind::Minus),
 | 
				
			||||||
 | 
					            '+' => self.add_token(TokenKind::Plus),
 | 
				
			||||||
 | 
					            ';' => self.add_token(TokenKind::Semicolon),
 | 
				
			||||||
 | 
					            '*' => self.add_token(TokenKind::Star),
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            unexpected => self.errors.push(Error {
 | 
				
			||||||
 | 
					                line: self.line,
 | 
				
			||||||
 | 
					                kind: ErrorKind::UnexpectedChar(unexpected),
 | 
				
			||||||
 | 
					            }),
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn scan_tokens(mut self) -> Vec<Token<'a>> {
 | 
				
			||||||
 | 
					        while !self.is_at_end() {
 | 
				
			||||||
 | 
					            self.start = self.current;
 | 
				
			||||||
 | 
					            self.scan_token();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return self.tokens;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue