feat(tazjin/rlox): Synchronise parser state after errors
This lets the parser collect multiple errors instead of returning after the first one, with some optimistic synchronisation after encountering something that looks wonky.

Change-Id: Ie9d0ce8de9dcc7a3d1e7aa2abe15f74cab0ab96b
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2236
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
		
							parent
							
								
									1835b2be99
								
							
						
					
					
						commit
						42405bfa24
					
				
					 3 changed files with 67 additions and 10 deletions
				
			
		| 
						 | 
					@ -3,6 +3,7 @@ pub enum ErrorKind {
 | 
				
			||||||
    UnexpectedChar(char),
 | 
					    UnexpectedChar(char),
 | 
				
			||||||
    UnterminatedString,
 | 
					    UnterminatedString,
 | 
				
			||||||
    UnmatchedParens,
 | 
					    UnmatchedParens,
 | 
				
			||||||
 | 
					    ExpectedExpression(String),
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug)]
 | 
					#[derive(Debug)]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,7 +11,7 @@ pub fn run(code: &str) {
 | 
				
			||||||
            print_tokens(&tokens);
 | 
					            print_tokens(&tokens);
 | 
				
			||||||
            match parser::parse(tokens) {
 | 
					            match parser::parse(tokens) {
 | 
				
			||||||
                Ok(expr) => println!("Expression:\n{:?}", expr),
 | 
					                Ok(expr) => println!("Expression:\n{:?}", expr),
 | 
				
			||||||
                Err(error) => report_errors(vec![error]),
 | 
					                Err(errors) => report_errors(errors),
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        Err(errors) => report_errors(errors),
 | 
					        Err(errors) => report_errors(errors),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -100,7 +100,7 @@ impl<'a> Parser<'a> {
 | 
				
			||||||
    fn unary(&mut self) -> ExprResult<'a> {
 | 
					    fn unary(&mut self) -> ExprResult<'a> {
 | 
				
			||||||
        if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
 | 
					        if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
 | 
				
			||||||
            return Ok(Expr::Unary(Unary {
 | 
					            return Ok(Expr::Unary(Unary {
 | 
				
			||||||
                operator: self.previous(),
 | 
					                operator: self.previous().clone(),
 | 
				
			||||||
                right: Box::new(self.unary()?),
 | 
					                right: Box::new(self.unary()?),
 | 
				
			||||||
            }));
 | 
					            }));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
| 
						 | 
					@ -123,8 +123,13 @@ impl<'a> Parser<'a> {
 | 
				
			||||||
                return Ok(Expr::Grouping(Grouping(Box::new(expr))));
 | 
					                return Ok(Expr::Grouping(Grouping(Box::new(expr))));
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // This branch indicates a parser bug, not invalid input.
 | 
					            unexpected => {
 | 
				
			||||||
            unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected),
 | 
					                eprintln!("encountered {:?}", unexpected);
 | 
				
			||||||
 | 
					                return Err(Error {
 | 
				
			||||||
 | 
					                    line: next.line,
 | 
				
			||||||
 | 
					                    kind: ErrorKind::ExpectedExpression(next.lexeme.into_iter().collect()),
 | 
				
			||||||
 | 
					                });
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        Ok(Expr::Literal(literal))
 | 
					        Ok(Expr::Literal(literal))
 | 
				
			||||||
| 
						 | 
					@ -150,7 +155,7 @@ impl<'a> Parser<'a> {
 | 
				
			||||||
            self.current += 1;
 | 
					            self.current += 1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return self.previous();
 | 
					        return self.previous().clone();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn is_at_end(&self) -> bool {
 | 
					    fn is_at_end(&self) -> bool {
 | 
				
			||||||
| 
						 | 
					@ -166,8 +171,8 @@ impl<'a> Parser<'a> {
 | 
				
			||||||
        &self.tokens[self.current]
 | 
					        &self.tokens[self.current]
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn previous(&self) -> Token<'a> {
 | 
					    fn previous(&self) -> &Token<'a> {
 | 
				
			||||||
        self.tokens[self.current - 1].clone()
 | 
					        &self.tokens[self.current - 1]
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> {
 | 
					    fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> {
 | 
				
			||||||
| 
						 | 
					@ -182,6 +187,31 @@ impl<'a> Parser<'a> {
 | 
				
			||||||
        })
 | 
					        })
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn synchronise(&mut self) {
 | 
				
			||||||
 | 
					        self.advance();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        while !self.is_at_end() {
 | 
				
			||||||
 | 
					            if self.previous().kind == TokenKind::Semicolon {
 | 
				
			||||||
 | 
					                return;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            match self.peek().kind {
 | 
				
			||||||
 | 
					                TokenKind::Class
 | 
				
			||||||
 | 
					                | TokenKind::Fun
 | 
				
			||||||
 | 
					                | TokenKind::Var
 | 
				
			||||||
 | 
					                | TokenKind::For
 | 
				
			||||||
 | 
					                | TokenKind::If
 | 
				
			||||||
 | 
					                | TokenKind::While
 | 
				
			||||||
 | 
					                | TokenKind::Print
 | 
				
			||||||
 | 
					                | TokenKind::Return => return,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                _ => {
 | 
				
			||||||
 | 
					                    self.advance();
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn binary_operator(
 | 
					    fn binary_operator(
 | 
				
			||||||
        &mut self,
 | 
					        &mut self,
 | 
				
			||||||
        oneof: &[TokenKind],
 | 
					        oneof: &[TokenKind],
 | 
				
			||||||
| 
						 | 
					@ -192,7 +222,7 @@ impl<'a> Parser<'a> {
 | 
				
			||||||
        while self.match_token(oneof) {
 | 
					        while self.match_token(oneof) {
 | 
				
			||||||
            expr = Expr::Binary(Binary {
 | 
					            expr = Expr::Binary(Binary {
 | 
				
			||||||
                left: Box::new(expr),
 | 
					                left: Box::new(expr),
 | 
				
			||||||
                operator: self.previous(),
 | 
					                operator: self.previous().clone(),
 | 
				
			||||||
                right: Box::new(each(self)?),
 | 
					                right: Box::new(each(self)?),
 | 
				
			||||||
            })
 | 
					            })
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
| 
						 | 
					@ -201,8 +231,34 @@ impl<'a> Parser<'a> {
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn parse<'a>(tokens: Vec<Token<'a>>) -> ExprResult<'a> {
 | 
					pub fn parse<'a>(tokens: Vec<Token<'a>>) -> Result<Expr<'a>, Vec<Error>> {
 | 
				
			||||||
    let mut parser = Parser { tokens, current: 0 };
 | 
					    let mut parser = Parser { tokens, current: 0 };
 | 
				
			||||||
 | 
					    let mut errors: Vec<Error> = vec![];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    parser.expression()
 | 
					    while !parser.is_at_end() {
 | 
				
			||||||
 | 
					        match parser.expression() {
 | 
				
			||||||
 | 
					            Err(err) => {
 | 
				
			||||||
 | 
					                errors.push(err);
 | 
				
			||||||
 | 
					                parser.synchronise();
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Ok(expr) => {
 | 
				
			||||||
 | 
					                if !parser.is_at_end() {
 | 
				
			||||||
 | 
					                    // TODO(tazjin): This isn't a functional language
 | 
				
			||||||
 | 
					                    // - multiple statements should be allowed, at
 | 
				
			||||||
 | 
					                    // some point.
 | 
				
			||||||
 | 
					                    let current = &parser.tokens[parser.current];
 | 
				
			||||||
 | 
					                    errors.push(Error {
 | 
				
			||||||
 | 
					                        line: current.line,
 | 
				
			||||||
 | 
					                        kind: ErrorKind::UnexpectedChar(current.lexeme[0]),
 | 
				
			||||||
 | 
					                    });
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if errors.is_empty() {
 | 
				
			||||||
 | 
					                    return Ok(expr);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return Err(errors);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue