feat(wpcarro/simple-select): Parse query language 🎉
Seems to successfully handle inputs like:

    > (-fname:"William" lname:/C.*l/) OR (fname:"William" -lname:"Carroll")

Change-Id: I5277cfbc7d102158eab5e1e71b2d95aaf13508fd
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5340
Reviewed-by: wpcarro <wpcarro@gmail.com>
Autosubmit: wpcarro <wpcarro@gmail.com>
Tested-by: BuildkiteCI
This commit is contained in:
		
							parent
							
								
									b5f78e433c
								
							
						
					
					
						commit
						eb412699b6
					
				
					 2 changed files with 93 additions and 19 deletions
				
			
		|  | @ -1,5 +1,7 @@ | |||
| import string | ||||
| from scanner import Scanner | ||||
| from parser import Parser | ||||
| 
 | ||||
| ################################################################################ | ||||
| # Predicates | ||||
| ################################################################################ | ||||
|  | @ -20,6 +22,13 @@ def is_whitespace(c): | |||
| # Tokenizer | ||||
| ################################################################################ | ||||
| 
 | ||||
# Token singletons shared by the tokenizer and the parser. Every token is a
# plain (type, value) tuple, so scanned tokens can be compared against these
# constants directly with `==` and `in`.
AND    = ("CONJUNCTION", "AND")
OR     = ("CONJUNCTION", "OR")
NOT    = ("PUNCTUATION", "NOT")
COLON  = ("PUNCTUATION", "COLON")
LPAREN = ("PUNCTUATION", "LPAREN")
RPAREN = ("PUNCTUATION", "RPAREN")
| 
 | ||||
| def tokenize(x): | ||||
|   s = Scanner(x) | ||||
|   tokens = scan_tokens(s) | ||||
|  | @ -36,8 +45,10 @@ def scan_tokens(s): | |||
| 
 | ||||
| def scan_token(s): | ||||
|   punctuation = { | ||||
|       "-": "NOT", | ||||
|       ":": "COLON", | ||||
|       "-": NOT, | ||||
|       ":": COLON, | ||||
|       "(": LPAREN, | ||||
|       ")": RPAREN, | ||||
|   } | ||||
|   c = s.peek() | ||||
|   if c in punctuation: | ||||
|  | @ -71,18 +82,78 @@ def tokenize_regex(s): | |||
|   return ("REGEX", current) | ||||
| 
 | ||||
def tokenize_identifier(s):
  """Scan an alphanumeric word from scanner `s` and return it as a token.

  Returns ("CONJUNCTION", "AND"|"OR") when the word is a conjunction
  keyword (matched case-insensitively), and ("IDENTIFIER", word) otherwise.
  """
  current = s.advance()
  while is_alphanumeric(s.peek()):
    current += s.advance()
  # Normalize once instead of calling .upper() in both the test and the
  # returned token.
  upper = current.upper()
  if upper in {"AND", "OR"}:
    return ("CONJUNCTION", upper)
  return ("IDENTIFIER", current)
| 
 | ||||
| ################################################################################ | ||||
| # Parser | ||||
| ################################################################################ | ||||
| 
 | ||||
| # EBNF | ||||
| # Note: we order expression types by ascending levels of precedence. | ||||
| # | ||||
| # expression  -> conjunction ; | ||||
| # conjunction -> selection ( ( "AND" | "OR" )? selection )* ; | ||||
| # selection   -> "-"? IDENTIFIER ":" ( REGEX | STRING ) | grouping ; | ||||
| # grouping    -> REGEX | STRING | "(" expression ")" ; | ||||
| 
 | ||||
def parse(x):
  """Tokenize and parse query string `x`, returning its AST.

  Raises if the grammar cannot consume the entire token stream — this
  top-level check is the right place for trailing-garbage detection,
  since recursive rules legitimately stop early (e.g. at a ")").
  """
  p = Parser(tokenize(x))
  result = expression(p)
  if not p.exhausted():
    raise Exception("Encountered more tokens than we can parse: \"{}\"".format(p.tokens[p.i:]))
  return result
| 
 | ||||
def expression(p):
  # Grammar entry point; `conjunction` is the lowest-precedence rule.
  return conjunction(p)
| 
 | ||||
def conjunction(p):
  """Parse a left-associative chain of selections.

  Adjacent selections with no explicit conjunction default to AND, so
  `a:"x" b:"y"` parses the same as `a:"x" AND b:"y"`.
  """
  lhs = selection(p)

  # Fold selections until EOL or a paren; parens delimit an enclosing
  # grouping and are consumed by grouping(), not here.
  while not p.exhausted() and p.test(lambda tokens, i: tokens[i] not in {LPAREN, RPAREN}):
    conj = p.advance() if p.peek()[0] == "CONJUNCTION" else AND
    rhs = selection(p)
    lhs = ("CONJUNCTION", conj[1], lhs, rhs)

  # BUG FIX: the previous version raised here whenever tokens remained.
  # Because the closing RPAREN of a grouping is still unconsumed when this
  # recursive call returns, every parenthesized group — e.g. `(a:"x")` —
  # blew up. Leftover-token detection is the top-level caller's concern.
  return lhs
| 
 | ||||
def selection(p):
  """Parse a (possibly negated) `field:value` selection, or a grouping.

  Per the EBNF, "-" may only precede an IDENTIFIER selection. The previous
  version consumed a "-" and then silently dropped the negation when a
  grouping followed; now `-( ... )` fails loudly via expect().
  """
  negate = False
  if p.peek() == NOT:
    negate = True
    p.advance()

  # Only dispatch to grouping when no "-" was consumed.
  if not negate and p.peek()[0] != "IDENTIFIER":
    return grouping(p)

  ident = p.expect(lambda x: x[0] == "IDENTIFIER")
  p.expect(lambda x: x == COLON)  # colon token itself is discarded
  value = p.expect(lambda x: x[0] in {"REGEX", "STRING"})
  return ("SELECTION", negate, ident[1], value)
| 
 | ||||
def grouping(p):
  """Parse a bare REGEX/STRING value or a parenthesized sub-expression."""
  # A lone value token stands for itself.
  if p.peek()[0] in {"REGEX", "STRING"}:
    return p.advance()

  if p.peek() == LPAREN:
    p.advance()
    expr = expression(p)
    p.expect(lambda x: x == RPAREN)
    return ("GROUPING", expr)

  # BUG FIX: previously this fell through and returned None on any other
  # token, which surfaced later as a confusing error; fail loudly here.
  raise Exception("Unexpected token: \"{}\"".format(p.peek()))
| 
 | ||||
| ################################################################################ | ||||
| # Main | ||||
| ################################################################################ | ||||
|  | @ -90,7 +161,7 @@ def tokenize_identifier(s): | |||
def main():
  """REPL: read a query per line and print its parsed AST."""
  while True:
    try:
      x = input("> ")
    except EOFError:
      # Exit cleanly on Ctrl-D instead of dying with a traceback.
      break
    print(parse(x))
| 
 | ||||
# Run the REPL only when executed as a script, not on import.
if __name__ == "__main__":
  main()
|  |  | |||
|  | @ -3,26 +3,29 @@ class Parser(object): | |||
|         self.tokens = tokens | ||||
|         self.i = 0 | ||||
| 
 | ||||
|     def prev(self): | ||||
|         return self.tokens[self.i - 1] | ||||
|     def exhausted(self): | ||||
|         return self.i >= len(self.tokens) | ||||
| 
 | ||||
|     def curr(self): | ||||
|         return self.tokens[self.i] | ||||
|     def peek(self, n=0): | ||||
|         return self.tokens[self.i + n] | ||||
| 
 | ||||
|     def consume(self): | ||||
|     def advance(self): | ||||
|         if not self.exhausted(): | ||||
|             self.i += 1 | ||||
|             return self.prev() | ||||
|         return self.peek(n=-1) | ||||
| 
 | ||||
|     def match(self, xs): | ||||
|         if not self.exhausted() and self.curr() in xs: | ||||
|             self.consume() | ||||
|         if self.peek() in xs: | ||||
|             self.advance() | ||||
|             return True | ||||
|         return False | ||||
| 
 | ||||
|     def expect(self, xs): | ||||
|         if not self.match(xs): | ||||
|             raise Exception("Expected token \"{}\" but received \"{}\"".format(xs, self.curr())) | ||||
    def test(self, predicate):
        # Apply `predicate(tokens, i)` at the current cursor without
        # consuming anything; lets callers look around arbitrarily.
        return predicate(self.tokens, self.i)
| 
 | ||||
|     def exhausted(self): | ||||
|         return self.i >= len(self.tokens) | ||||
|     def expect(self, predicate): | ||||
|         if self.exhausted(): | ||||
|             raise Exception("Unexpected EOL") | ||||
|         if predicate(self.peek()): | ||||
|             return self.advance() | ||||
|         raise Exception("Unexpected token: \"{}\"".format(self.peek())) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue