Done using
    find third_party/lisp/{sclf,mime4cl,npg} \
      -name '*.lisp' -or -name '*.asd' \
      -exec bash -c 'expand -i -t 8 "$0" | sponge "$0"' {} \;
Change-Id: If84afac9c1d5cbc74e137a5aa0ae61472f0f1e90
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5066
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>

parser.lisp (234 lines, 11 KiB, Common Lisp)

;;;  parser.lisp --- runtime parser

;;;  Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;;  Author: Walter C. Pelissero <walter@pelissero.de>
;;;  Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: parser.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

;;;  Commentary:
;;;
;;; This is the runtime part of the parser: the code responsible for
;;; executing a parser defined with the primitives found in
;;; define.lisp.

(in-package :naive-parser-generator)

(defvar *debug* nil
  "Either nil or a stream to which debug information is written.")
#+debug (declaim (fixnum *maximum-recursion-depth*))
#+debug (defvar *maximum-recursion-depth* 1000
  "Maximum depth to which the parser is allowed to call itself recursively.
This is the only way for the parser to detect a loop in the grammar.
Tune this if your grammar is unusually complex.")

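;; For interactive debugging, *DEBUG* can be pointed at any stream,
;; e.g. (setf *debug* *standard-output*) at the REPL makes the parser
;; trace every token match and reduction it attempts.
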
(declaim (inline reduce-production))
(defun reduce-production (production arguments)
  "Apply PRODUCTION's action on ARGUMENTS.  This has the effect of
  \"reducing\" the production."
  (when *debug*
    (format *debug* "reducing ~S on ~S~%" production arguments))
  (flet ((safe-token-value (token)
           (if (token-p token)
               (token-value token)
               token)))
    (apply (prod-action production) (mapcar #'safe-token-value arguments))))

(defgeneric later-position (pos1 pos2)
  (:documentation
   "Compare two file positions and return true if POS1 is later than
POS2 in the input stream."))

;; This is meant to be overloaded in the lexer
(defmethod later-position ((pos1 integer) (pos2 integer))
  (> pos1 pos2))

;; this looks silly but turns out to be useful (see below)
(defmethod later-position (pos1 pos2)
  (and (eq pos1 :eof) (not (eq pos2 :eof))))

(defgeneric read-next-tokens (tokens-source)
  (:documentation "Read the next tokens from a lexically analysed stream.  The nature
of TOKENS-SOURCE is implementation-dependent and any lexical analyzer
is supposed to specialise this method."))

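;; A lexical analyzer hooks into the parser by specialising
;; READ-NEXT-TOKENS (and, if it uses non-integer positions,
;; LATER-POSITION).  A minimal sketch with hypothetical names -- not
;; part of NPG itself -- assuming tokens are objects satisfying
;; TOKEN-P with the TOKEN-TYPE/TOKEN-VALUE/TOKEN-POSITION accessors
;; used by PARSE below:
;;
;;   (defclass list-tokenizer ()
;;     ((tokens :initarg :tokens :accessor list-tokenizer-tokens)))
;;
;;   (defmethod read-next-tokens ((source list-tokenizer))
;;     ;; PARSE calls this whenever it runs out of lookahead; returning
;;     ;; one token per call is the simplest valid behaviour, and an
;;     ;; empty list signals end of input.
;;     (when (list-tokenizer-tokens source)
;;       (list (pop (list-tokenizer-tokens source)))))
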
;; This is the actual parser.  The algorithm is pretty
;; straightforward; the execution of the reductions is a bit less so.
;; Error recovery is rather clumsy.

(defun parse (grammar start tokenizer)
  "Match a GRAMMAR against the list of input tokens coming from TOKENIZER.
Return the reduced values according to the nonterminal actions.  Raise
an error on failure."
  (declare (type grammar grammar)
           (type symbol start))
  (labels
      ((match-token (expected token)
         (when *debug*
           (format *debug* "match-token ~S ~S -> " expected token))
         (let ((res (cond ((symbolp expected)
                           ;; non-constant terminal (like identifiers)
                           (eq expected (token-type token)))
                          ((and (stringp expected)
                                (stringp (token-value token)))
                           ;; string constant terminal
                           (funcall (the function (grammar-equal-p grammar)) expected (token-value token)))
                          ((functionp expected)
                           ;; custom equality predicate (must be able
                           ;; to deal with token objects)
                           (funcall expected token))
                          ;; all the rest
                          (t (equal expected (token-value token))))))
           (when *debug*
             (format *debug* "~Amatched~%" (if res "" "not ")))
           res))
       (match (expected matched #+debug depth)
         (declare (list expected matched)
                  #+debug (fixnum depth))
         (let ((first-expected (car expected)))
           (cond #+debug ((> depth *maximum-recursion-depth*)
                  (error "endless recursion on ~A ~A at ~A expecting ~S"
                         (token-type (car matched)) (token-value (car matched))
                         (token-position (car matched)) expected))
                 ((eq first-expected :any)
                  (match (cdr expected) (cdr matched) #+debug depth))
                 ;; This is a trick to obtain partial parses.  When we
                 ;; reach this expected token we assume the parsing has
                 ;; succeeded and return the remaining tokens as part
                 ;; of the match.
                 ((eq first-expected :rest)
                  ;; we could be at the end of input so we check this
                  (unless (cdr matched)
                    (setf (cdr matched) (list :rest)))
                  (list nil nil))
                 ((rule-p first-expected)
                  ;; If it's a rule, then we try to match all its
                  ;; productions.  We return the first that succeeds.
                  (loop
                     for production in (rule-productions first-expected)
                     for production-tokens of-type list = (prod-tokens production)
                     with last-error-position = nil
                     with last-error = nil
                     for (error-position error-descr) =
                       (progn
                         (when *debug*
                           (format *debug* "trying to match ~A: ~S~%"
                                   (rule-name first-expected) production-tokens))
                         (match (append production-tokens (cdr expected)) matched #+debug (1+ depth)))
                     do (cond ((not error-position)
                               (return (let ((args-count (prod-tokens-length production)))
                                         (setf (cdr matched)
                                               (cons (reduce-production
                                                      production
                                                      (subseq (the list (cdr matched)) 0 args-count))
                                                     (nthcdr (1+ args-count) matched)))
                                         (list nil nil))))
                              ((or (not last-error)
                                   (later-position error-position last-error-position))
                               (setf last-error-position error-position
                                     last-error error-descr)))
                     ;; if everything fails return the "best" error
                     finally (return (list last-error-position
                                           (if *debug*
                                               #'(lambda ()
                                                   (format nil "~A, trying to match ~A"
                                                           (funcall (the function last-error))
                                                           (rule-name first-expected)))
                                               last-error)))))
                 (t
                  ;; if necessary load the next tokens
                  (when (null (cdr matched))
                    (setf (cdr matched) (read-next-tokens tokenizer)))
                  (cond ((and (or (null expected) (eq first-expected :eof))
                              (null (cdr matched)))
                         ;; This point is reached only once for each complete
                         ;; parsing.  The expected tokens and the input
                         ;; tokens have been exhausted at the same time.
                         ;; Hence the parsing has succeeded.
                         (setf (cdr matched) (list :eof))
                         (list nil nil))
                        ((null expected)
                         ;; Garbage at end of parsing.  This may mean that we
                         ;; have considered a production completed too soon.
                         (list (token-position (car matched))
                               #'(lambda ()
                                   "garbage at end of parsing")))
                        ((null (cdr matched))
                         ;; EOF error
                         (list :eof
                               #'(lambda ()
                                   (format nil "end of input expecting ~S" expected))))
                        (t ;; normal token
                         (let ((first-token (cadr matched)))
                           (if (match-token first-expected first-token)
                               (match (cdr expected) (cdr matched) #+debug depth)
                               ;; failed: we return the error
                               (list (token-position first-token)
                                     #'(lambda ()
                                         (format nil "expected ~S but got ~S ~S"
                                                 first-expected (token-type first-token)
                                                 (token-value first-token)))))))))))))
    (declare (inline match-token))
    (let ((result (list :head)))
      (destructuring-bind (error-position error)
          (match (list (find-rule start (grammar-rules grammar))) result #+debug 0)
        (when error-position
          (error "~A at ~A~%" (funcall (the function error)) error-position))
        (cadr result)))))

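;; A hedged usage sketch (not from the original sources), with
;; hypothetical grammar name and start symbol: assuming a grammar was
;; defined with the primitives in define.lisp and registered as :CALC
;; via ADD-GRAMMAR below, and LEXER is any object specialising
;; READ-NEXT-TOKENS, a parse could be driven like this:
;;
;;   (parse (find-grammar :calc) 'expression lexer)
;;
;; On success this returns the value produced by the start rule's
;; action; on failure it signals an error carrying the position of
;; the "best" (latest) failed match.
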
(defgeneric terminals-in-grammar (grammar-or-hashtable)
  (:documentation
   "Find the non-constant terminal symbols in GRAMMAR."))

(defmethod terminals-in-grammar ((grammar hash-table))
  (loop
     for rule being each hash-value of grammar
     with terminals = '()
     do (loop
           for prod in (rule-productions rule)
           do (loop
                 for tok in (prod-tokens prod)
                 when (symbolp tok)
                 do (pushnew tok terminals)))
     finally (return terminals)))

(defmethod terminals-in-grammar ((grammar grammar))
  (terminals-in-grammar (grammar-rules grammar)))

(defun print-grammar-figures (grammar &optional (stream *standard-output*))
  (format stream "rules: ~A~%constant terminals: ~A~%variable terminals: ~S~%"
          (hash-table-count (grammar-rules grammar))
          (hash-table-count (grammar-keywords grammar))
          (terminals-in-grammar (grammar-rules grammar))))


(defun grammar-keyword-p (keyword grammar)
  "Check if KEYWORD is part of this grammar."
  (find-keyword keyword (grammar-keywords grammar)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defvar *grammars* (make-hash-table))

(defun find-grammar (name)
  (gethash name *grammars*))

(defun delete-grammar (name)
  (remhash name *grammars*))

(defun add-grammar (name grammar)
  (setf (gethash name *grammars*) grammar))
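
;; The registry above lets several grammars coexist in one image.  A
;; hedged usage sketch with hypothetical names: client code registers
;; a grammar once,
;;
;;   (add-grammar :calc calc-grammar)
;;
;; and later retrieves it with (find-grammar :calc) or discards it
;; with (delete-grammar :calc).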