Done using
    find third_party/lisp/{sclf,mime4cl,npg} \
      -name '*.lisp' -or -name '*.asd' \
      -exec bash -c 'expand -i -t 8 "$0" | sponge "$0"' {} \;
Change-Id: If84afac9c1d5cbc74e137a5aa0ae61472f0f1e90
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5066
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
		
	
			
		
			
				
	
	
		
			234 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Common Lisp
		
	
	
	
	
	
			
		
		
	
	
			234 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Common Lisp
		
	
	
	
	
	
;;;  parser.lisp --- runtime parser

;;;  Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;;  Author: Walter C. Pelissero <walter@pelissero.de>
;;;  Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: parser.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

;;;  Commentary:
;;;
;;; This is the runtime part of the parser.  The code that is
;;; responsible to execute the parser defined with the primitives
;;; found in define.lisp.
;; All runtime parser code lives in the NPG package defined in package.lisp.
(in-package :naive-parser-generator)
(defvar *debug* nil
  "Either NIL or a stream where to write debug information.
When bound to a stream, the parser traces every token match and
production reduction to it.")
;; Compiled in only when the :debug feature is present; used by MATCH
;; (inside PARSE) to abort grammars that loop forever.
#+debug (declaim (fixnum *maximum-recursion-depth*))
#+debug (defvar *maximum-recursion-depth* 1000
  "Maximum depth the parser is allowed to recursively call itself.
This is the only way for the parser to detect a loop in the grammar.
Tune this if your grammar is unusually complex.")
(declaim (inline reduce-production))
(defun reduce-production (production arguments)
  "Apply PRODUCTION's action on ARGUMENTS.  This has the effect of
  \"reducing\" the production.
ARGUMENTS may be a mix of token objects and already-reduced values;
token objects are unwrapped to their value before the action is
called."
  (when *debug*
    (format *debug* "reducing ~S on ~S~%" production arguments))
  (flet ((safe-token-value (token)
           ;; Already-reduced (non-token) arguments pass through as-is.
           (if (token-p token)
               (token-value token)
               token)))
    (apply (prod-action production) (mapcar #'safe-token-value arguments))))
(defgeneric later-position (pos1 pos2)
  (:documentation
   "Compare two file positions and return true if POS1 is later than
POS2 in the input stream."))
;; This is meant to be overloaded in the lexer: plain integer
;; positions compare numerically.
(defmethod later-position ((pos1 integer) (pos2 integer))
  (> pos1 pos2))
;; this looks silly but turns out to be useful (see below): :EOF is
;; later than any non-:EOF position, so EOF errors win the "best
;; error" comparison in PARSE.
(defmethod later-position (pos1 pos2)
  (and (eq pos1 :eof) (not (eq pos2 :eof))))
(defgeneric read-next-tokens (tokens-source)
  (:documentation "Read next token from a lexical analysed stream.  The nature of
TOKENS-SOURCE is implementation dependent and any lexical analyzer is
supposed to specialise this method."))
;; This is the actual parser.  The algorithm is pretty
;; straightforward, the execution of the reductions a bit less.  Error
;; recovery is rather clumsy.

(defun parse (grammar start tokenizer)
  "Match a GRAMMAR against the list of input tokens coming from TOKENIZER.
Return the reduced values according to the nonterminal actions.  Raise
an error on failure.

GRAMMAR is a GRAMMAR object, START the symbol naming the start rule,
and TOKENIZER an object accepted by READ-NEXT-TOKENS.  Internally
MATCH and MATCH-TOKEN return a two-element list: (NIL NIL) on success
or (ERROR-POSITION ERROR-DESCRIPTION) on failure, where the
description may be a thunk producing the message lazily."
  (declare (type grammar grammar)
           (type symbol start))
  (labels
      ((match-token (expected token)
         ;; Decide whether a single EXPECTED terminal matches TOKEN.
         (when *debug*
           (format *debug* "match-token ~S ~S -> " expected token))
         (let ((res (cond ((symbolp expected)
                           ;; non-constant terminal (like identifiers)
                           (eq expected (token-type token)))
                          ((and (stringp expected)
                                (stringp (token-value token)))
                           ;; string constant terminal
                           (funcall (the function (grammar-equal-p grammar)) expected (token-value token)))
                          ((functionp expected)
                           ;; custom equality predicate (must be able
                           ;; to deal with token objects)
                           (funcall expected token))
                          ;; all the rest
                          (t (equal expected (token-value token))))))
           (when *debug*
             (format *debug* "~Amatched~%" (if res "" "not ")))
           res))
       (match (expected matched #+debug depth)
         ;; EXPECTED is the list of tokens/rules still to match;
         ;; MATCHED is a headed list whose cdr holds the input tokens
         ;; consumed so far (destructively updated by reductions).
         (declare (list expected matched)
                  #+debug (fixnum depth))
         (let ((first-expected (car expected)))
           (cond #+debug ((> depth *maximum-recursion-depth*)
                  (error "endless recursion on ~A ~A at ~A expecting ~S"
                         (token-type (car matched)) (token-value (car matched))
                         (token-position (car matched)) expected))
                 ((eq first-expected :any)
                  (match (cdr expected) (cdr matched) #+debug depth))
                 ;; This is a trick to obtain partial parses.  When we
                 ;; reach this expected token we assume we succeeded
                 ;; the parsing and return the remaining tokens as
                 ;; part of the match.
                 ((eq first-expected :rest)
                  ;; we could be at the end of input so we check this
                  (unless (cdr matched)
                    (setf (cdr matched) (list :rest)))
                  (list nil nil))
                 ((rule-p first-expected)
                  ;; If it's a rule, then we try to match all its
                  ;; productions.  We return the first that succeeds.
                  (loop
                     for production in (rule-productions first-expected)
                     for production-tokens of-type list = (prod-tokens production)
                     with last-error-position = nil
                     with last-error = nil
                     for (error-position error-descr) =
                       (progn
                         (when *debug*
                           (format *debug* "trying to match ~A: ~S~%"
                                   (rule-name first-expected) production-tokens))
                         (match (append production-tokens (cdr expected)) matched #+debug (1+ depth)))
                     do (cond ((not error-position)
                               ;; Success: splice the reduced value in
                               ;; place of the tokens it consumed.
                               (return (let ((args-count (prod-tokens-length production)))
                                         (setf (cdr matched)
                                               (cons (reduce-production
                                                      production
                                                      (subseq (the list (cdr matched)) 0 args-count))
                                                     (nthcdr (1+ args-count) matched)))
                                         (list nil nil))))
                              ((or (not last-error)
                                   (later-position error-position last-error-position))
                               ;; Keep the error that got furthest.
                               (setf last-error-position error-position
                                     last-error error-descr)))
                     ;; if everything fails return the "best" error
                     finally (return (list last-error-position
                                           (if *debug*
                                               #'(lambda ()
                                                   (format nil "~A, trying to match ~A"
                                                           (funcall (the function last-error))
                                                           (rule-name first-expected)))
                                               last-error)))))
                 (t
                  ;; if necessary load the next tokens
                  (when (null (cdr matched))
                    (setf (cdr matched) (read-next-tokens tokenizer)))
                  (cond ((and (or (null expected) (eq first-expected :eof))
                              (null (cdr matched)))
                         ;; This point is reached only once for each complete
                         ;; parsing.  The expected tokens and the input
                         ;; tokens have been exhausted at the same time.
                         ;; Hence we succeeded the parsing.
                         (setf (cdr matched) (list :eof))
                         (list nil nil))
                        ((null expected)
                         ;; Garbage at end of parsing.  This may mean that we
                         ;; have considered a production completed too soon.
                         (list (token-position (car matched))
                               #'(lambda ()
                                   "garbage at end of parsing")))
                        ((null (cdr matched))
                         ;; EOF error
                         (list :eof
                               #'(lambda ()
                                   (format nil "end of input expecting ~S" expected))))
                        (t ;; normal token
                         (let ((first-token (cadr matched)))
                           (if (match-token first-expected first-token)
                               (match (cdr expected) (cdr matched) #+debug depth)
                               ;; failed: we return the error
                               (list (token-position first-token)
                                     #'(lambda ()
                                         (format nil "expected ~S but got ~S ~S"
                                                 first-expected (token-type first-token)
                                                 (token-value first-token)))))))))))))
    (declare (inline match-token))
    (let ((result (list :head)))
      (destructuring-bind (error-position error)
          (match (list (find-rule start (grammar-rules grammar))) result #+debug 0)
        (when error-position
          (error "~A at ~A~%" (funcall (the function error)) error-position))
        ;; The reduced value of the start rule ends up right after the
        ;; :head marker.
        (cadr result)))))
(defgeneric terminals-in-grammar (grammar-or-hashtable)
  (:documentation
   "Find non constant terminal symbols in GRAMMAR."))
(defmethod terminals-in-grammar ((grammar hash-table))
  ;; Walk every production of every rule and collect the symbols
  ;; (i.e. the non-constant terminals) appearing among its tokens.
  (loop
     for rule being each hash-value of grammar
     with terminals = '()
     do (loop
           for prod in (rule-productions rule)
           do (loop
                 for tok in (prod-tokens prod)
                 when (symbolp tok)
                 do (pushnew tok terminals)))
     finally (return terminals)))
;; Convenience method: delegate to the rules hash table.
(defmethod terminals-in-grammar ((grammar grammar))
  (terminals-in-grammar (grammar-rules grammar)))
(defun print-grammar-figures (grammar &optional (stream *standard-output*))
  "Print to STREAM a short summary of GRAMMAR: number of rules,
number of constant terminals (keywords), and the list of variable
terminals."
  (format stream "rules: ~A~%constant terminals: ~A~%variable terminals: ~S~%"
          (hash-table-count (grammar-rules grammar))
          (hash-table-count (grammar-keywords grammar))
          (terminals-in-grammar (grammar-rules grammar))))
(defun grammar-keyword-p (keyword grammar)
  "Check if KEYWORD is part of this grammar.
Return whatever FIND-KEYWORD returns (a generalised boolean)."
  (find-keyword keyword (grammar-keywords grammar)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Global registry mapping grammar names (symbols) to grammar objects.

(defvar *grammars* (make-hash-table)
  "Table of all the grammars registered with ADD-GRAMMAR, keyed by name.")

(defun find-grammar (name)
  "Return the grammar registered under NAME, or NIL if there is none."
  (gethash name *grammars*))

(defun delete-grammar (name)
  "Remove the grammar registered under NAME, if any."
  (remhash name *grammars*))

(defun add-grammar (name grammar)
  "Register GRAMMAR under NAME, replacing any previous entry."
  (setf (gethash name *grammars*) grammar))