By computing the amount the stream position advanced we can save a syscall on every read which speeds up mime:mime-body-stream by /a lot/, e.g. extracting a ~3MB attachment drops from over 15s to under ~0.5s. There's still a lot to be gained and correctness left to be desired which can be addressed as described in the newly added comment. Change-Id: I5e1dfd213aac41203f271cf220db456dfb95a02b Reviewed-on: https://cl.tvl.fyi/c/depot/+/5073 Tested-by: BuildkiteCI Reviewed-by: sterni <sternenseemann@systemli.org>
		
			
				
	
	
		
			355 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Common Lisp
		
	
	
	
	
	
			
		
		
	
	
			355 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Common Lisp
		
	
	
	
	
	
| ;;; streams.lisp --- En/De-coding Streams
 | |
| 
 | |
| ;;; Copyright (C) 2012 by Walter C. Pelissero
 | |
| ;;; Copyright (C) 2021-2022 by the TVL Authors
 | |
| 
 | |
| ;;; Author: Walter C. Pelissero <walter@pelissero.de>
 | |
| ;;; Project: mime4cl
 | |
| 
 | |
| ;;; This library is free software; you can redistribute it and/or
 | |
| ;;; modify it under the terms of the GNU Lesser General Public License
 | |
| ;;; as published by the Free Software Foundation; either version 2.1
 | |
| ;;; of the License, or (at your option) any later version.
 | |
| ;;; This library is distributed in the hope that it will be useful,
 | |
| ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| ;;; Lesser General Public License for more details.
 | |
| ;;; You should have received a copy of the GNU Lesser General Public
 | |
| ;;; License along with this library; if not, write to the Free
 | |
| ;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 | |
| ;;; 02111-1307 USA
 | |
| 
 | |
| (in-package :mime4cl)
 | |
| 
 | |
| (defclass coder-stream-mixin ()
 | |
|   ((real-stream :type stream
 | |
|                 :initarg :stream
 | |
|                 :reader real-stream)
 | |
|    (dont-close :initform nil
 | |
|                :initarg :dont-close)))
 | |
| 
 | |
| (defmethod stream-file-position ((stream coder-stream-mixin))
 | |
|   (file-position (slot-value stream 'real-stream)))
 | |
| 
 | |
| (defmethod (setf stream-file-position) (newval (stream coder-stream-mixin))
 | |
|   (file-position (slot-value stream 'real-stream) newval))
 | |
| 
 | |
| (defclass coder-input-stream-mixin (fundamental-binary-input-stream coder-stream-mixin)
 | |
|   ())
 | |
| (defclass coder-output-stream-mixin (fundamental-binary-output-stream coder-stream-mixin)
 | |
|   ())
 | |
| 
 | |
| 
 | |
| (defclass quoted-printable-decoder-stream (coder-input-stream-mixin quoted-printable-decoder) ())
 | |
| (defclass base64-decoder-stream (coder-input-stream-mixin base64-decoder) ())
 | |
| (defclass 8bit-decoder-stream (coder-input-stream-mixin 8bit-decoder) ())
 | |
| 
 | |
| (defclass quoted-printable-encoder-stream (coder-output-stream-mixin quoted-printable-encoder) ())
 | |
| (defclass base64-encoder-stream (coder-output-stream-mixin base64-encoder) ())
 | |
| (defclass 8bit-encoder-stream (coder-output-stream-mixin 8bit-encoder) ())
 | |
| 
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| 
 | |
| (defmethod initialize-instance :after ((stream coder-stream-mixin) &key &allow-other-keys)
 | |
|   (unless (slot-boundp stream 'real-stream)
 | |
|     (error "REAL-STREAM is unbound.  Must provide a :STREAM argument.")))
 | |
| 
 | |
| (defmethod initialize-instance ((stream coder-output-stream-mixin) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   (unless (slot-boundp stream 'output-function)
 | |
|     (setf (slot-value stream 'output-function)
 | |
|           #'(lambda (char)
 | |
|               (write-char char (slot-value stream 'real-stream))))))
 | |
| 
 | |
| (defmethod initialize-instance ((stream coder-input-stream-mixin) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   (unless (slot-boundp stream 'input-function)
 | |
|     (setf (slot-value stream 'input-function)
 | |
|           #'(lambda ()
 | |
|               (read-char (slot-value stream 'real-stream) nil)))))
 | |
| 
 | |
| (defmethod stream-read-byte ((stream coder-input-stream-mixin))
 | |
|   (or (decoder-read-byte stream)
 | |
|       :eof))
 | |
| 
 | |
| (defmethod stream-write-byte ((stream coder-output-stream-mixin) byte)
 | |
|   (encoder-write-byte stream byte))
 | |
| 
 | |
| (defmethod close ((stream coder-stream-mixin) &key abort)
 | |
|   (with-slots (real-stream dont-close) stream
 | |
|     (unless dont-close
 | |
|       (close real-stream :abort abort))))
 | |
| 
 | |
| (defmethod close ((stream coder-output-stream-mixin) &key abort)
 | |
|   (unless abort
 | |
|     (encoder-finish-output stream))
 | |
|   (call-next-method))
 | |
| 
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| 
 | |
| (defclass encoder-input-stream (fundamental-character-input-stream coder-stream-mixin)
 | |
|   ((encoder)
 | |
|    (buffer-queue :initform (make-queue)))
 | |
|   (:documentation
 | |
|    "This is the base class for encoders with the direction swapped. It
 | |
| reads from REAL-STREAM a stream of bytes, encodes it and returnes it
 | |
| in a stream of character."))
 | |
| 
 | |
| (defclass quoted-printable-encoder-input-stream (encoder-input-stream) ())
 | |
| (defclass base64-encoder-input-stream (encoder-input-stream) ())
 | |
| (defclass 8bit-encoder-input-stream (fundamental-character-input-stream coder-stream-mixin) ())
 | |
| 
 | |
| (defmethod initialize-instance ((stream quoted-printable-encoder-input-stream) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   (with-slots (encoder buffer-queue) stream
 | |
|     (setf encoder
 | |
|           (make-instance 'quoted-printable-encoder
 | |
|                          :output-function #'(lambda (char)
 | |
|                                               (queue-append buffer-queue char))))))
 | |
| 
 | |
| (defmethod initialize-instance ((stream base64-encoder-input-stream) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   (with-slots (encoder buffer-queue) stream
 | |
|     (setf encoder
 | |
|           (make-instance 'base64-encoder
 | |
|                          :output-function #'(lambda (char)
 | |
|                                               (queue-append buffer-queue char))))))
 | |
| 
 | |
| (defmethod stream-read-char ((stream encoder-input-stream))
 | |
|   (with-slots (encoder buffer-queue real-stream) stream
 | |
|     (loop
 | |
|        while (queue-empty-p buffer-queue)
 | |
|        do (be byte (read-byte real-stream nil)
 | |
|             (if byte
 | |
|                 (encoder-write-byte encoder byte)
 | |
|                 (progn
 | |
|                   (encoder-finish-output encoder)
 | |
|                   (queue-append buffer-queue :eof)))))
 | |
|     (queue-pop buffer-queue)))
 | |
| 
 | |
| 
 | |
| (defmethod stream-read-char ((stream 8bit-encoder-input-stream))
 | |
|   (with-slots (real-stream) stream
 | |
|     (aif (read-byte real-stream nil)
 | |
|          (code-char it)
 | |
|          :eof)))
 | |
| 
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| 
 | |
| (defclass input-adapter-stream ()
 | |
|   ((source :initarg :source)
 | |
|    (real-stream)
 | |
|    (input-function)))
 | |
| 
 | |
| (defclass binary-input-adapter-stream (fundamental-binary-input-stream input-adapter-stream) ())
 | |
| 
 | |
| (defclass character-input-adapter-stream (fundamental-character-input-stream input-adapter-stream) ())
 | |
| 
 | |
| (defmethod stream-element-type ((stream binary-input-adapter-stream))
 | |
|   '(unsigned-byte 8))
 | |
| 
 | |
| (defmethod initialize-instance ((stream input-adapter-stream) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   (assert (slot-boundp stream 'source)))
 | |
| 
 | |
| (defmethod initialize-instance ((stream binary-input-adapter-stream) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   ;; REAL-STREAM slot is set only if we are going to close it later on
 | |
|   (with-slots (source real-stream input-function) stream
 | |
|     (etypecase source
 | |
|       (string
 | |
|        (setf real-stream (make-string-input-stream source)
 | |
|              input-function #'(lambda ()
 | |
|                                 (awhen (read-char real-stream nil)
 | |
|                                   (char-code it)))))
 | |
|       ((vector (unsigned-byte 8))
 | |
|        (be i 0
 | |
|          (setf input-function #'(lambda ()
 | |
|                                   (when (< i (length source))
 | |
|                                     (prog1 (aref source i)
 | |
|                                       (incf i)))))))
 | |
|       (stream
 | |
|        (assert (input-stream-p source))
 | |
|        (setf input-function (if (subtypep (stream-element-type source) 'character)
 | |
|                                 #'(lambda ()
 | |
|                                     (awhen (read-char source nil)
 | |
|                                       (char-code it)))
 | |
|                                 #'(lambda ()
 | |
|                                     (read-byte source nil)))))
 | |
|       (pathname
 | |
|        (setf real-stream (open source :element-type '(unsigned-byte 8))
 | |
|              input-function #'(lambda ()
 | |
|                                 (read-byte real-stream nil))))
 | |
|       (file-portion
 | |
|        (setf real-stream (open-decoded-file-portion source)
 | |
|              input-function #'(lambda ()
 | |
|                                 (read-byte real-stream nil)))))))
 | |
| 
 | |
| (defmethod initialize-instance ((stream character-input-adapter-stream) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   ;; REAL-STREAM slot is set only if we are going to close later on
 | |
|   (with-slots (source real-stream input-function) stream
 | |
|     (etypecase source
 | |
|       (string
 | |
|        (setf real-stream (make-string-input-stream source)
 | |
|              input-function #'(lambda ()
 | |
|                                 (read-char real-stream nil))))
 | |
|       ((vector (unsigned-byte 8))
 | |
|        (be i 0
 | |
|          (setf input-function #'(lambda ()
 | |
|                                   (when (< i (length source))
 | |
|                                     (prog1 (code-char (aref source i))
 | |
|                                       (incf i)))))))
 | |
|       (stream
 | |
|        (assert (input-stream-p source))
 | |
|        (setf input-function (if (subtypep (stream-element-type source) 'character)
 | |
|                                 #'(lambda ()
 | |
|                                     (read-char source nil))
 | |
|                                 #'(lambda ()
 | |
|                                     (awhen (read-byte source nil)
 | |
|                                       (code-char it))))))
 | |
|       (pathname
 | |
|        (setf real-stream (open source :element-type 'character)
 | |
|              input-function #'(lambda ()
 | |
|                                 (read-char real-stream nil))))
 | |
|       (file-portion
 | |
|        (setf real-stream (open-decoded-file-portion source)
 | |
|              input-function #'(lambda ()
 | |
|                                 (awhen (read-byte real-stream nil)
 | |
|                                   (code-char it))))))))
 | |
| 
 | |
| (defmethod close ((stream input-adapter-stream) &key abort)
 | |
|   (when (slot-boundp stream 'real-stream)
 | |
|     (with-slots (real-stream) stream
 | |
|       (close real-stream :abort abort))))
 | |
| 
 | |
| (defmethod stream-read-byte ((stream binary-input-adapter-stream))
 | |
|   (with-slots (input-function) stream
 | |
|     (or (funcall input-function)
 | |
|         :eof)))
 | |
| 
 | |
| (defmethod stream-read-char ((stream character-input-adapter-stream))
 | |
|   (with-slots (input-function) stream
 | |
|     (or (funcall input-function)
 | |
|         :eof)))
 | |
| 
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| 
 | |
| (defclass delimited-input-stream (fundamental-character-input-stream coder-stream-mixin)
 | |
|   ((start-offset :initarg :start
 | |
|                  :initform 0
 | |
|                  :reader stream-start
 | |
|                  :type integer)
 | |
|    (end-offset :initarg :end
 | |
|                :initform nil
 | |
|                :reader stream-end
 | |
|                :type (or null integer))
 | |
|    (current-offset :type integer)))
 | |
| 
 | |
| (defmethod print-object ((object delimited-input-stream) stream)
 | |
|   (if *print-readably*
 | |
|       (call-next-method)
 | |
|       (with-slots (start-offset end-offset) object
 | |
|         (print-unreadable-object (object stream :type t :identity t)
 | |
|           (format stream "start=~A end=~A" start-offset end-offset)))))
 | |
| 
 | |
| (defun base-stream (stream)
 | |
|   (if (typep stream 'delimited-input-stream)
 | |
|       (base-stream (real-stream stream))
 | |
|       stream))
 | |
| 
 | |
| (defmethod initialize-instance ((stream delimited-input-stream) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   (unless (slot-boundp stream 'real-stream)
 | |
|     (error "REAL-STREAM is unbound.  Must provide a :STREAM argument."))
 | |
|   (with-slots (start-offset) stream
 | |
|     (file-position stream start-offset)))
 | |
| 
 | |
| (defmethod (setf stream-file-position) (newval (stream delimited-input-stream))
 | |
|   (with-slots (current-offset real-stream) stream
 | |
|     (setf current-offset newval)
 | |
|     (call-next-method)))
 | |
| 
 | |
| (defmethod stream-file-position ((stream delimited-input-stream))
 | |
|   (slot-value stream 'current-offset))
 | |
| 
 | |
| ;; Calling file-position with SBCL on every read is quite expensive, since
 | |
| ;; it will invoke lseek each time. This is so expensive that it's faster to
 | |
| ;; /compute/ the amount the stream got advanced by.
 | |
| ;; file-position's behavior however, is quite flexible and it behaves differently
 | |
| ;; not only for different implementation, but also different streams in SBCL.
 | |
| ;; Thus, we should ideally go back to file-position and try to reduce the amount
 | |
| ;; of calls by using read-sequence.
 | |
| ;; TODO(sterni): make decoders use read-sequence and drop offset tracking code
 | |
| (macrolet ((def-stream-read (name read-fun update-offset-form)
 | |
|              `(defmethod ,name ((stream delimited-input-stream))
 | |
|                (with-slots (real-stream end-offset current-offset) stream
 | |
|                  (let ((el (if (or (not end-offset)
 | |
|                                    (< current-offset end-offset))
 | |
|                                (or (,read-fun real-stream nil)
 | |
|                                    :eof)
 | |
|                                :eof)))
 | |
|                    (setf current-offset ,update-offset-form)
 | |
|                    el)))))
 | |
| 
 | |
|   ;; Assume we are using an encoding where < 128 is one byte, in all other cases
 | |
|   ;; it's hard to guess how much file-position will increase
 | |
|   (def-stream-read stream-read-char read-char
 | |
|     (if (or (eq el :eof) (< (char-code el) 128))
 | |
|         (1+ current-offset)
 | |
|         (file-position real-stream)))
 | |
| 
 | |
|   (def-stream-read stream-read-byte read-byte (1+ current-offset)))
 | |
| 
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| 
 | |
| (defclass my-string-input-stream (fundamental-character-input-stream coder-stream-mixin)
 | |
|   ((string :initarg :string
 | |
|            :reader stream-string)))
 | |
| 
 | |
| (defmethod initialize-instance ((stream my-string-input-stream) &key &allow-other-keys)
 | |
|   (call-next-method)
 | |
|   (assert (slot-boundp stream 'string))
 | |
|   (with-slots (string real-stream) stream
 | |
|     (setf real-stream (make-string-input-stream string))))
 | |
| 
 | |
| (defmethod stream-read-char ((stream my-string-input-stream))
 | |
|   (with-slots (real-stream) stream
 | |
|     (or (read-char real-stream nil)
 | |
|         :eof)))
 | |
| 
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| 
 | |
| (defstruct file-portion
 | |
|   data					;  string or a pathname
 | |
|   encoding
 | |
|   start
 | |
|   end)
 | |
| 
 | |
| (defun open-file-portion (file-portion)
 | |
|   (be data (file-portion-data file-portion)
 | |
|     (etypecase data
 | |
|       (pathname
 | |
|        (be stream (open data)
 | |
|          (make-instance 'delimited-input-stream
 | |
|                         :stream stream
 | |
|                         :start (file-portion-start file-portion)
 | |
|                         :end (file-portion-end file-portion))))
 | |
|       (string
 | |
|        (make-instance 'delimited-input-stream
 | |
|                       :stream (make-string-input-stream data)
 | |
|                       :start (file-portion-start file-portion)
 | |
|                       :end (file-portion-end file-portion)))
 | |
|       (stream
 | |
|        (make-instance 'delimited-input-stream
 | |
|                       :stream data
 | |
|                       :dont-close t
 | |
|                       :start (file-portion-start file-portion)
 | |
|                       :end (file-portion-end file-portion))))))
 | |
| 
 | |
| (defun open-decoded-file-portion (file-portion)
 | |
|   (make-instance (case (file-portion-encoding file-portion)
 | |
|                    (:quoted-printable 'quoted-printable-decoder-stream)
 | |
|                    (:base64 'base64-decoder-stream)
 | |
|                    (t '8bit-decoder-stream))
 | |
|                  :stream (open-file-portion file-portion)))
 |