This is possible since all the commits have been made by me. The code taken from SCLF (which is licensed LGPL-2.1-or-later) can also be included since the LGPL 2.1 is [compatible] with the GPL 3.0. compatible: https://www.gnu.org/licenses/license-list.en.html#LGPLv2.1 Change-Id: I2d274c29378679c489dc667a53b234642c3da817 Reviewed-on: https://cl.tvl.fyi/c/depot/+/5928 Tested-by: BuildkiteCI Reviewed-by: sterni <sternenseemann@systemli.org>
		
			
				
	
	
		
			130 lines
		
	
	
	
		
			5.6 KiB
		
	
	
	
		
			Common Lisp
		
	
	
	
	
	
			
		
		
	
	
			130 lines
		
	
	
	
		
			5.6 KiB
		
	
	
	
		
			Common Lisp
		
	
	
	
	
	
| ;; SPDX-License-Identifier: GPL-3.0-only
 | |
| ;; SPDX-FileCopyrightText: Copyright (C) 2022 by sterni
 | |
| 
 | |
| (in-package :note)
 | |
| (declaim (optimize (safety 3)))
 | |
| 
 | |
| ;; Throw away these tags and all of their children
 | |
| (defparameter +discard-tags-with-children+ '("HEAD"))
 | |
| ;; Only “strip” these tags and leave their content as is
 | |
| (defparameter +discard-tags-only+ '("BODY" "HTML"))
 | |
| 
 | |
| ;; This is basically the same as cxml's PROXY-HANDLER.
 | |
| ;; Couldn't be bothered to make a BROADCAST-HANDLER because I
 | |
| ;; only need to pass through to one handler. It accepts every
 | |
| ;; event and passes it on to NEXT-HANDLER. This is useful for
 | |
| ;; subclassing mostly where an event can be modified or passed
 | |
| ;; on as is via CALL-NEXT-METHOD.
 | |
| (defclass hax-proxy-handler (hax:default-handler)
 | |
|   ((next-handler
 | |
|     :initarg :next-handler
 | |
|     :accessor proxy-next-handler)))
 | |
| 
 | |
| ;; Define the trivial handlers which just call themselves for NEXT-HANDLER
 | |
| (macrolet ((def-proxy-handler (name (&rest args))
 | |
|              `(defmethod ,name ((h hax-proxy-handler) ,@args)
 | |
|                 (,name (proxy-next-handler h) ,@args))))
 | |
|   (def-proxy-handler hax:start-document (name p-id s-id))
 | |
|   (def-proxy-handler hax:end-document ())
 | |
|   (def-proxy-handler hax:start-element (name attrs))
 | |
|   (def-proxy-handler hax:end-element (name))
 | |
|   (def-proxy-handler hax:characters (data))
 | |
|   (def-proxy-handler hax:unescaped (data))
 | |
|   (def-proxy-handler hax:comment (data)))
 | |
| 
 | |
| (defclass apple-note-transformer (hax-proxy-handler)
 | |
|   ((cid-lookup
 | |
|     :initarg :cid-lookup
 | |
|     :initform (lambda (cid) nil)
 | |
|     :accessor transformer-cid-lookup)
 | |
|    (discard-until
 | |
|     :initarg :discard-until
 | |
|     :initform nil
 | |
|     :accessor transformer-discard-until)
 | |
|    (depth
 | |
|     :initarg :depth
 | |
|     :initform 0
 | |
|     :accessor transformer-depth))
 | |
|   (:documentation
 | |
|    "HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note
 | |
|    and resolves references to attachments to IMG tags."))
 | |
| 
 | |
| ;; Define the “boring” handlers which just call the next method (i. e. the next
 | |
| ;; handler) unless discard-until is not nil in which case the event is dropped.
 | |
| (macrolet ((def-filter-handler (name (&rest args))
 | |
|              `(defmethod ,name ((h apple-note-transformer) ,@args)
 | |
|                 (when (not (transformer-discard-until h))
 | |
|                   (call-next-method)))))
 | |
|   (def-filter-handler hax:start-document (name p-id s-id))
 | |
|   (def-filter-handler hax:end-document ())
 | |
|   (def-filter-handler hax:characters (data))
 | |
|   (def-filter-handler hax:unescaped (data))
 | |
|   (def-filter-handler hax:comment (data)))
 | |
| 
 | |
| (defun parse-content-id (attrlist)
 | |
|   (when-let (data (find-if (lambda (x)
 | |
|                              (string-equal (hax:attribute-name x) "DATA"))
 | |
|                            attrlist))
 | |
|     (multiple-value-bind (starts-with-cid-p suffix)
 | |
|         (starts-with-subseq "cid:" (hax:attribute-value data)
 | |
|                             :return-suffix t :test #'char=)
 | |
|       (if starts-with-cid-p suffix data))))
 | |
| 
 | |
| (defmethod hax:start-element ((handler apple-note-transformer) name attrs)
 | |
|   (with-accessors ((discard-until transformer-discard-until)
 | |
|                    (next-handler proxy-next-handler)
 | |
|                    (cid-lookup transformer-cid-lookup)
 | |
|                    (depth transformer-depth))
 | |
|       handler
 | |
| 
 | |
|     (cond
 | |
|       ;; If we are discarding, any started element is dropped,
 | |
|       ;; since the end-condition only is reached via END-ELEMENT.
 | |
|       (discard-until nil)
 | |
|       ;; If we are not discarding any outer elements, we can set
 | |
|       ;; up a new discard condition if we encounter an appropriate
 | |
|       ;; element.
 | |
|       ((member name +discard-tags-with-children+ :test #'string-equal)
 | |
|        (setf discard-until (cons name depth)))
 | |
|       ;; Only drop this event, must be mirrored in END-ELEMENT to
 | |
|       ;; avoid invalidly nested HTML.
 | |
|       ((member name +discard-tags-only+ :test #'string-equal) nil)
 | |
|       ;; If we encounter an object tag, we drop it and its contents,
 | |
|       ;; but only after inspecting its attributes and emitting new
 | |
|       ;; events representing an img tag which includes the respective
 | |
|       ;; attachment via its filename.
 | |
|       ((string-equal name "OBJECT")
 | |
|        (progn
 | |
|          (setf discard-until (cons "OBJECT" depth))
 | |
|          ;; TODO(sterni): check type and only resolve images, raise error
 | |
|          ;; otherwise. We should only encounter images anyways, since
 | |
|          ;; other types are only supported for iCloud which doesn't seem
 | |
|          ;; to use IMAP for sync these days.
 | |
|          (when-let* ((cid (parse-content-id attrs))
 | |
|                      (file (apply cid-lookup (list cid)))
 | |
|                      (src (hax:make-attribute "SRC" file)))
 | |
|            (hax:start-element next-handler "IMG" (list src))
 | |
|            (hax:end-element next-handler "IMG"))))
 | |
|       ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
 | |
|       (t (call-next-method)))
 | |
|     (setf depth (1+ depth))))
 | |
| 
 | |
| (defmethod hax:end-element ((handler apple-note-transformer) name)
 | |
|   (with-accessors ((discard-until transformer-discard-until)
 | |
|                    (depth transformer-depth))
 | |
|       handler
 | |
| 
 | |
|     (setf depth (1- depth))
 | |
|     (cond
 | |
|       ;; If we are discarding and encounter the same tag again at the same
 | |
|       ;; depth, we can stop, but still have to discard the current tag.
 | |
|       ((and discard-until
 | |
|             (string-equal (car discard-until) name)
 | |
|             (= (cdr discard-until) depth))
 | |
|        (setf discard-until nil))
 | |
|       ;; In all other cases, we drop properly.
 | |
|       (discard-until nil)
 | |
|       ;; Mirrored tag stripping as in START-ELEMENT
 | |
|       ((member name +discard-tags-only+ :test #'string-equal) nil)
 | |
|       ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
 | |
|       (t (call-next-method)))))
 |