Reimplement the MIME body extraction and HTML rewriting from mblog in Rust so that shelling out to it becomes viable. The problem with mnote-html is mainly that it – being written in CL – requires a ~300MB executable and is a bit sluggish starting. Change-Id: I5c1adc1a7ab5f3dde207f9a1f67ace685bd3f69f Reviewed-on: https://cl.tvl.fyi/c/depot/+/13014 Tested-by: BuildkiteCI Reviewed-by: sterni <sternenseemann@systemli.org> Autosubmit: sterni <sternenseemann@systemli.org>
165 lines
5.3 KiB
Rust
165 lines
5.3 KiB
Rust
// SPDX-FileCopyrightText: Copyright © 2024 sterni
|
|
// SPDX-License-Identifier: GPL-3.0-only
|
|
use lol_html::html_content::ContentType;
|
|
use lol_html::{element, HtmlRewriter, Settings};
|
|
use mail_parser::{Message, MessageParser, MimeHeaders};
|
|
use memmap2::Mmap;
|
|
|
|
use std::collections::HashMap;
|
|
use std::env;
|
|
use std::error::Error;
|
|
use std::fmt;
|
|
use std::fs::File;
|
|
use std::io::Write;
|
|
|
|
type CidMap<'a> = HashMap<&'a str, &'a str>;
|
|
|
|
#[derive(Debug)]
|
|
enum Mn2htmlError {
|
|
MimeParseFail,
|
|
NoMailNote,
|
|
MissingAttachment(String),
|
|
}
|
|
|
|
impl fmt::Display for Mn2htmlError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Mn2htmlError::MimeParseFail => {
|
|
write!(f, "Could not parse given file as a MIME message")
|
|
}
|
|
Mn2htmlError::NoMailNote => {
|
|
write!(f, "Given MIME message does not appear to be a Mail Note")
|
|
}
|
|
Mn2htmlError::MissingAttachment(cid) => write!(
|
|
f,
|
|
"Given object's Content-Id {} doesn't match any attachment",
|
|
cid
|
|
),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Error for Mn2htmlError {
|
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn warn(msg: &str) {
|
|
eprintln!("mn2html: {}", msg);
|
|
}
|
|
|
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
for arg in env::args_os().skip(1) {
|
|
// TODO(sterni): flags, --help and such
|
|
let msg_file = File::open(arg)?;
|
|
let msg_raw = unsafe { Mmap::map(&msg_file) }?;
|
|
|
|
let msg_parsed = MessageParser::default()
|
|
.parse(msg_raw.as_ref())
|
|
.ok_or(Mn2htmlError::MimeParseFail)?;
|
|
|
|
if !matches!(
|
|
msg_parsed
|
|
.header("X-Uniform-Type-Identifier")
|
|
.and_then(|h| h.as_text()),
|
|
Some("com.apple.mail-note")
|
|
) {
|
|
return Err(Box::new(Mn2htmlError::NoMailNote));
|
|
}
|
|
|
|
let cid_map = index_attachments(&msg_parsed);
|
|
let html_body = msg_parsed
|
|
.html_bodies()
|
|
.nth(0)
|
|
.ok_or(Mn2htmlError::NoMailNote)?
|
|
.contents();
|
|
|
|
rewrite_html(html_body, &cid_map)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// At some point, it was a consideration to move this out of the Rust program.
|
|
// mn2html would have been a shell script with mblaze(7) tools finding the
|
|
// attachments and their content ideas passing the information to a Rust HTML
|
|
// rewriter via CLI args. It is unclear how much (if at all?) slower this would
|
|
// have been. In the end, it just seemed cleaner to do it in the Rust program,
|
|
// especially since the HTML rewriter would not really have been useful on its
|
|
// own.
|
|
fn index_attachments<'a>(msg: &'a Message) -> CidMap<'a> {
|
|
let mut map = HashMap::new();
|
|
for a in msg.attachments() {
|
|
match (a.content_id(), a.attachment_name()) {
|
|
(Some(cid), Some(filename)) => {
|
|
if let Some(_) = map.insert(cid, filename) {
|
|
warn("multiple attachments share the same Content-Id");
|
|
}
|
|
}
|
|
(_, _) => warn("attachment without Content-Id and/or filename in Content-Disposition"),
|
|
}
|
|
}
|
|
|
|
map
|
|
}
|
|
|
|
fn rewrite_html(html_body: &[u8], cid_map: &CidMap) -> Result<(), Box<dyn std::error::Error>> {
|
|
let mut stdout = std::io::stdout();
|
|
let mut rewriter = HtmlRewriter::new(
|
|
Settings {
|
|
element_content_handlers: vec![
|
|
element!("head", |el| {
|
|
el.remove();
|
|
Ok(())
|
|
}),
|
|
element!("body", |el| {
|
|
el.remove_and_keep_content();
|
|
Ok(())
|
|
}),
|
|
element!("html", |el| {
|
|
el.remove_and_keep_content();
|
|
Ok(())
|
|
}),
|
|
element!("object[type][data]", |el| {
|
|
if el
|
|
.get_attribute("type")
|
|
.expect("element! matched object[type] without type attribute")
|
|
!= "application/x-apple-msg-attachment"
|
|
{
|
|
warn("encountered object with unknown type attribute, ignoring");
|
|
return Ok(());
|
|
}
|
|
|
|
match el
|
|
.get_attribute("data")
|
|
.expect("element! matched object[data] without data attribute")
|
|
.split_at_checked(4)
|
|
{
|
|
Some(("cid:", cid)) => match cid_map.get(cid) {
|
|
Some(filename) => el.replace(
|
|
&format!(r#"<img src="{}">"#, filename),
|
|
ContentType::Html,
|
|
),
|
|
_ => {
|
|
return Err(Box::new(Mn2htmlError::MissingAttachment(
|
|
cid.to_string(),
|
|
)))
|
|
}
|
|
},
|
|
_ => warn("encountered object with malformed data attribute, ignoring"),
|
|
};
|
|
|
|
Ok(())
|
|
}),
|
|
],
|
|
..Settings::new()
|
|
},
|
|
|c: &[u8]| stdout.write_all(c).expect("Can't write to stdout"),
|
|
);
|
|
|
|
rewriter.write(html_body)?;
|
|
rewriter.end()?;
|
|
|
|
Ok(())
|
|
}
|