snix/users/sterni/mn2html/mn2html.rs
sterni 3224488a29 feat(sterni/mn2html): reimplement mnote-html in Rust
Reimplement the MIME body extraction and HTML rewriting from mblog in
Rust so that shelling out to it becomes viable. The problem with
mnote-html is mainly that it – being written in CL – requires a ~300MB
executable and is a bit sluggish starting.

Change-Id: I5c1adc1a7ab5f3dde207f9a1f67ace685bd3f69f
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13014
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Autosubmit: sterni <sternenseemann@systemli.org>
2025-02-05 13:01:25 +00:00

165 lines
5.3 KiB
Rust

// SPDX-FileCopyrightText: Copyright © 2024 sterni
// SPDX-License-Identifier: GPL-3.0-only
use lol_html::html_content::ContentType;
use lol_html::{element, HtmlRewriter, Settings};
use mail_parser::{Message, MessageParser, MimeHeaders};
use memmap2::Mmap;
use std::collections::HashMap;
use std::env;
use std::error::Error;
use std::fmt;
use std::fs::File;
use std::io::Write;
type CidMap<'a> = HashMap<&'a str, &'a str>;
#[derive(Debug)]
enum Mn2htmlError {
MimeParseFail,
NoMailNote,
MissingAttachment(String),
}
impl fmt::Display for Mn2htmlError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Mn2htmlError::MimeParseFail => {
write!(f, "Could not parse given file as a MIME message")
}
Mn2htmlError::NoMailNote => {
write!(f, "Given MIME message does not appear to be a Mail Note")
}
Mn2htmlError::MissingAttachment(cid) => write!(
f,
"Given object's Content-Id {} doesn't match any attachment",
cid
),
}
}
}
impl Error for Mn2htmlError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
None
}
}
fn warn(msg: &str) {
eprintln!("mn2html: {}", msg);
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
for arg in env::args_os().skip(1) {
// TODO(sterni): flags, --help and such
let msg_file = File::open(arg)?;
let msg_raw = unsafe { Mmap::map(&msg_file) }?;
let msg_parsed = MessageParser::default()
.parse(msg_raw.as_ref())
.ok_or(Mn2htmlError::MimeParseFail)?;
if !matches!(
msg_parsed
.header("X-Uniform-Type-Identifier")
.and_then(|h| h.as_text()),
Some("com.apple.mail-note")
) {
return Err(Box::new(Mn2htmlError::NoMailNote));
}
let cid_map = index_attachments(&msg_parsed);
let html_body = msg_parsed
.html_bodies()
.nth(0)
.ok_or(Mn2htmlError::NoMailNote)?
.contents();
rewrite_html(html_body, &cid_map)?;
}
Ok(())
}
// At some point, it was a consideration to move this out of the Rust program.
// mn2html would have been a shell script with mblaze(7) tools finding the
// attachments and their content ideas passing the information to a Rust HTML
// rewriter via CLI args. It is unclear how much (if at all?) slower this would
// have been. In the end, it just seemed cleaner to do it in the Rust program,
// especially since the HTML rewriter would not really have been useful on its
// own.
fn index_attachments<'a>(msg: &'a Message) -> CidMap<'a> {
let mut map = HashMap::new();
for a in msg.attachments() {
match (a.content_id(), a.attachment_name()) {
(Some(cid), Some(filename)) => {
if let Some(_) = map.insert(cid, filename) {
warn("multiple attachments share the same Content-Id");
}
}
(_, _) => warn("attachment without Content-Id and/or filename in Content-Disposition"),
}
}
map
}
fn rewrite_html(html_body: &[u8], cid_map: &CidMap) -> Result<(), Box<dyn std::error::Error>> {
let mut stdout = std::io::stdout();
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
element!("head", |el| {
el.remove();
Ok(())
}),
element!("body", |el| {
el.remove_and_keep_content();
Ok(())
}),
element!("html", |el| {
el.remove_and_keep_content();
Ok(())
}),
element!("object[type][data]", |el| {
if el
.get_attribute("type")
.expect("element! matched object[type] without type attribute")
!= "application/x-apple-msg-attachment"
{
warn("encountered object with unknown type attribute, ignoring");
return Ok(());
}
match el
.get_attribute("data")
.expect("element! matched object[data] without data attribute")
.split_at_checked(4)
{
Some(("cid:", cid)) => match cid_map.get(cid) {
Some(filename) => el.replace(
&format!(r#"<img src="{}">"#, filename),
ContentType::Html,
),
_ => {
return Err(Box::new(Mn2htmlError::MissingAttachment(
cid.to_string(),
)))
}
},
_ => warn("encountered object with malformed data attribute, ignoring"),
};
Ok(())
}),
],
..Settings::new()
},
|c: &[u8]| stdout.write_all(c).expect("Can't write to stdout"),
);
rewriter.write(html_body)?;
rewriter.end()?;
Ok(())
}