also implement atom feed via template

Thomas Koch 2025-01-11 21:17:55 +02:00
parent 8b17e93d74
commit cfab9ef5c0
10 changed files with 259 additions and 114 deletions
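
The template files themselves are among the ten changed files but are not shown in this diff. As a rough, hedged sketch only: a Tera template for the Atom output could look something like the following, assuming the `entries` and `PKG_*` context values inserted by the new `template_engine::build` below and the nested field layout of serialized `feed_rs::model::Entry` values. The file name `atom.xml`, the feed title, and the id are placeholders, not taken from the commit:

    <?xml version="1.0" encoding="utf-8"?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      {# title and id here are placeholders #}
      <title>my planet</title>
      <id>{{ PKG_HOMEPAGE }}</id>
      <updated>{{ now() | date(format="%Y-%m-%dT%H:%M:%SZ") }}</updated>
      <generator uri="{{ PKG_HOMEPAGE }}" version="{{ PKG_VERSION }}">{{ PKG_NAME }}</generator>
      {% for entry in entries %}
      <entry>
        <id>{{ entry.id }}</id>
        {% if entry.title %}<title>{{ entry.title.content }}</title>{% endif %}
        {% if entry.updated %}<updated>{{ entry.updated }}</updated>{% endif %}
        {% for link in entry.links %}<link href="{{ link.href }}" />{% endfor %}
      </entry>
      {% endfor %}
    </feed>

Note that autoescaping is disabled in `create_tera` below, so entry values would be emitted verbatim; for XML output that is something to double-check, as the code's own todo comment says.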

src/feed_store.rs

@@ -1,5 +1,7 @@
+use anyhow::Result;
 use feed_rs::model::Entry;
 use feed_rs::model::Feed;
+use ron::ser::{to_string_pretty, PrettyConfig};
 use serde::{Deserialize, Serialize};
 use std::convert::AsRef;
 use std::fs;
@@ -28,43 +30,47 @@ impl FeedStore {
     }

     fn slugify_url(url: &Url) -> String {
-        let domain = url.domain().unwrap();
+        let domain = url.domain().unwrap(); // todo don't hide error
         let query = url.query().unwrap_or("");
         slug::slugify(format!("{domain}{}{query}", url.path()))
     }

+    fn generic_path(&self, url: &Url, ext: &str) -> String {
+        format!("{}/{}{ext}", self.dir.display(), Self::slugify_url(url))
+    }
+
     fn feed_path(&self, url: &Url) -> String {
-        format!("{}/{}", self.dir.display(), Self::slugify_url(url))
+        self.generic_path(url, "")
     }

     fn fetchdata_path(&self, url: &Url) -> String {
-        format!("{}.toml", self.feed_path(url))
+        self.generic_path(url, ".toml")
     }

-    pub fn load_fetchdata(&self, url: &Url) -> FetchData {
+    pub fn load_fetchdata(&self, url: &Url) -> Result<FetchData> {
         let path = self.fetchdata_path(url);
-        if !fs::exists(path.clone()).unwrap() {
-            return FetchData::default();
+        if !fs::exists(path.clone())? {
+            return Ok(FetchData::default());
         }
-        toml::from_str(&fs::read_to_string(path).unwrap()).unwrap()
+        Ok(toml::from_str(&fs::read_to_string(path)?)?)
     }

-    fn has_changed(&self, url: &Url, new_feed: &Feed) -> bool {
+    fn has_changed(&self, url: &Url, new_feed: &Feed) -> Result<bool> {
         let Some(old_feed) = self.load_feed(url, false) else {
-            return true;
+            return Ok(true);
         };
         let mut old_iter = old_feed.entries.iter();
         for new in &new_feed.entries {
             let Some(old) = old_iter.next() else {
-                return true;
+                return Ok(true);
             };
             if old != new {
-                return true;
+                return Ok(true);
             }
         }
         // ignoring any entries left in old_iter
-        false
+        Ok(false)
     }

     fn write<P: AsRef<std::path::Path> + std::fmt::Display, C: AsRef<[u8]>>(
@@ -77,7 +83,7 @@ impl FeedStore {
         fs::write(path, contents)
     }

-    pub fn store(&self, url: &Url, mut response: Response<Body>) -> bool {
+    pub fn store(&self, url: &Url, mut response: Response<Body>) -> Result<bool> {
         let headers = response.headers();
         let fetchdata = FetchData {
             etag: hv(headers, "etag"),
@@ -94,19 +100,24 @@ impl FeedStore {
             Ok(f) => f,
             Err(e) => {
                 warn!("Error when parsing feed for {url}: {e:?}");
-                return false;
+                return Ok(false);
             }
         };
-        if !self.has_changed(url, &feed) {
-            return false;
+        if !self.has_changed(url, &feed)? {
+            return Ok(false);
         }
         debug!("Storing feed for {url}.");
-        let _ = Self::write(self.feed_path(url), body);
-        let _ = Self::write(
+        // todo don't serialize to string but to writer
+        Self::write(
+            self.generic_path(url, ".ron"),
+            to_string_pretty(&feed, PrettyConfig::default())?,
+        )?;
+        Self::write(self.feed_path(url), body)?;
+        Self::write(
             self.fetchdata_path(url),
             toml::to_string(&fetchdata).unwrap(),
-        );
-        true
+        )?;
+        Ok(true)
     }

     fn load_feed(&self, url: &Url, sanitize: bool) -> Option<Feed> {
@@ -132,6 +143,7 @@ impl FeedStore {
                 warn!("Problem parsing feed file for feed {}", feed_config.url);
                 continue;
             };
             entries.append(&mut feed.entries);
+            // todo also trim mid-way when length > something, trading cpu for memory
         }
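
On the `// todo don't serialize to string but to writer` note above: ron also offers writer-based serialization (`ron::ser::to_writer_pretty`), so the intermediate `String` could presumably be avoided. A minimal sketch under that assumption; `write_ron` is a hypothetical helper, not part of this commit:

    use anyhow::Result;
    use ron::ser::{to_writer_pretty, PrettyConfig};
    use std::fs::File;
    use std::io::BufWriter;

    // Hypothetical: stream the RON serialization straight into the file
    // instead of building a String first.
    fn write_ron<T: serde::Serialize>(path: &str, value: &T) -> Result<()> {
        let file = BufWriter::new(File::create(path)?);
        to_writer_pretty(file, value, PrettyConfig::default())?;
        Ok(())
    }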

src/fetcher.rs

@@ -1,3 +1,4 @@
+use anyhow::Result;
 use std::time::Instant;
 use ureq::tls::{TlsConfig, TlsProvider};
 use ureq::Agent;
@@ -31,8 +32,8 @@ impl Fetcher {
         }
     }

-    pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> bool {
-        let fetchdata = feed_store.load_fetchdata(&url);
+    pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> Result<bool> {
+        let fetchdata = feed_store.load_fetchdata(&url)?;
         let mut builder = self
             .agent
             .get(url.to_string())
@@ -48,7 +49,7 @@
         let result = builder.call();
         let duration = start_instant.elapsed();

-        let response = result.unwrap(); // todo log and return false
+        let response = result?; // todo log and return false
         debug!(
             "fetched with status {} in {} ms: {url}",
             response.status(),
@@ -56,14 +57,14 @@
         );
         let status = response.status();
         match status.as_u16() {
-            304 => false, // Not Modified -> nothing to do
+            304 => Ok(false), // Not Modified -> nothing to do
             200 => feed_store.store(&url, response),
             _ => {
                 warn!(
                     "HTTP Status {} not implemented for {url}",
                     response.status()
                 );
-                false
+                Ok(false)
             }
         }
     }

src/main.rs

@@ -6,14 +6,14 @@ use crate::fetcher::Fetcher;
 use anyhow::Result;
 use clap::Parser;
 use serde::Deserialize;
-use simple_entry::SimpleEntry;
 use std::fs;
 use std::path::PathBuf;
 use url::Url;

+//mod atom_serializer;
 mod feed_store;
 mod fetcher;
-mod simple_entry;
+mod template_engine;

 #[derive(Parser)]
 #[command(author, version, about, long_about = None)]
@@ -70,42 +70,12 @@ fn fetch(config: &Config, feed_store: &FeedStore) -> Result<bool> {
                 continue;
             }
         };
-        rebuild |= fetcher.fetch(url, feed_store);
+        rebuild |= fetcher.fetch(url, feed_store)?;
     }
     info!("Done fetching. Rebuild needed: {rebuild}");
     Ok(rebuild)
 }

-fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
-    let templates_dir = to_checked_pathbuf(&config.templates_dir);
-    let out_dir = to_checked_pathbuf(&config.out_dir);
-
-    let mut tera = match tera::Tera::new(&format!("{}/*", &templates_dir.display())) {
-        Ok(t) => t,
-        Err(e) => {
-            println!("Parsing error(s): {}", e);
-            ::std::process::exit(1);
-        }
-    };
-    // disable autoescape as this would corrupt urls or the entry contents. todo check this!
-    tera.autoescape_on(vec![]);
-
-    let mut context = tera::Context::new();
-    let entries: Vec<SimpleEntry> = feed_store
-        .collect(&config.feeds)
-        .into_iter()
-        .map(SimpleEntry::from_feed_entry)
-        .collect();
-    context.insert("entries", &entries);
-
-    for name in tera.get_template_names() {
-        debug!("Processing template {name}");
-        let file = fs::File::create(format!("{}/{name}", out_dir.display()))?;
-        tera.render_to(name, &context, file)?;
-    }
-    Ok(())
-}
-
 fn main() -> Result<()> {
     env_logger::init();
     info!("starting up");
@@ -129,7 +99,7 @@ fn main() -> Result<()> {
     };

     if should_build {
-        build(&config, &feed_store)?;
+        template_engine::build(&config, &feed_store)?;
     }
     Ok(())
 }

src/simple_entry.rs

@ -1,43 +0,0 @@
use feed_rs::model::Entry;
/// Simplified Feed entry for easier value access in template
#[derive(serde::Serialize)]
pub struct SimpleEntry {
pub date: String,
pub content: String,
pub author: String,
pub link: String,
pub title: String,
}
/// format for the entries timestamp
/// <https://docs.rs/chrono/latest/chrono/format/strftime>
const FMT: &str = "%c";
impl SimpleEntry {
pub fn from_feed_entry(entry: Entry) -> Self {
Self {
date: entry
.updated
.or(entry.published)
.unwrap_or_default()
.format(FMT)
.to_string(),
content: entry
.content
.map(|x| x.body.unwrap_or_default())
.unwrap_or_default(),
author: if !entry.authors.is_empty() {
entry.authors[0].name.clone()
} else {
"".to_string()
},
link: if !entry.links.is_empty() {
entry.links[0].href.clone()
} else {
"".to_string()
},
title: entry.title.map(|x| x.content).unwrap_or_default(),
}
}
}

src/template_engine.rs

@@ -0,0 +1,35 @@
+use crate::feed_store::FeedStore;
+use crate::to_checked_pathbuf;
+use crate::Config;
+use anyhow::Result;
+use feed_rs::model::Entry;
+use std::fs::File;
+use tera::Tera;
+
+pub fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
+    let tera = create_tera(&config.templates_dir)?;
+    let out_dir = to_checked_pathbuf(&config.out_dir);
+
+    let mut context = tera::Context::new();
+    let feed_entries: Vec<Entry> = feed_store.collect(&config.feeds);
+    context.insert("entries", &feed_entries);
+    context.insert("PKG_AUTHORS", env!("CARGO_PKG_AUTHORS"));
+    context.insert("PKG_HOMEPAGE", env!("CARGO_PKG_HOMEPAGE"));
+    context.insert("PKG_NAME", env!("CARGO_PKG_NAME"));
+    context.insert("PKG_VERSION", env!("CARGO_PKG_VERSION"));
+
+    for name in tera.get_template_names() {
+        debug!("Processing template {name}");
+        let file = File::create(format!("{}/{name}", out_dir.display()))?;
+        tera.render_to(name, &context, file)?;
+    }
+    Ok(())
+}
+
+fn create_tera(templates_dir: &str) -> Result<Tera> {
+    let dir = to_checked_pathbuf(templates_dir);
+    let mut tera = tera::Tera::new(&format!("{}/*", &dir.display()))?;
+    // disable autoescape as this would corrupt urls or the entry contents. todo check this!
+    tera.autoescape_on(vec![]);
+    Ok(tera)
+}
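
With `SimpleEntry` removed, templates now receive serialized `feed_rs::model::Entry` values directly, so value access presumably shifts from flat fields to nested, optional ones. A hedged before/after sketch in Tera syntax (field names follow feed_rs's model; the surrounding markup is illustrative):

    {# before, with SimpleEntry's flat fields: #}
    <a href="{{ entry.link }}">{{ entry.title }}</a> by {{ entry.author }}

    {# after, with Entry's nested, optional fields: #}
    {% if entry.links %}<a href="{{ entry.links.0.href }}">{% endif %}
    {% if entry.title %}{{ entry.title.content }}{% endif %}
    {% if entry.links %}</a>{% endif %}
    {% if entry.authors %} by {{ entry.authors.0.name }}{% endif %}

One trade-off of this change: the date formatting that `SimpleEntry` did in Rust (chrono's `%c`) now has to happen in the template, for example with Tera's `date` filter.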