generating HTML from 12 feeds works

Thomas Koch 2025-01-10 20:17:06 +02:00
parent b422ece932
commit 3bee93ecb3
7 changed files with 165 additions and 41 deletions

@@ -1,6 +1,7 @@
use feed_rs::model::Entry;
use feed_rs::model::Feed;
use serde::{Deserialize, Serialize};
use std::convert::AsRef;
use std::fs;
use std::io::BufReader;
use std::path::PathBuf;
@@ -20,7 +21,7 @@ pub struct FeedStore {
}
impl FeedStore {
pub fn new(dir: String) -> Self {
pub fn new(dir: &str) -> Self {
Self {
dir: super::to_checked_pathbuf(dir),
}
@@ -66,6 +67,16 @@ impl FeedStore {
false
}
fn write<P: AsRef<std::path::Path> + std::fmt::Display, C: AsRef<[u8]>>(
path: P,
contents: C,
) -> std::io::Result<()> {
if fs::exists(&path)? {
fs::rename(&path, format!("{path}.backup"))?;
}
fs::write(path, contents)
}
pub fn store(&self, url: &Url, mut response: Response<Body>) -> bool {
let headers = response.headers();
let fetchdata = FetchData {
@@ -89,8 +100,9 @@ impl FeedStore {
if !self.has_changed(url, &feed) {
return false;
}
let _ = fs::write(self.feed_path(url), body);
let _ = fs::write(
debug!("Storing feed for {url}.");
let _ = Self::write(self.feed_path(url), body);
let _ = Self::write(
self.fetchdata_path(url),
toml::to_string(&fetchdata).unwrap(),
);

@@ -37,10 +37,10 @@ impl Fetcher {
.agent
.get(url.to_string())
.header("FROM", self.from.clone());
if fetchdata.etag != "" {
if !fetchdata.etag.is_empty() {
builder = builder.header("If-None-Match", fetchdata.etag);
}
if fetchdata.date != "" {
if !fetchdata.date.is_empty() {
builder = builder.header("If-Modified-Since", fetchdata.date);
}

@@ -3,14 +3,17 @@ extern crate log;
use crate::feed_store::FeedStore;
use crate::fetcher::Fetcher;
use anyhow::Result;
use clap::Parser;
use serde::Deserialize;
use simple_entry::SimpleEntry;
use std::fs;
use std::path::PathBuf;
use url::Url;
mod feed_store;
mod fetcher;
mod simple_entry;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
@@ -21,6 +24,8 @@ struct Args {
default_value_t = String::from("mars.toml")
)]
config: String,
#[arg(long, default_value_t = false)]
no_fetch: bool,
}
#[derive(Deserialize)]
@@ -39,7 +44,7 @@ struct Config {
templates_dir: String,
}
pub fn to_checked_pathbuf(dir: String) -> PathBuf {
pub fn to_checked_pathbuf(dir: &str) -> PathBuf {
let dir: PathBuf = PathBuf::from(dir);
let m = dir
@@ -54,7 +59,54 @@ struct FeedConfig {
url: String,
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
fn fetch(config: &Config, feed_store: &FeedStore) -> Result<bool> {
let fetcher = Fetcher::new(&config.bot_name, &config.from);
let mut rebuild = false;
for feed in &config.feeds {
let url = match Url::parse(&feed.url) {
Ok(x) => x,
Err(e) => {
error!("Error parsing url '{}': {e:?}", feed.url);
continue;
}
};
rebuild |= fetcher.fetch(url, feed_store);
}
info!("Done fetching. Rebuild needed: {rebuild}");
Ok(rebuild)
}
fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
let templates_dir = to_checked_pathbuf(&config.templates_dir);
let out_dir = to_checked_pathbuf(&config.out_dir);
let mut tera = match tera::Tera::new(&format!("{}/*", &templates_dir.display())) {
Ok(t) => t,
Err(e) => {
println!("Parsing error(s): {}", e);
::std::process::exit(1);
}
};
// disable autoescape as this would corrupt urls or the entry contents. todo: check this!
tera.autoescape_on(vec![]);
let mut context = tera::Context::new();
let entries: Vec<SimpleEntry> = feed_store
.collect(&config.feeds)
.into_iter()
.map(SimpleEntry::from_feed_entry)
.collect();
context.insert("entries", &entries);
for name in tera.get_template_names() {
debug!("Processing template {name}");
let file = fs::File::create(format!("{}/{name}", out_dir.display()))?;
tera.render_to(name, &context, file)?;
}
Ok(())
}
fn main() -> Result<()> {
env_logger::init();
info!("starting up");
@@ -64,35 +116,20 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
panic!("Configuration file {config_path} does not exist!");
}
let config: Config = toml::from_str(&fs::read_to_string(config_path)?)?;
let templates_dir = to_checked_pathbuf(config.templates_dir);
let out_dir = to_checked_pathbuf(config.out_dir);
// only check here to avoid fetching with broken config
// todo: get a config lib that provides validation!
let _ = to_checked_pathbuf(&config.templates_dir);
let _ = to_checked_pathbuf(&config.out_dir);
let feed_store = FeedStore::new(config.feed_dir);
let fetcher = Fetcher::new(&config.bot_name, &config.from);
let feed_store = FeedStore::new(&config.feed_dir);
let should_build = if args.no_fetch {
true
} else {
fetch(&config, &feed_store)?
};
let mut rebuild = false;
for feed in &config.feeds {
let url = Url::parse(&feed.url)?;
rebuild |= fetcher.fetch(url, &feed_store);
}
info!("Done fetching. Rebuild needed: {rebuild}");
if rebuild {
let entries = feed_store.collect(&config.feeds);
let mut tera = match tera::Tera::new(&format!("{}/*", &templates_dir.display())) {
Ok(t) => t,
Err(e) => {
println!("Parsing error(s): {}", e);
::std::process::exit(1);
}
};
tera.autoescape_on(vec![]);
let mut context = tera::Context::new();
context.insert("entries", &entries);
for name in tera.get_template_names() {
debug!("Processing template {name}");
let file = fs::File::create(&format!("{}/{name}", out_dir.display()))?;
let _ = tera.render_to(name, &context, file)?;
}
if should_build {
build(&config, &feed_store)?;
}
Ok(())
}
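
A note on configuration (not part of the diff above): the fields read in fetch() and main() suggest that mars.toml, the default --config file, looks roughly like the sketch below. Only the keys the code actually references (bot_name, from, feed_dir, templates_dir, out_dir, and a [[feeds]] table per feed with a url) are grounded in this commit; the values are placeholders.

# hypothetical mars.toml, keys inferred from the Config/FeedConfig fields used above
bot_name = "mars"
from = "webmaster@example.org"
feed_dir = "feeds"
templates_dir = "templates"
out_dir = "out"

[[feeds]]
url = "https://example.org/feed.xml"

[[feeds]]
url = "https://example.net/atom.xml"

With such a config in place, passing the new --no-fetch flag skips the network round trips and rebuilds the HTML output directly from the feeds already stored in feed_dir.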

@@ -0,0 +1,43 @@
use feed_rs::model::Entry;
/// Simplified Feed entry for easier value access in template
#[derive(serde::Serialize)]
pub struct SimpleEntry {
pub date: String,
pub content: String,
pub author: String,
pub link: String,
pub title: String,
}
/// format for the entry timestamps
/// <https://docs.rs/chrono/latest/chrono/format/strftime>
const FMT: &str = "%c";
impl SimpleEntry {
pub fn from_feed_entry(entry: Entry) -> Self {
Self {
date: entry
.updated
.or(entry.published)
.unwrap_or_default()
.format(FMT)
.to_string(),
content: entry
.content
.map(|x| x.body.unwrap_or_default())
.unwrap_or_default(),
author: if !entry.authors.is_empty() {
entry.authors[0].name.clone()
} else {
"".to_string()
},
link: if !entry.links.is_empty() {
entry.links[0].href.clone()
} else {
"".to_string()
},
title: entry.title.map(|x| x.content).unwrap_or_default(),
}
}
}
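
For orientation (also not part of the commit): the SimpleEntry fields above exist so that a template in templates_dir can reach the values directly. A minimal Tera template along the following lines would work with the "entries" key inserted into the context in build(); the file name and markup are assumptions, only the field names come from this commit. Because autoescape is disabled, entry.content is emitted as raw HTML.

<!-- hypothetical templates/index.html -->
{% for entry in entries %}
<article>
<h2><a href="{{ entry.link }}">{{ entry.title }}</a></h2>
<p>{{ entry.author }}, {{ entry.date }}</p>
{{ entry.content }}
</article>
{% endfor %}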