.
This commit is contained in:
commit
b422ece932
9 changed files with 2419 additions and 0 deletions
141
web/planet-mars/src/feed_store.rs
Normal file
141
web/planet-mars/src/feed_store.rs
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
use feed_rs::model::Entry;
|
||||
use feed_rs::model::Feed;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs;
|
||||
use std::io::BufReader;
|
||||
use std::path::PathBuf;
|
||||
use ureq::http::HeaderMap;
|
||||
use ureq::http::Response;
|
||||
use ureq::Body;
|
||||
use url::Url;
|
||||
|
||||
#[derive(Deserialize, Serialize, Default)]
|
||||
pub struct FetchData {
|
||||
pub etag: String,
|
||||
pub date: String,
|
||||
}
|
||||
|
||||
pub struct FeedStore {
|
||||
pub dir: PathBuf,
|
||||
}
|
||||
|
||||
impl FeedStore {
|
||||
pub fn new(dir: String) -> Self {
|
||||
Self {
|
||||
dir: super::to_checked_pathbuf(dir),
|
||||
}
|
||||
}
|
||||
|
||||
fn slugify_url(url: &Url) -> String {
|
||||
let domain = url.domain().unwrap();
|
||||
let query = url.query().unwrap_or("");
|
||||
slug::slugify(format!("{domain}{}{query}", url.path()))
|
||||
}
|
||||
|
||||
fn feed_path(&self, url: &Url) -> String {
|
||||
format!("{}/{}", self.dir.display(), Self::slugify_url(url))
|
||||
}
|
||||
|
||||
fn fetchdata_path(&self, url: &Url) -> String {
|
||||
format!("{}.toml", self.feed_path(url))
|
||||
}
|
||||
|
||||
pub fn get_fetchdata(&self, url: &Url) -> FetchData {
|
||||
let path = self.fetchdata_path(url);
|
||||
if !fs::exists(path.clone()).unwrap() {
|
||||
return FetchData::default();
|
||||
}
|
||||
toml::from_str(&fs::read_to_string(path).unwrap()).unwrap()
|
||||
}
|
||||
|
||||
fn has_changed(&self, url: &Url, new_feed: &Feed) -> bool {
|
||||
let Some(old_feed) = self.load_feed(url, false) else {
|
||||
return true;
|
||||
};
|
||||
|
||||
let mut old_iter = old_feed.entries.iter();
|
||||
for new in &new_feed.entries {
|
||||
let Some(old) = old_iter.next() else {
|
||||
return true;
|
||||
};
|
||||
if old != new {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// ignoring any entries left in old_iter
|
||||
false
|
||||
}
|
||||
|
||||
pub fn store(&self, url: &Url, mut response: Response<Body>) -> bool {
|
||||
let headers = response.headers();
|
||||
let fetchdata = FetchData {
|
||||
etag: hv(headers, "etag"),
|
||||
date: hv(headers, "date"),
|
||||
};
|
||||
|
||||
let body = response
|
||||
.body_mut()
|
||||
.with_config()
|
||||
// .limit(MAX_BODY_SIZE)
|
||||
.read_to_vec()
|
||||
.unwrap();
|
||||
let feed = match feed_rs::parser::parse(body.as_slice()) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
warn!("Error when parsing feed for {url}: {e:?}");
|
||||
return false;
|
||||
}
|
||||
};
|
||||
if !self.has_changed(url, &feed) {
|
||||
return false;
|
||||
}
|
||||
let _ = fs::write(self.feed_path(url), body);
|
||||
let _ = fs::write(
|
||||
self.fetchdata_path(url),
|
||||
toml::to_string(&fetchdata).unwrap(),
|
||||
);
|
||||
true
|
||||
}
|
||||
|
||||
fn load_feed(&self, url: &Url, sanitize: bool) -> Option<Feed> {
|
||||
let parser = feed_rs::parser::Builder::new()
|
||||
.sanitize_content(sanitize)
|
||||
.build();
|
||||
|
||||
let path = self.feed_path(url);
|
||||
if !fs::exists(path.clone()).unwrap() {
|
||||
return None;
|
||||
}
|
||||
let file = fs::File::open(path).unwrap();
|
||||
Some(parser.parse(BufReader::new(file)).unwrap())
|
||||
}
|
||||
|
||||
pub fn collect(&self, feed_configs: &Vec<super::FeedConfig>) -> Vec<Entry> {
|
||||
let mut entries = vec![];
|
||||
|
||||
for feed_config in feed_configs {
|
||||
let url = Url::parse(&feed_config.url).unwrap();
|
||||
let Some(mut feed) = self.load_feed(&url, true) else {
|
||||
// todo error handling!
|
||||
warn!("Problem parsing feed file for feed {}", feed_config.url);
|
||||
continue;
|
||||
};
|
||||
entries.append(&mut feed.entries);
|
||||
// todo also trim mid-way when length > something, trading cpu for memory
|
||||
}
|
||||
trim_entries(entries)
|
||||
}
|
||||
}
|
||||
|
||||
fn trim_entries(mut entries: Vec<Entry>) -> Vec<Entry> {
|
||||
entries.sort_by_key(|e| std::cmp::Reverse(e.updated.or(e.published).unwrap_or_default()));
|
||||
entries.truncate(10);
|
||||
entries
|
||||
}
|
||||
|
||||
fn hv(headers: &HeaderMap, key: &str) -> String {
|
||||
match headers.get(key) {
|
||||
Some(hv) => hv.to_str().unwrap_or_default().to_string(),
|
||||
_ => "".to_string(),
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue