This commit is contained in:
commit b422ece932

9 changed files with 2419 additions and 0 deletions
web/planet-mars/src/feed_store.rs (141 lines, Normal file)
@@ -0,0 +1,141 @@
use feed_rs::model::Entry;
use feed_rs::model::Feed;
use serde::{Deserialize, Serialize};
use std::fs;
use std::io::BufReader;
use std::path::PathBuf;
use ureq::http::HeaderMap;
use ureq::http::Response;
use ureq::Body;
use url::Url;

#[derive(Deserialize, Serialize, Default)]
pub struct FetchData {
    pub etag: String,
    pub date: String,
}

pub struct FeedStore {
    pub dir: PathBuf,
}

impl FeedStore {
    pub fn new(dir: String) -> Self {
        Self {
            dir: super::to_checked_pathbuf(dir),
        }
    }

    fn slugify_url(url: &Url) -> String {
        let domain = url.domain().unwrap();
        let query = url.query().unwrap_or("");
        slug::slugify(format!("{domain}{}{query}", url.path()))
    }

    fn feed_path(&self, url: &Url) -> String {
        format!("{}/{}", self.dir.display(), Self::slugify_url(url))
    }

    fn fetchdata_path(&self, url: &Url) -> String {
        format!("{}.toml", self.feed_path(url))
    }

    pub fn get_fetchdata(&self, url: &Url) -> FetchData {
        let path = self.fetchdata_path(url);
        if !fs::exists(path.clone()).unwrap() {
            return FetchData::default();
        }
        toml::from_str(&fs::read_to_string(path).unwrap()).unwrap()
    }

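    /// Returns true if `new_feed` differs from the stored copy for `url`.
    /// Entries are compared pairwise from the front; a missing stored copy
    /// counts as changed.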
    fn has_changed(&self, url: &Url, new_feed: &Feed) -> bool {
        let Some(old_feed) = self.load_feed(url, false) else {
            return true;
        };

        let mut old_iter = old_feed.entries.iter();
        for new in &new_feed.entries {
            let Some(old) = old_iter.next() else {
                return true;
            };
            if old != new {
                return true;
            }
        }
        // ignoring any entries left in old_iter
        false
    }

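    /// Parses the response body as a feed and, if it differs from the
    /// stored copy, writes the raw body plus the etag/date headers to disk.
    /// Returns true when the stored feed actually changed.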
    pub fn store(&self, url: &Url, mut response: Response<Body>) -> bool {
        let headers = response.headers();
        let fetchdata = FetchData {
            etag: hv(headers, "etag"),
            date: hv(headers, "date"),
        };

        let body = response
            .body_mut()
            .with_config()
            //            .limit(MAX_BODY_SIZE)
            .read_to_vec()
            .unwrap();
        let feed = match feed_rs::parser::parse(body.as_slice()) {
            Ok(f) => f,
            Err(e) => {
                warn!("Error when parsing feed for {url}: {e:?}");
                return false;
            }
        };
        if !self.has_changed(url, &feed) {
            return false;
        }
        let _ = fs::write(self.feed_path(url), body);
        let _ = fs::write(
            self.fetchdata_path(url),
            toml::to_string(&fetchdata).unwrap(),
        );
        true
    }

    fn load_feed(&self, url: &Url, sanitize: bool) -> Option<Feed> {
        let parser = feed_rs::parser::Builder::new()
            .sanitize_content(sanitize)
            .build();

        let path = self.feed_path(url);
        if !fs::exists(path.clone()).unwrap() {
            return None;
        }
        let file = fs::File::open(path).unwrap();
        Some(parser.parse(BufReader::new(file)).unwrap())
    }

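    /// Loads all configured feeds from disk (with content sanitization
    /// enabled) and returns the newest entries across all of them.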
    pub fn collect(&self, feed_configs: &Vec<super::FeedConfig>) -> Vec<Entry> {
        let mut entries = vec![];

        for feed_config in feed_configs {
            let url = Url::parse(&feed_config.url).unwrap();
            let Some(mut feed) = self.load_feed(&url, true) else {
                // todo error handling!
                warn!("Problem parsing feed file for feed {}", feed_config.url);
                continue;
            };
            entries.append(&mut feed.entries);
            // todo also trim mid-way when length > something, trading cpu for memory
        }
        trim_entries(entries)
    }
}

fn trim_entries(mut entries: Vec<Entry>) -> Vec<Entry> {
    entries.sort_by_key(|e| std::cmp::Reverse(e.updated.or(e.published).unwrap_or_default()));
    entries.truncate(10);
    entries
}

fn hv(headers: &HeaderMap, key: &str) -> String {
    match headers.get(key) {
        Some(hv) => hv.to_str().unwrap_or_default().to_string(),
        _ => "".to_string(),
    }
}
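The path helpers above are pure string handling, so they can be pinned down without touching the filesystem. Below is a test-module sketch that could sit at the bottom of feed_store.rs; the expected slug is an assumption about how the slug crate collapses punctuation, not something verified against this commit:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fetchdata_path_appends_toml_to_the_slug() {
        // Construct the store directly so new()'s directory check is skipped.
        let store = FeedStore {
            dir: PathBuf::from("feeds"),
        };
        let url = Url::parse("https://example.com/blog/feed.xml").unwrap();
        // Assumed: slug::slugify lowercases and turns runs of "." and "/"
        // into single dashes.
        assert_eq!(
            store.fetchdata_path(&url),
            "feeds/example-com-blog-feed-xml.toml"
        );
    }
}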
web/planet-mars/src/fetcher.rs (70 lines, Normal file)
@@ -0,0 +1,70 @@
use std::time::Instant;
use ureq::tls::{TlsConfig, TlsProvider};
use ureq::Agent;
use url::Url;

use crate::FeedStore;

pub struct Fetcher {
    agent: Agent,
    /// FROM header for requests
    from: String,
}

impl Fetcher {
    pub fn new(bot_name: &str, from: &str) -> Fetcher {
        // TODO Get URL from a better place, e.g. Cargo.toml?
        let ua_name = format!("{bot_name}/{} https://TODO", env!("CARGO_PKG_VERSION"));
        let agent = Agent::config_builder()
            .http_status_as_error(false)
            .user_agent(ua_name)
            .tls_config(
                TlsConfig::builder()
                    .provider(TlsProvider::NativeTls)
                    .build(),
            )
            .build()
            .into();
        Fetcher {
            agent,
            from: from.to_string(),
        }
    }

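    /// Fetches `url`, replaying stored validators as conditional-request
    /// headers. Returns true only when a 200 response carried a feed that
    /// differs from the stored copy.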
    pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> bool {
        let fetchdata = feed_store.get_fetchdata(&url);
        let mut builder = self
            .agent
            .get(url.to_string())
            .header("FROM", self.from.clone());
        if fetchdata.etag != "" {
            builder = builder.header("If-None-Match", fetchdata.etag);
        }
        if fetchdata.date != "" {
            builder = builder.header("If-Modified-Since", fetchdata.date);
        }

        let start_instant = Instant::now();
        let result = builder.call();
        let duration = start_instant.elapsed();

        let response = result.unwrap(); // todo log and return false
        debug!(
            "fetched with status {} in {} ms: {url}",
            response.status(),
            duration.as_millis()
        );
        let status = response.status();
        match status.as_u16() {
            304 => false, // Not Modified -> nothing to do
            200 => feed_store.store(&url, response),
            _ => {
                warn!(
                    "HTTP Status {} not implemented for {url}",
                    response.status()
                );
                false
            }
        }
    }
}
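Taken together, fetch() and FeedStore::store() form a conditional-GET loop: store() records the ETag and Date response headers, and the next fetch() replays them as If-None-Match and If-Modified-Since, so an unchanged feed costs only a 304 with no body. A minimal driver sketch, assuming the ./feeds directory already exists (bot name, FROM address, and URL are placeholders):

fn fetch_once(url: Url) -> bool {
    let store = FeedStore::new("./feeds".to_string());
    let fetcher = Fetcher::new("mars-bot", "webmaster@example.org");
    // true means a changed feed was stored and the output needs a rebuild
    fetcher.fetch(url, &store)
}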
web/planet-mars/src/main.rs (98 lines, Normal file)
@@ -0,0 +1,98 @@
#[macro_use]
extern crate log;

use crate::feed_store::FeedStore;
use crate::fetcher::Fetcher;
use clap::Parser;
use serde::Deserialize;
use std::fs;
use std::path::PathBuf;
use url::Url;

mod feed_store;
mod fetcher;

#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(
        short,
        long,
        default_value_t = String::from("mars.toml")
    )]
    config: String,
}

#[derive(Deserialize)]
struct Config {
    /// to be used as part of the fetcher's user agent header
    bot_name: String,
    /// where to store downloaded feeds and their metadata
    feed_dir: String,
    /// feeds to be aggregated
    feeds: Vec<FeedConfig>,
    /// Email address to use for the FROM header when fetching feeds
    from: String,
    /// where to build the output files
    out_dir: String,
    /// templates folder
    templates_dir: String,
}

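/// Converts `dir` into a PathBuf, panicking unless it names an existing
/// directory.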
pub fn to_checked_pathbuf(dir: String) -> PathBuf {
    let dir: PathBuf = PathBuf::from(dir);

    let m = dir
        .metadata()
        .unwrap_or_else(|_| panic!("Could not get metadata of dir: {}", dir.display()));
    assert!(m.is_dir(), "Not a dir: {}", dir.display());
    dir
}

#[derive(Deserialize)]
struct FeedConfig {
    url: String,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    info!("starting up");

    let args = Args::parse();
    let config_path = &args.config;
    if !fs::exists(config_path)? {
        panic!("Configuration file {config_path} does not exist!");
    }
    let config: Config = toml::from_str(&fs::read_to_string(config_path)?)?;
    let templates_dir = to_checked_pathbuf(config.templates_dir);
    let out_dir = to_checked_pathbuf(config.out_dir);

    let feed_store = FeedStore::new(config.feed_dir);
    let fetcher = Fetcher::new(&config.bot_name, &config.from);

    let mut rebuild = false;
    for feed in &config.feeds {
        let url = Url::parse(&feed.url)?;
        rebuild |= fetcher.fetch(url, &feed_store);
    }
    info!("Done fetching. Rebuild needed: {rebuild}");
    if rebuild {
        let entries = feed_store.collect(&config.feeds);
        let mut tera = match tera::Tera::new(&format!("{}/*", &templates_dir.display())) {
            Ok(t) => t,
            Err(e) => {
                println!("Parsing error(s): {}", e);
                ::std::process::exit(1);
            }
        };
        tera.autoescape_on(vec![]);
        let mut context = tera::Context::new();
        context.insert("entries", &entries);
        for name in tera.get_template_names() {
            debug!("Processing template {name}");
            let file = fs::File::create(&format!("{}/{name}", out_dir.display()))?;
            let _ = tera.render_to(name, &context, file)?;
        }
    }
    Ok(())
}
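The Config struct fixes the shape of mars.toml. A round-trip sketch that could live at the bottom of main.rs (the values are placeholders, not taken from this commit):

#[cfg(test)]
mod tests {
    use super::Config;

    #[test]
    fn example_config_parses() {
        // Hypothetical config; every field of Config must be present.
        let raw = r#"
            bot_name = "mars"
            from = "webmaster@example.org"
            feed_dir = "./feeds"
            out_dir = "./out"
            templates_dir = "./templates"

            [[feeds]]
            url = "https://example.com/feed.xml"
        "#;
        let config: Config = toml::from_str(raw).unwrap();
        assert_eq!(config.feeds.len(), 1);
    }
}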