From 7b20d0dac1f1b6b0209207daaa29a03bbaada6b8 Mon Sep 17 00:00:00 2001 From: edef Date: Wed, 2 Apr 2025 11:16:10 +0000 Subject: [PATCH] feat(contrib/fetchroots): track git revisions Change-Id: I3af0a4486a8685191adc28210162661fae6cfc3c Reviewed-on: https://cl.snix.dev/c/snix/+/30290 Reviewed-by: Florian Klink Tested-by: besadii --- contrib/fetchroots/Cargo.lock | 5 +++-- contrib/fetchroots/Cargo.nix | 8 ++++++-- contrib/fetchroots/Cargo.toml | 1 + contrib/fetchroots/src/main.rs | 29 +++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/contrib/fetchroots/Cargo.lock b/contrib/fetchroots/Cargo.lock index 214ddab1c..d84ee4280 100644 --- a/contrib/fetchroots/Cargo.lock +++ b/contrib/fetchroots/Cargo.lock @@ -917,9 +917,9 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.6.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" +checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010" [[package]] name = "der" @@ -1099,6 +1099,7 @@ dependencies = [ "bytes-utils", "bzip2", "chrono", + "data-encoding", "futures", "indicatif", "nix-compat", diff --git a/contrib/fetchroots/Cargo.nix b/contrib/fetchroots/Cargo.nix index 4682e704b..8cb557bd5 100644 --- a/contrib/fetchroots/Cargo.nix +++ b/contrib/fetchroots/Cargo.nix @@ -3099,9 +3099,9 @@ rec { }; "data-encoding" = rec { crateName = "data-encoding"; - version = "2.6.0"; + version = "2.8.0"; edition = "2018"; - sha256 = "1qnn68n4vragxaxlkqcb1r28d3hhj43wch67lm4rpxlw89wnjmp8"; + sha256 = "0470yf5ly1ibzmwygyjqg9ii9njbsha3xr5qj5dxyf2psbgpapsp"; libName = "data_encoding"; authors = [ "Julien Cretin " @@ -3721,6 +3721,10 @@ rec { name = "chrono"; packageId = "chrono"; } + { + name = "data-encoding"; + packageId = "data-encoding"; + } { name = "futures"; packageId = "futures"; diff --git a/contrib/fetchroots/Cargo.toml b/contrib/fetchroots/Cargo.toml index a1647eaed..5dd98732b 100644 --- a/contrib/fetchroots/Cargo.toml +++ b/contrib/fetchroots/Cargo.toml @@ -14,6 +14,7 @@ bytes = "1.5.0" bytes-utils = "0.1.4" bzip2 = "0.4.4" chrono = "0.4.34" +data-encoding = "2.8.0" futures = "0.3.30" indicatif = "0.17.8" nix-compat = { version = "0.1.0", path = "../../snix/nix-compat" } diff --git a/contrib/fetchroots/src/main.rs b/contrib/fetchroots/src/main.rs index 842b719c2..563f57712 100644 --- a/contrib/fetchroots/src/main.rs +++ b/contrib/fetchroots/src/main.rs @@ -3,12 +3,14 @@ //! The resulting Parquet has three columns: //! //! * `key` (`String`): the release, eg `nixos/22.11-small/nixos-22.11.513.563dc6476b8` +//! * `git_rev` (`Binary`): the git revision hash of this release, if available //! * `timestamp` (`DateTime`): the timestamp of the GC roots file for this release //! * `store_path_hash` (`List[Binary]`): hash part of the store paths rooted by this release //! //! [^1]: some roots are truly ancient, and aren't compatible with Nix 1.x use anyhow::Result; +use data_encoding::HEXLOWER; use std::{ collections::BTreeMap, fs::File, @@ -123,6 +125,31 @@ async fn main() { js.spawn(async move { let _permit = sem.acquire().await.unwrap(); + // TODO(edef): learn whether there is a git-revision from the listings + let rev = s3 + .get_object() + .bucket("nix-releases") + .key(format!("{root}/git-revision")) + .send() + .await; + + let rev = match rev { + Ok(resp) => { + let hex = resp.body.collect().await.unwrap().to_vec(); + let mut buf = [0; 20]; + assert_eq!(HEXLOWER.decode_mut(&hex, &mut buf).unwrap(), buf.len()); + Ok(Some(buf)) + } + Err(e) => { + if e.as_service_error().is_some_and(|e| e.is_no_such_key()) { + Ok(None) + } else { + Err(e) + } + } + } + .unwrap(); + let body = get_object( s3.get_object() .bucket("nix-releases") @@ -136,6 +163,7 @@ async fn main() { let ph_array = block_in_place(|| meta.format.to_ph_array(body).rechunk()); df! { "key" => [root], + "git_rev" => [rev.as_ref().map(|r| &r[..])], "timestamp" => [meta.last_modified.naive_utc()], "store_path_hash" => ph_array.into_series().implode().unwrap() } @@ -146,6 +174,7 @@ async fn main() { let mut writer = ParquetWriter::new(File::create("roots.parquet").unwrap()) .batched(&Schema::from_iter([ Field::new("key", DataType::String), + Field::new("git_rev", DataType::Binary), Field::new( "timestamp", DataType::Datetime(TimeUnit::Milliseconds, None),