feat(contrib/fetchroots): track git revisions

Change-Id: I3af0a4486a8685191adc28210162661fae6cfc3c
Reviewed-on: https://cl.snix.dev/c/snix/+/30290
Reviewed-by: Florian Klink <flokli@flokli.de>
Tested-by: besadii
This commit is contained in:
edef 2025-04-02 11:16:10 +00:00
parent d108eecb24
commit 7b20d0dac1
4 changed files with 39 additions and 4 deletions

View file

@ -917,9 +917,9 @@ dependencies = [
[[package]]
name = "data-encoding"
version = "2.6.0"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2"
checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010"
[[package]]
name = "der"
@ -1099,6 +1099,7 @@ dependencies = [
"bytes-utils",
"bzip2",
"chrono",
"data-encoding",
"futures",
"indicatif",
"nix-compat",

View file

@ -3099,9 +3099,9 @@ rec {
};
"data-encoding" = rec {
crateName = "data-encoding";
version = "2.6.0";
version = "2.8.0";
edition = "2018";
sha256 = "1qnn68n4vragxaxlkqcb1r28d3hhj43wch67lm4rpxlw89wnjmp8";
sha256 = "0470yf5ly1ibzmwygyjqg9ii9njbsha3xr5qj5dxyf2psbgpapsp";
libName = "data_encoding";
authors = [
"Julien Cretin <git@ia0.eu>"
@ -3721,6 +3721,10 @@ rec {
name = "chrono";
packageId = "chrono";
}
{
name = "data-encoding";
packageId = "data-encoding";
}
{
name = "futures";
packageId = "futures";

View file

@ -14,6 +14,7 @@ bytes = "1.5.0"
bytes-utils = "0.1.4"
bzip2 = "0.4.4"
chrono = "0.4.34"
data-encoding = "2.8.0"
futures = "0.3.30"
indicatif = "0.17.8"
nix-compat = { version = "0.1.0", path = "../../snix/nix-compat" }

View file

@ -3,12 +3,14 @@
//! The resulting Parquet has four columns:
//!
//! * `key` (`String`): the release, eg `nixos/22.11-small/nixos-22.11.513.563dc6476b8`
//! * `git_rev` (`Binary`): the git revision hash of this release, if available
//! * `timestamp` (`DateTime`): the timestamp of the GC roots file for this release
//! * `store_path_hash` (`List[Binary]`): hash part of the store paths rooted by this release
//!
//! [^1]: some roots are truly ancient, and aren't compatible with Nix 1.x
use anyhow::Result;
use data_encoding::HEXLOWER;
use std::{
collections::BTreeMap,
fs::File,
@ -123,6 +125,31 @@ async fn main() {
js.spawn(async move {
let _permit = sem.acquire().await.unwrap();
// TODO(edef): learn whether there is a git-revision from the listings
// Probe the `nix-releases` bucket for this release's `git-revision` object.
// The request result is kept as-is here so that a missing key can be told
// apart from other failures below.
let rev = s3
.get_object()
.bucket("nix-releases")
.key(format!("{root}/git-revision"))
.send()
.await;
// Resolve the response into `Some(raw 20-byte hash)` when the object exists,
// `None` when the key is absent, and panic (via the trailing `unwrap`) on any
// other request error.
let rev = match rev {
Ok(resp) => {
// Read the whole object body into memory; a failed read panics.
let hex = resp.body.collect().await.unwrap().to_vec();
// Destination for the decoded revision: 20 raw bytes.
let mut buf = [0; 20];
// Decode the lowercase-hex body and insist that it filled the buffer exactly.
// NOTE(review): presumably the object holds exactly 40 hex digits with no
// trailing newline; any other content aborts the task here. Confirm.
assert_eq!(HEXLOWER.decode_mut(&hex, &mut buf).unwrap(), buf.len());
Ok(Some(buf))
}
Err(e) => {
// A missing key means no revision was recorded for this release; that is
// mapped to `None` instead of being treated as a failure.
if e.as_service_error().is_some_and(|e| e.is_no_such_key()) {
Ok(None)
} else {
// Every other error is handed to the `unwrap` below.
Err(e)
}
}
}
.unwrap();
let body = get_object(
s3.get_object()
.bucket("nix-releases")
@ -136,6 +163,7 @@ async fn main() {
let ph_array = block_in_place(|| meta.format.to_ph_array(body).rechunk());
df! {
"key" => [root],
"git_rev" => [rev.as_ref().map(|r| &r[..])],
"timestamp" => [meta.last_modified.naive_utc()],
"store_path_hash" => ph_array.into_series().implode().unwrap()
}
@ -146,6 +174,7 @@ async fn main() {
let mut writer = ParquetWriter::new(File::create("roots.parquet").unwrap())
.batched(&Schema::from_iter([
Field::new("key", DataType::String),
Field::new("git_rev", DataType::Binary),
Field::new(
"timestamp",
DataType::Datetime(TimeUnit::Milliseconds, None),