feat(tvix/tools/crunch-v2): init

This is a tool for ingesting subsets of cache.nixos.org into its own flattened castore format.
Currently, produced chunks are not preserved, and this purely serves as a way of measuring
compression/deduplication ratios for various chunking and compression parameters.

Change-Id: I3983af02a66f7837d76874ee0fc8b2fab62ac17e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10486
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
edef 2024-01-17 16:04:03 +00:00
parent e0a1c03b24
commit 4f22203a3a
12 changed files with 15022 additions and 0 deletions

View file

@ -0,0 +1,38 @@
# crunch-v2: a tool for ingesting subsets of cache.nixos.org into a flattened
# castore format, used to measure compression/deduplication ratios for
# different chunking and compression parameters.
[package]
name = "crunch-v2"
version = "0.1.0"
edition = "2021"

# Declaring a [workspace] table here makes this package the root of its own
# workspace, with itself as the only member.
# NOTE(review): presumably this keeps the crate (and its lockfile) separate
# from any enclosing workspace -- confirm.
[workspace]
members = ["."]

# Runtime dependencies, kept in alphabetical order.
[dependencies]
anyhow = { version = "1.0.75", features = ["backtrace"] }
blake3 = "1.5.0"
bstr = "1.8.0"
bytes = "1.5.0"
bzip2 = "0.4.4"
digest = "0.10.7"
fastcdc = "3.1.0"
futures = "0.3.29"
indicatif = "0.17.7"
lazy_static = "1.4.0"
# In-tree path dependency, resolved relative to this manifest.
nix-compat = { version = "0.1.0", path = "../../nix-compat" }
# Default features are disabled; only the features listed here are enabled.
polars = { version = "0.35.4", default-features = false, features = ["parquet", "lazy", "sql", "dtype-struct"] }
prost = "0.12.2"
# Default features are disabled for both rusoto crates; the TLS backend is
# selected explicitly through the rustls features.
rusoto_core = { version = "0.48.0", default-features = false, features = ["hyper-rustls"] }
rusoto_s3 = { version = "0.48.0", default-features = false, features = ["rustls"] }
# NOTE(review): the `asm` feature is not available on every target -- confirm
# it builds on all platforms this tool is expected to run on.
sha2 = { version = "0.10.8", features = ["asm"] }
sled = "0.34.7"
tokio = { version = "1.34.0", features = ["full"] }
xz2 = "0.1.7"
zstd = "0.13.0"

# Dependencies available to the build script only.
# NOTE(review): presumably build.rs uses prost-build to generate code from
# protobuf definitions; the version matches the `prost` runtime dependency
# -- keep the two in sync.
[build-dependencies]
prost-build = "0.12.2"