refactor(tvix/castore): generalize store ingestion streams

Previously, the store ingestion code was coupled to the
`walkdir::DirEntry` values produced by the `walkdir` crate, which made
it impossible to reuse for ingesting from other sources like tarballs or
NARs.

This introduces an `IngestionEntry` which carries enough information for
store ingestion and a future for computing the Blake3 digest of files.
This allows the producer to perform file uploads in a way that makes
sense for the source, e.g. the filesystem ingestor could upload multiple
files concurrently, while the NAR ingestor will need to ingest the
entire blob before yielding the next blob in the stream.
In the future we can buffer small blobs and upload them concurrently,
but the full blob still needs to be read from the NAR before advancing.

Change-Id: I6d144063e2ba5b05e765bac1f27d41b3c8e7b283
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11462
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Connor Brewster 2024-04-18 13:51:28 -05:00
parent 150106610e
commit 259d7a3cfa
5 changed files with 256 additions and 258 deletions

View file

@ -112,12 +112,12 @@ pub async fn import_path_as_nar_ca<BS, DS, PS, P>(
) -> Result<StorePath, std::io::Error>
where
P: AsRef<Path> + std::fmt::Debug,
BS: AsRef<dyn BlobService> + Clone,
BS: BlobService + Clone,
DS: AsRef<dyn DirectoryService>,
PS: AsRef<dyn PathInfoService>,
{
let root_node =
tvix_castore::import::ingest_path(blob_service, directory_service, &path).await?;
tvix_castore::import::ingest_path(blob_service, directory_service, path.as_ref()).await?;
// Ask the PathInfoService for the NAR size and sha256
let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?;