refactor(tvix/castore): use Directory struct separate from proto one

This uses our own data type to deal with Directories in the castore model.

It makes some undesired states unrepresentable, removing the need for conversions and checking in various places:

 - In the protobuf, blake3 digests could have a wrong length, because protobuf has no fixed-length `bytes` type. We now use `B3Digest`, which makes cloning cheaper and removes the need to do size-checking everywhere.
 - In the protobuf, we had three different lists for `files`, `symlinks` and `directories`. This was mostly a protobuf size optimization, but it made interacting with them a bit awkward. They have now been replaced with a list of enums, along with convenience iterators to get the various kinds of nodes and a way to add new ones.

Change-Id: I7b92691bb06d77ff3f58a5ccea94a22c16f84f04
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12057
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
Yureka 2024-07-29 14:34:50 +02:00 committed by yuka
parent 5d3f3158d6
commit 3ca0b53840
53 changed files with 1429 additions and 1377 deletions

View file

@ -7,12 +7,11 @@ use tokio::{
};
use tvix_castore::{
blobservice::BlobService,
directoryservice::DirectoryService,
directoryservice::{DirectoryService, NamedNode, Node},
import::{
blobs::{self, ConcurrentBlobUploader},
ingest_entries, IngestionEntry, IngestionError,
},
proto::{node::Node, NamedNode},
PathBuf,
};
@ -99,7 +98,7 @@ where
let (_, node) = try_join!(produce, consume)?;
// remove the fake "root" name again
debug_assert_eq!(&node.get_name(), b"root");
debug_assert_eq!(&node.get_name()[..], b"root");
Ok(node.rename("".into()))
}
@ -172,12 +171,13 @@ mod test {
use rstest::*;
use tokio_stream::StreamExt;
use tvix_castore::blobservice::BlobService;
use tvix_castore::directoryservice::DirectoryService;
use tvix_castore::directoryservice::{
Directory, DirectoryNode, DirectoryService, FileNode, Node, SymlinkNode,
};
use tvix_castore::fixtures::{
DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST, HELLOWORLD_BLOB_CONTENTS,
HELLOWORLD_BLOB_DIGEST,
};
use tvix_castore::proto as castorepb;
use crate::tests::fixtures::{
blob_service, directory_service, NAR_CONTENTS_COMPLICATED, NAR_CONTENTS_HELLOWORLD,
@ -199,10 +199,13 @@ mod test {
.expect("must parse");
assert_eq!(
castorepb::node::Node::Symlink(castorepb::SymlinkNode {
name: "".into(), // name must be empty
target: "/nix/store/somewhereelse".into(),
}),
Node::Symlink(
SymlinkNode::new(
"".into(), // name must be empty
"/nix/store/somewhereelse".into(),
)
.unwrap()
),
root_node
);
}
@ -222,12 +225,15 @@ mod test {
.expect("must parse");
assert_eq!(
castorepb::node::Node::File(castorepb::FileNode {
name: "".into(), // name must be empty
digest: HELLOWORLD_BLOB_DIGEST.clone().into(),
size: HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: false,
}),
Node::File(
FileNode::new(
"".into(), // name must be empty
HELLOWORLD_BLOB_DIGEST.clone(),
HELLOWORLD_BLOB_CONTENTS.len() as u64,
false,
)
.unwrap()
),
root_node
);
@ -250,11 +256,14 @@ mod test {
.expect("must parse");
assert_eq!(
castorepb::node::Node::Directory(castorepb::DirectoryNode {
name: "".into(), // name must be empty
digest: DIRECTORY_COMPLICATED.digest().into(),
size: DIRECTORY_COMPLICATED.size(),
}),
Node::Directory(
DirectoryNode::new(
"".into(), // name must be empty
DIRECTORY_COMPLICATED.digest(),
DIRECTORY_COMPLICATED.size(),
)
.unwrap()
),
root_node,
);
@ -262,7 +271,7 @@ mod test {
assert!(blob_service.has(&EMPTY_BLOB_DIGEST).await.unwrap());
// directoryservice must contain the directories, at least with get_recursive.
let resp: Result<Vec<castorepb::Directory>, _> = directory_service
let resp: Result<Vec<Directory>, _> = directory_service
.get_recursive(&DIRECTORY_COMPLICATED.digest())
.collect()
.await;

View file

@ -8,16 +8,14 @@ pub use import::ingest_nar_and_hash;
pub use renderer::calculate_size_and_sha256;
pub use renderer::write_nar;
pub use renderer::SimpleRenderer;
use tvix_castore::proto as castorepb;
use tvix_castore::directoryservice::Node;
#[async_trait]
pub trait NarCalculationService: Send + Sync {
/// Return the nar size and nar sha256 digest for a given root node.
/// This can be used to calculate NAR-based output paths.
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), tvix_castore::Error>;
async fn calculate_nar(&self, root_node: &Node)
-> Result<(u64, [u8; 32]), tvix_castore::Error>;
}
#[async_trait]
@ -27,7 +25,7 @@ where
{
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
root_node: &Node,
) -> Result<(u64, [u8; 32]), tvix_castore::Error> {
self.as_ref().calculate_nar(root_node).await
}

View file

@ -10,8 +10,7 @@ use tracing::{instrument, Span};
use tracing_indicatif::span_ext::IndicatifSpanExt;
use tvix_castore::{
blobservice::BlobService,
directoryservice::DirectoryService,
proto::{self as castorepb, NamedNode},
directoryservice::{DirectoryService, NamedNode, Node},
};
pub struct SimpleRenderer<BS, DS> {
@ -36,7 +35,7 @@ where
{
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
root_node: &Node,
) -> Result<(u64, [u8; 32]), tvix_castore::Error> {
calculate_size_and_sha256(
root_node,
@ -52,7 +51,7 @@ where
/// NAR output.
#[instrument(skip_all, fields(indicatif.pb_show=1))]
pub async fn calculate_size_and_sha256<BS, DS>(
root_node: &castorepb::node::Node,
root_node: &Node,
blob_service: BS,
directory_service: DS,
) -> Result<(u64, [u8; 32]), RenderError>
@ -80,13 +79,13 @@ where
Ok((cw.count(), h.finalize().into()))
}
/// Accepts a [castorepb::node::Node] pointing to the root of a (store) path,
/// Accepts a [Node] pointing to the root of a (store) path,
/// and uses the passed blob_service and directory_service to perform the
/// necessary lookups as it traverses the structure.
/// The contents in NAR serialization are written to the passed [AsyncWrite].
pub async fn write_nar<W, BS, DS>(
mut w: W,
proto_root_node: &castorepb::node::Node,
proto_root_node: &Node,
blob_service: BS,
directory_service: DS,
) -> Result<(), RenderError>
@ -115,7 +114,7 @@ where
/// This consumes the node.
async fn walk_node<BS, DS>(
nar_node: nar_writer::Node<'_, '_>,
proto_node: &castorepb::node::Node,
proto_node: &Node,
blob_service: BS,
directory_service: DS,
) -> Result<(BS, DS), RenderError>
@ -124,23 +123,17 @@ where
DS: DirectoryService + Send,
{
match proto_node {
castorepb::node::Node::Symlink(proto_symlink_node) => {
Node::Symlink(proto_symlink_node) => {
nar_node
.symlink(&proto_symlink_node.target)
.symlink(proto_symlink_node.target())
.await
.map_err(RenderError::NARWriterError)?;
}
castorepb::node::Node::File(proto_file_node) => {
let digest_len = proto_file_node.digest.len();
let digest = proto_file_node.digest.clone().try_into().map_err(|_| {
RenderError::StoreError(io::Error::new(
io::ErrorKind::Other,
format!("invalid digest len {} in file node", digest_len),
))
})?;
Node::File(proto_file_node) => {
let digest = proto_file_node.digest();
let mut blob_reader = match blob_service
.open_read(&digest)
.open_read(digest)
.await
.map_err(RenderError::StoreError)?
{
@ -153,36 +146,24 @@ where
nar_node
.file(
proto_file_node.executable,
proto_file_node.size,
proto_file_node.executable(),
proto_file_node.size(),
&mut blob_reader,
)
.await
.map_err(RenderError::NARWriterError)?;
}
castorepb::node::Node::Directory(proto_directory_node) => {
let digest_len = proto_directory_node.digest.len();
let digest = proto_directory_node
.digest
.clone()
.try_into()
.map_err(|_| {
RenderError::StoreError(io::Error::new(
io::ErrorKind::InvalidData,
format!("invalid digest len {} in directory node", digest_len),
))
})?;
Node::Directory(proto_directory_node) => {
// look it up with the directory service
match directory_service
.get(&digest)
.get(proto_directory_node.digest())
.await
.map_err(|e| RenderError::StoreError(e.into()))?
{
// if it's None, that's an error!
None => Err(RenderError::DirectoryNotFound(
digest,
proto_directory_node.name.clone(),
proto_directory_node.digest().clone(),
proto_directory_node.get_name().clone(),
))?,
Some(proto_directory) => {
// start a directory node
@ -206,7 +187,7 @@ where
(blob_service, directory_service) = Box::pin(walk_node(
child_node,
&proto_node,
proto_node,
blob_service,
directory_service,
))