refactor(tvix/castore): use Directory struct separate from proto one

This uses our own data type to deal with Directories in the castore model.

It makes some undesired states unrepresentable, removing the need for conversions and checking in various places:

 - In the protobuf, blake3 digests could have a wrong length, as protobuf does not support fixed-size byte fields. We now use `B3Digest`, which makes cloning cheaper, and removes the need to do size-checking everywhere.
 - In the protobuf, we had three different lists for `files`, `symlinks` and `directories`. This was mostly a protobuf size optimization, but it made interacting with them a bit awkward. It has now been replaced with a single list of enums, plus convenience iterators to access the various node types and to add new ones.

Change-Id: I7b92691bb06d77ff3f58a5ccea94a22c16f84f04
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12057
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
Yureka 2024-07-29 14:34:50 +02:00 committed by yuka
parent 5d3f3158d6
commit 3ca0b53840
53 changed files with 1429 additions and 1377 deletions

View file

@ -13,11 +13,11 @@ use tokio_stream::{wrappers::ReadDirStream, StreamExt};
use super::FuseDaemon;
use crate::fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST};
use crate::proto as castorepb;
use crate::proto::node::Node;
use crate::{
blobservice::{BlobService, MemoryBlobService},
directoryservice::{DirectoryService, MemoryDirectoryService},
directoryservice::{
DirectoryNode, DirectoryService, FileNode, MemoryDirectoryService, Node, SymlinkNode,
},
fixtures,
};
@ -70,12 +70,15 @@ async fn populate_blob_a(
root_nodes.insert(
BLOB_A_NAME.into(),
Node::File(castorepb::FileNode {
name: BLOB_A_NAME.into(),
digest: fixtures::BLOB_A_DIGEST.clone().into(),
size: fixtures::BLOB_A.len() as u64,
executable: false,
}),
Node::File(
FileNode::new(
BLOB_A_NAME.into(),
fixtures::BLOB_A_DIGEST.clone(),
fixtures::BLOB_A.len() as u64,
false,
)
.unwrap(),
),
);
}
@ -91,12 +94,15 @@ async fn populate_blob_b(
root_nodes.insert(
BLOB_B_NAME.into(),
Node::File(castorepb::FileNode {
name: BLOB_B_NAME.into(),
digest: fixtures::BLOB_B_DIGEST.clone().into(),
size: fixtures::BLOB_B.len() as u64,
executable: false,
}),
Node::File(
FileNode::new(
BLOB_B_NAME.into(),
fixtures::BLOB_B_DIGEST.clone(),
fixtures::BLOB_B.len() as u64,
false,
)
.unwrap(),
),
);
}
@ -116,22 +122,22 @@ async fn populate_blob_helloworld(
root_nodes.insert(
HELLOWORLD_BLOB_NAME.into(),
Node::File(castorepb::FileNode {
name: HELLOWORLD_BLOB_NAME.into(),
digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone().into(),
size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: true,
}),
Node::File(
FileNode::new(
HELLOWORLD_BLOB_NAME.into(),
fixtures::HELLOWORLD_BLOB_DIGEST.clone(),
fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64,
true,
)
.unwrap(),
),
);
}
async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
SYMLINK_NAME.into(),
Node::Symlink(castorepb::SymlinkNode {
name: SYMLINK_NAME.into(),
target: BLOB_A_NAME.into(),
}),
Node::Symlink(SymlinkNode::new(SYMLINK_NAME.into(), BLOB_A_NAME.into()).unwrap()),
);
}
@ -140,10 +146,9 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
SYMLINK_NAME2.into(),
Node::Symlink(castorepb::SymlinkNode {
name: SYMLINK_NAME2.into(),
target: "/nix/store/somewhereelse".into(),
}),
Node::Symlink(
SymlinkNode::new(SYMLINK_NAME2.into(), "/nix/store/somewhereelse".into()).unwrap(),
),
);
}
@ -167,11 +172,14 @@ async fn populate_directory_with_keep(
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
castorepb::node::Node::Directory(castorepb::DirectoryNode {
name: DIRECTORY_WITH_KEEP_NAME.into(),
digest: fixtures::DIRECTORY_WITH_KEEP.digest().into(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
}),
Node::Directory(
DirectoryNode::new(
DIRECTORY_WITH_KEEP_NAME.into(),
fixtures::DIRECTORY_WITH_KEEP.digest(),
fixtures::DIRECTORY_WITH_KEEP.size(),
)
.unwrap(),
),
);
}
@ -180,11 +188,14 @@ async fn populate_directory_with_keep(
async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
castorepb::node::Node::Directory(castorepb::DirectoryNode {
name: DIRECTORY_WITH_KEEP_NAME.into(),
digest: fixtures::DIRECTORY_WITH_KEEP.digest().into(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
}),
Node::Directory(
DirectoryNode::new(
DIRECTORY_WITH_KEEP_NAME.into(),
fixtures::DIRECTORY_WITH_KEEP.digest(),
fixtures::DIRECTORY_WITH_KEEP.size(),
)
.unwrap(),
),
);
}
@ -192,12 +203,15 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte
async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
BLOB_A_NAME.into(),
Node::File(castorepb::FileNode {
name: BLOB_A_NAME.into(),
digest: fixtures::BLOB_A_DIGEST.clone().into(),
size: fixtures::BLOB_A.len() as u64,
executable: false,
}),
Node::File(
FileNode::new(
BLOB_A_NAME.into(),
fixtures::BLOB_A_DIGEST.clone(),
fixtures::BLOB_A.len() as u64,
false,
)
.unwrap(),
),
);
}
@ -227,11 +241,14 @@ async fn populate_directory_complicated(
root_nodes.insert(
DIRECTORY_COMPLICATED_NAME.into(),
Node::Directory(castorepb::DirectoryNode {
name: DIRECTORY_COMPLICATED_NAME.into(),
digest: fixtures::DIRECTORY_COMPLICATED.digest().into(),
size: fixtures::DIRECTORY_COMPLICATED.size(),
}),
Node::Directory(
DirectoryNode::new(
DIRECTORY_COMPLICATED_NAME.into(),
fixtures::DIRECTORY_COMPLICATED.digest(),
fixtures::DIRECTORY_COMPLICATED.size(),
)
.unwrap(),
),
);
}

View file

@ -4,7 +4,7 @@ use std::time::Duration;
use bytes::Bytes;
use crate::proto as castorepb;
use crate::directoryservice::{NamedNode, Node};
use crate::B3Digest;
#[derive(Clone, Debug)]
@ -20,27 +20,24 @@ pub enum InodeData {
/// lookup and did fetch the data.
#[derive(Clone, Debug)]
pub enum DirectoryInodeData {
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, castorepb::node::Node)>), // [(child_inode, node)]
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, Node)>), // [(child_inode, node)]
}
impl InodeData {
/// Constructs a new InodeData by consuming a [Node].
/// It splits off the original name, so it can be used later.
pub fn from_node(node: castorepb::node::Node) -> (Self, Bytes) {
pub fn from_node(node: &Node) -> (Self, Bytes) {
match node {
castorepb::node::Node::Directory(n) => (
Self::Directory(DirectoryInodeData::Sparse(
n.digest.try_into().unwrap(),
n.size,
)),
n.name,
Node::Directory(n) => (
Self::Directory(DirectoryInodeData::Sparse(n.digest().clone(), n.size())),
n.get_name().clone(),
),
castorepb::node::Node::File(n) => (
Self::Regular(n.digest.try_into().unwrap(), n.size, n.executable),
n.name,
Node::File(n) => (
Self::Regular(n.digest().clone(), n.size(), n.executable()),
n.get_name().clone(),
),
castorepb::node::Node::Symlink(n) => (Self::Symlink(n.target), n.name),
Node::Symlink(n) => (Self::Symlink(n.target().clone()), n.get_name().clone()),
}
}

View file

@ -15,11 +15,9 @@ use self::{
inode_tracker::InodeTracker,
inodes::{DirectoryInodeData, InodeData},
};
use crate::proto as castorepb;
use crate::{
blobservice::{BlobReader, BlobService},
directoryservice::DirectoryService,
proto::{node::Node, NamedNode},
directoryservice::{DirectoryService, NamedNode, Node},
B3Digest,
};
use bstr::ByteVec;
@ -198,13 +196,13 @@ where
let children = {
let mut inode_tracker = self.inode_tracker.write();
let children: Vec<(u64, castorepb::node::Node)> = directory
let children: Vec<(u64, Node)> = directory
.nodes()
.map(|child_node| {
let (inode_data, _) = InodeData::from_node(child_node.clone());
let (inode_data, _) = InodeData::from_node(child_node);
let child_ino = inode_tracker.put(inode_data);
(child_ino, child_node)
(child_ino, child_node.clone())
})
.collect();
@ -287,7 +285,7 @@ where
// insert the (sparse) inode data and register in
// self.root_nodes.
let (inode_data, name) = InodeData::from_node(root_node);
let (inode_data, name) = InodeData::from_node(&root_node);
let ino = inode_tracker.put(inode_data.clone());
root_nodes.insert(name, ino);
@ -468,7 +466,7 @@ where
io::Error::from_raw_os_error(libc::EIO)
})?;
let (inode_data, name) = InodeData::from_node(root_node);
let (inode_data, name) = InodeData::from_node(&root_node);
// obtain the inode, or allocate a new one.
let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| {
@ -498,7 +496,7 @@ where
Span::current().record("directory.digest", parent_digest.to_string());
for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() {
let (inode_data, name) = InodeData::from_node(child_node);
let (inode_data, name) = InodeData::from_node(&child_node);
// the second parameter will become the "offset" parameter on the next call.
let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry {
@ -555,7 +553,7 @@ where
io::Error::from_raw_os_error(libc::EPERM)
})?;
let (inode_data, name) = InodeData::from_node(root_node);
let (inode_data, name) = InodeData::from_node(&root_node);
// obtain the inode, or allocate a new one.
let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| {
@ -588,7 +586,7 @@ where
Span::current().record("directory.digest", parent_digest.to_string());
for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() {
let (inode_data, name) = InodeData::from_node(child_node);
let (inode_data, name) = InodeData::from_node(&child_node);
// the second parameter will become the "offset" parameter on the next call.
let written = add_entry(

View file

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use crate::{proto::node::Node, Error};
use crate::{directoryservice::Node, Error};
use bytes::Bytes;
use futures::stream::BoxStream;
use tonic::async_trait;