refactor(tvix/castore): move *Node and Directory to crate root
*Node and Directory are types of the tvix-castore model, not the tvix DirectoryService model. A DirectoryService only happens to send Directories. Move types into individual files in a nodes/ subdirectory, as it's gotten too cluttered in a single file, and (re-)export all types from the crate root. This has the effect that we now cannot poke at private fields directly from other files inside `crate::directoryservice` (as it's not all in the same file anymore), but that's a good thing, it now forces us to go through the proper accessors. For the same reasons, we currently also need to introduce the `rename` functions on each *Node directly. A followup is gonna move the names out of the individual enum kinds, so we can better represent "unnamed nodes". Change-Id: Icdb34dcfe454c41c94f2396e8e99973d27db8418 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12199 Reviewed-by: yuka <yuka@yuka.dev> Autosubmit: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
This commit is contained in:
parent
2f4185ff1a
commit
c7845f3c88
42 changed files with 620 additions and 622 deletions
234
tvix/castore/src/nodes/directory.rs
Normal file
234
tvix/castore/src/nodes/directory.rs
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
use crate::{
|
||||
proto, B3Digest, DirectoryNode, FileNode, NamedNode, Node, SymlinkNode, ValidateDirectoryError,
|
||||
ValidateNodeError,
|
||||
};
|
||||
|
||||
/// A Directory can contain Directory, File or Symlink nodes.
|
||||
/// Each of these nodes have a name attribute, which is the basename in that
|
||||
/// directory and node type specific attributes.
|
||||
/// While a Node by itself may have any name, the names of Directory entries:
|
||||
/// - MUST not contain slashes or null bytes
|
||||
/// - MUST not be '.' or '..'
|
||||
/// - MUST be unique across all three lists
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Directory {
|
||||
nodes: Vec<Node>,
|
||||
}
|
||||
|
||||
impl Directory {
|
||||
pub fn new() -> Self {
|
||||
Directory { nodes: vec![] }
|
||||
}
|
||||
|
||||
/// The size of a directory is the number of all regular and symlink elements,
|
||||
/// the number of directory elements, and their size fields.
|
||||
pub fn size(&self) -> u64 {
|
||||
// It's impossible to create a Directory where the size overflows, because we
|
||||
// check before every add() that the size won't overflow.
|
||||
(self.nodes.len() as u64) + self.directories().map(|e| e.size()).sum::<u64>()
|
||||
}
|
||||
|
||||
/// Calculates the digest of a Directory, which is the blake3 hash of a
|
||||
/// Directory protobuf message, serialized in protobuf canonical form.
|
||||
pub fn digest(&self) -> B3Digest {
|
||||
proto::Directory::from(self.clone()).digest()
|
||||
}
|
||||
|
||||
/// Allows iterating over all nodes (directories, files and symlinks)
|
||||
/// ordered by their name.
|
||||
pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ {
|
||||
self.nodes.iter()
|
||||
}
|
||||
|
||||
/// Allows iterating over the FileNode entries of this directory
|
||||
/// ordered by their name
|
||||
pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ {
|
||||
self.nodes.iter().filter_map(|node| match node {
|
||||
Node::File(n) => Some(n),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Allows iterating over the subdirectories of this directory
|
||||
/// ordered by their name
|
||||
pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ {
|
||||
self.nodes.iter().filter_map(|node| match node {
|
||||
Node::Directory(n) => Some(n),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Allows iterating over the SymlinkNode entries of this directory
|
||||
/// ordered by their name
|
||||
pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ {
|
||||
self.nodes.iter().filter_map(|node| match node {
|
||||
Node::Symlink(n) => Some(n),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Checks a Node name for validity as a directory entry
|
||||
/// We disallow slashes, null bytes, '.', '..' and the empty string.
|
||||
pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> {
|
||||
if name.is_empty()
|
||||
|| name == b".."
|
||||
|| name == b"."
|
||||
|| name.contains(&0x00)
|
||||
|| name.contains(&b'/')
|
||||
{
|
||||
Err(ValidateNodeError::InvalidName(name.to_owned().into()))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the specified [Node] to the [Directory], preserving sorted entries.
|
||||
///
|
||||
/// Inserting an element that already exists with the same name in the directory will yield an
|
||||
/// error.
|
||||
/// Inserting an element will validate that its name fulfills the stricter requirements for
|
||||
/// directory entries and yield an error if it is not.
|
||||
pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> {
|
||||
Self::validate_node_name(node.get_name())
|
||||
.map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?;
|
||||
|
||||
// Check that the even after adding this new directory entry, the size calculation will not
|
||||
// overflow
|
||||
// FUTUREWORK: add some sort of batch add interface which only does this check once with
|
||||
// all the to-be-added entries
|
||||
checked_sum([
|
||||
self.size(),
|
||||
1,
|
||||
match node {
|
||||
Node::Directory(ref dir) => dir.size(),
|
||||
_ => 0,
|
||||
},
|
||||
])
|
||||
.ok_or(ValidateDirectoryError::SizeOverflow)?;
|
||||
|
||||
// This assumes the [Directory] is sorted, since we don't allow accessing the nodes list
|
||||
// directly and all previous inserts should have been in-order
|
||||
let pos = match self
|
||||
.nodes
|
||||
.binary_search_by_key(&node.get_name(), |n| n.get_name())
|
||||
{
|
||||
Err(pos) => pos, // There is no node with this name; good!
|
||||
Ok(_) => {
|
||||
return Err(ValidateDirectoryError::DuplicateName(
|
||||
node.get_name().to_vec(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
self.nodes.insert(pos, node);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
|
||||
iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
|
||||
use crate::fixtures::DUMMY_DIGEST;
|
||||
use crate::ValidateDirectoryError;
|
||||
|
||||
#[test]
|
||||
fn add_nodes_to_directory() {
|
||||
let mut d = Directory::new();
|
||||
|
||||
d.add(Node::Directory(
|
||||
DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
d.add(Node::Directory(
|
||||
DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
d.add(Node::Directory(
|
||||
DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
d.add(Node::File(
|
||||
FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
d.add(Node::File(
|
||||
FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
d.add(Node::File(
|
||||
FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
d.add(Node::Symlink(
|
||||
SymlinkNode::new("t".into(), "a".into()).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
d.add(Node::Symlink(
|
||||
SymlinkNode::new("o".into(), "a".into()).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
d.add(Node::Symlink(
|
||||
SymlinkNode::new("e".into(), "a".into()).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
// Convert to proto struct and back to ensure we are not generating any invalid structures
|
||||
crate::Directory::try_from(crate::proto::Directory::from(d))
|
||||
.expect("directory should be valid");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validate_overflow() {
|
||||
let mut d = Directory::new();
|
||||
|
||||
assert_eq!(
|
||||
d.add(Node::Directory(
|
||||
DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(),
|
||||
)),
|
||||
Err(ValidateDirectoryError::SizeOverflow)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn add_duplicate_node_to_directory() {
|
||||
let mut d = Directory::new();
|
||||
|
||||
d.add(Node::Directory(
|
||||
DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
format!(
|
||||
"{}",
|
||||
d.add(Node::File(
|
||||
FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
|
||||
))
|
||||
.expect_err("adding duplicate dir entry must fail")
|
||||
),
|
||||
"\"a\" is a duplicate name"
|
||||
);
|
||||
}
|
||||
|
||||
/// Attempt to add a directory entry with a name which should be rejected.
|
||||
#[tokio::test]
|
||||
async fn directory_reject_invalid_name() {
|
||||
let mut dir = Directory::new();
|
||||
assert!(
|
||||
dir.add(Node::Symlink(
|
||||
SymlinkNode::new(
|
||||
"".into(), // wrong! can not be added to directory
|
||||
"doesntmatter".into(),
|
||||
)
|
||||
.unwrap()
|
||||
))
|
||||
.is_err(),
|
||||
"invalid symlink entry be rejected"
|
||||
);
|
||||
}
|
||||
}
|
||||
64
tvix/castore/src/nodes/directory_node.rs
Normal file
64
tvix/castore/src/nodes/directory_node.rs
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
use crate::{B3Digest, NamedNode, ValidateNodeError};
|
||||
|
||||
/// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest].
|
||||
/// It also gives it a `name` and `size`.
|
||||
/// Such a node is either an element in the [Directory] it itself is contained in,
|
||||
/// or a standalone root node./
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct DirectoryNode {
|
||||
/// The (base)name of the directory
|
||||
name: bytes::Bytes,
|
||||
/// The blake3 hash of a Directory message, serialized in protobuf canonical form.
|
||||
digest: B3Digest,
|
||||
/// Number of child elements in the Directory referred to by `digest`.
|
||||
/// Calculated by summing up the numbers of nodes, and for each directory.
|
||||
/// its size field. Can be used for inode allocation.
|
||||
/// This field is precisely as verifiable as any other Merkle tree edge.
|
||||
/// Resolve `digest`, and you can compute it incrementally. Resolve the entire
|
||||
/// tree, and you can fully compute it from scratch.
|
||||
/// A credulous implementation won't reject an excessive size, but this is
|
||||
/// harmless: you'll have some ordinals without nodes. Undersizing is obvious
|
||||
/// and easy to reject: you won't have an ordinal for some nodes.
|
||||
size: u64,
|
||||
}
|
||||
|
||||
impl DirectoryNode {
|
||||
pub fn new(name: bytes::Bytes, digest: B3Digest, size: u64) -> Result<Self, ValidateNodeError> {
|
||||
Ok(Self { name, digest, size })
|
||||
}
|
||||
|
||||
pub fn digest(&self) -> &B3Digest {
|
||||
&self.digest
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.size
|
||||
}
|
||||
|
||||
pub fn rename(self, name: bytes::Bytes) -> Self {
|
||||
Self { name, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for DirectoryNode {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for DirectoryNode {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.get_name().cmp(other.get_name())
|
||||
}
|
||||
}
|
||||
|
||||
impl NamedNode for &DirectoryNode {
|
||||
fn get_name(&self) -> &bytes::Bytes {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
impl NamedNode for DirectoryNode {
|
||||
fn get_name(&self) -> &bytes::Bytes {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
72
tvix/castore/src/nodes/file_node.rs
Normal file
72
tvix/castore/src/nodes/file_node.rs
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
use crate::{B3Digest, NamedNode, ValidateNodeError};
|
||||
|
||||
/// A FileNode represents a regular or executable file in a Directory or at the root.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct FileNode {
|
||||
/// The (base)name of the file
|
||||
name: bytes::Bytes,
|
||||
|
||||
/// The blake3 digest of the file contents
|
||||
digest: B3Digest,
|
||||
|
||||
/// The file content size
|
||||
size: u64,
|
||||
|
||||
/// Whether the file is executable
|
||||
executable: bool,
|
||||
}
|
||||
|
||||
impl FileNode {
|
||||
pub fn new(
|
||||
name: bytes::Bytes,
|
||||
digest: B3Digest,
|
||||
size: u64,
|
||||
executable: bool,
|
||||
) -> Result<Self, ValidateNodeError> {
|
||||
Ok(Self {
|
||||
name,
|
||||
digest,
|
||||
size,
|
||||
executable,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn digest(&self) -> &B3Digest {
|
||||
&self.digest
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.size
|
||||
}
|
||||
|
||||
pub fn executable(&self) -> bool {
|
||||
self.executable
|
||||
}
|
||||
|
||||
pub fn rename(self, name: bytes::Bytes) -> Self {
|
||||
Self { name, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for FileNode {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for FileNode {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.get_name().cmp(other.get_name())
|
||||
}
|
||||
}
|
||||
|
||||
impl NamedNode for &FileNode {
|
||||
fn get_name(&self) -> &bytes::Bytes {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
impl NamedNode for FileNode {
|
||||
fn get_name(&self) -> &bytes::Bytes {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
69
tvix/castore/src/nodes/mod.rs
Normal file
69
tvix/castore/src/nodes/mod.rs
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
//! This holds types describing nodes in the tvix-castore model.
|
||||
mod directory;
|
||||
mod directory_node;
|
||||
mod file_node;
|
||||
mod symlink_node;
|
||||
|
||||
use bytes::Bytes;
|
||||
pub use directory::Directory;
|
||||
pub use directory_node::DirectoryNode;
|
||||
pub use file_node::FileNode;
|
||||
pub use symlink_node::SymlinkNode;
|
||||
|
||||
/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode].
|
||||
/// While a Node by itself may have any name, only those matching specific requirements
|
||||
/// can can be added as entries to a [Directory] (see the documentation on [Directory] for details).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Node {
|
||||
Directory(DirectoryNode),
|
||||
File(FileNode),
|
||||
Symlink(SymlinkNode),
|
||||
}
|
||||
|
||||
impl Node {
|
||||
/// Returns the node with a new name.
|
||||
pub fn rename(self, name: Bytes) -> Self {
|
||||
match self {
|
||||
Node::Directory(n) => Node::Directory(n.rename(name)),
|
||||
Node::File(n) => Node::File(n.rename(name)),
|
||||
Node::Symlink(n) => Node::Symlink(n.rename(name)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Node {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Node {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.get_name().cmp(other.get_name())
|
||||
}
|
||||
}
|
||||
|
||||
/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode]
|
||||
/// and [Node], so we can ask all of them for the name easily.
|
||||
pub trait NamedNode {
|
||||
fn get_name(&self) -> &Bytes;
|
||||
}
|
||||
|
||||
impl NamedNode for &Node {
|
||||
fn get_name(&self) -> &Bytes {
|
||||
match self {
|
||||
Node::File(node_file) => node_file.get_name(),
|
||||
Node::Directory(node_directory) => node_directory.get_name(),
|
||||
Node::Symlink(node_symlink) => node_symlink.get_name(),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl NamedNode for Node {
|
||||
fn get_name(&self) -> &Bytes {
|
||||
match self {
|
||||
Node::File(node_file) => node_file.get_name(),
|
||||
Node::Directory(node_directory) => node_directory.get_name(),
|
||||
Node::Symlink(node_symlink) => node_symlink.get_name(),
|
||||
}
|
||||
}
|
||||
}
|
||||
50
tvix/castore/src/nodes/symlink_node.rs
Normal file
50
tvix/castore/src/nodes/symlink_node.rs
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
use crate::{NamedNode, ValidateNodeError};
|
||||
|
||||
/// A SymlinkNode represents a symbolic link in a Directory or at the root.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct SymlinkNode {
|
||||
/// The (base)name of the symlink
|
||||
name: bytes::Bytes,
|
||||
/// The target of the symlink.
|
||||
target: bytes::Bytes,
|
||||
}
|
||||
|
||||
impl SymlinkNode {
|
||||
pub fn new(name: bytes::Bytes, target: bytes::Bytes) -> Result<Self, ValidateNodeError> {
|
||||
if target.is_empty() || target.contains(&b'\0') {
|
||||
return Err(ValidateNodeError::InvalidSymlinkTarget(target));
|
||||
}
|
||||
Ok(Self { name, target })
|
||||
}
|
||||
|
||||
pub fn target(&self) -> &bytes::Bytes {
|
||||
&self.target
|
||||
}
|
||||
|
||||
pub(crate) fn rename(self, name: bytes::Bytes) -> Self {
|
||||
Self { name, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for SymlinkNode {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for SymlinkNode {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.get_name().cmp(other.get_name())
|
||||
}
|
||||
}
|
||||
|
||||
impl NamedNode for &SymlinkNode {
|
||||
fn get_name(&self) -> &bytes::Bytes {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
impl NamedNode for SymlinkNode {
|
||||
fn get_name(&self) -> &bytes::Bytes {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue