refactor(tvix/castore): drop {Directory,File,Symlink}Node

Add a `SymlinkTarget` type to represent validated symlink targets.
With this, no invalid states are representable, so we can make `Node` be
just an enum of all three kind of types, and allow access to these
fields directly.

Change-Id: I20bdd480c8d5e64a827649f303c97023b7e390f2
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12216
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-08-16 02:24:12 +03:00 committed by clbot
parent 49b173786c
commit 8ea7d2b60e
27 changed files with 555 additions and 461 deletions

View file

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode};
use crate::{errors::DirectoryError, proto, B3Digest, Node};
/// A Directory contains nodes, which can be Directory, File or Symlink nodes.
/// It attached names to these nodes, which is the basename in that directory.
@ -27,7 +27,14 @@ impl Directory {
pub fn size(&self) -> u64 {
// It's impossible to create a Directory where the size overflows, because we
// check before every add() that the size won't overflow.
(self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>()
(self.nodes.len() as u64)
+ self
.nodes()
.map(|(_name, n)| match n {
Node::Directory { size, .. } => 1 + size,
Node::File { .. } | Node::Symlink { .. } => 1,
})
.sum::<u64>()
}
/// Calculates the digest of a Directory, which is the blake3 hash of a
@ -43,40 +50,6 @@ impl Directory {
self.nodes.iter()
}
/// Allows iterating over the FileNode entries of this directory.
/// For each, it returns a tuple of its name and node.
/// The elements are sorted by their names.
pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ {
self.nodes.iter().filter_map(|(name, node)| match node {
Node::File(n) => Some((name, n)),
_ => None,
})
}
/// Allows iterating over the DirectoryNode entries (subdirectories) of this directory.
/// For each, it returns a tuple of its name and node.
/// The elements are sorted by their names.
pub fn directories(
&self,
) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ {
self.nodes.iter().filter_map(|(name, node)| match node {
Node::Directory(n) => Some((name, n)),
_ => None,
})
}
/// Allows iterating over the SymlinkNode entries of this directory
/// For each, it returns a tuple of its name and node.
/// The elements are sorted by their names.
pub fn symlinks(
&self,
) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ {
self.nodes.iter().filter_map(|(name, node)| match node {
Node::Symlink(n) => Some((name, n)),
_ => None,
})
}
/// Checks a Node name for validity as a directory entry
/// We disallow slashes, null bytes, '.', '..' and the empty string.
pub(crate) fn is_valid_name(name: &[u8]) -> bool {
@ -106,7 +79,7 @@ impl Directory {
self.size(),
1,
match node {
Node::Directory(ref dir) => dir.size(),
Node::Directory { size, .. } => size,
_ => 0,
},
])
@ -130,7 +103,7 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
#[cfg(test)]
mod test {
use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
use super::{Directory, Node};
use crate::fixtures::DUMMY_DIGEST;
use crate::DirectoryError;
@ -140,49 +113,76 @@ mod test {
d.add(
"b".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
d.add(
"a".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
d.add(
"z".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
d.add(
"f".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true,
},
)
.unwrap();
d.add(
"c".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true,
},
)
.unwrap();
d.add(
"g".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true,
},
)
.unwrap();
d.add(
"t".into(),
Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
d.add(
"o".into(),
Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
d.add(
"e".into(),
Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
@ -198,7 +198,10 @@ mod test {
assert_eq!(
d.add(
"foo".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX))
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: u64::MAX
}
),
Err(DirectoryError::SizeOverflow)
);
@ -210,7 +213,10 @@ mod test {
d.add(
"a".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
assert_eq!(
@ -218,7 +224,11 @@ mod test {
"{}",
d.add(
"a".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true))
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true
}
)
.expect_err("adding duplicate dir entry must fail")
),
@ -233,7 +243,9 @@ mod test {
assert!(
dir.add(
"".into(), // wrong! can not be added to directory
Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap())
Node::Symlink {
target: "doesntmatter".try_into().unwrap(),
},
)
.is_err(),
"invalid symlink entry be rejected"

View file

@ -1,35 +0,0 @@
use crate::B3Digest;
/// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest].
/// It also records a`size`.
/// Such a node is either an element in the [Directory] it itself is contained in,
/// or a standalone root node./
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectoryNode {
/// The blake3 hash of a Directory message, serialized in protobuf canonical form.
digest: B3Digest,
/// Number of child elements in the Directory referred to by `digest`.
/// Calculated by summing up the numbers of nodes, and for each directory.
/// its size field. Can be used for inode allocation.
/// This field is precisely as verifiable as any other Merkle tree edge.
/// Resolve `digest`, and you can compute it incrementally. Resolve the entire
/// tree, and you can fully compute it from scratch.
/// A credulous implementation won't reject an excessive size, but this is
/// harmless: you'll have some ordinals without nodes. Undersizing is obvious
/// and easy to reject: you won't have an ordinal for some nodes.
size: u64,
}
impl DirectoryNode {
pub fn new(digest: B3Digest, size: u64) -> Self {
Self { digest, size }
}
pub fn digest(&self) -> &B3Digest {
&self.digest
}
pub fn size(&self) -> u64 {
self.size
}
}

View file

@ -1,36 +0,0 @@
use crate::B3Digest;
/// A FileNode represents a regular or executable file in a Directory or at the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileNode {
/// The blake3 digest of the file contents
digest: B3Digest,
/// The file content size
size: u64,
/// Whether the file is executable
executable: bool,
}
impl FileNode {
pub fn new(digest: B3Digest, size: u64, executable: bool) -> Self {
Self {
digest,
size,
executable,
}
}
pub fn digest(&self) -> &B3Digest {
&self.digest
}
pub fn size(&self) -> u64 {
self.size
}
pub fn executable(&self) -> bool {
self.executable
}
}

View file

@ -1,20 +1,48 @@
//! This holds types describing nodes in the tvix-castore model.
mod directory;
mod directory_node;
mod file_node;
mod symlink_node;
mod symlink_target;
use crate::B3Digest;
pub use directory::Directory;
pub use directory_node::DirectoryNode;
pub use file_node::FileNode;
pub use symlink_node::SymlinkNode;
use symlink_target::SymlinkTarget;
/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode].
/// Nodes themselves don't have names, what gives them names is either them
/// being inside a [Directory], or a root node with its own name attached to it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Node {
Directory(DirectoryNode),
File(FileNode),
Symlink(SymlinkNode),
/// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest].
/// It also records a`size`.
/// Such a node is either an element in the [Directory] it itself is contained in,
/// or a standalone root node.
Directory {
/// The blake3 hash of a Directory message, serialized in protobuf canonical form.
digest: B3Digest,
/// Number of child elements in the Directory referred to by `digest`.
/// Calculated by summing up the numbers of nodes, and for each directory,
/// its size field. Can be used for inode allocation.
/// This field is precisely as verifiable as any other Merkle tree edge.
/// Resolve `digest`, and you can compute it incrementally. Resolve the entire
/// tree, and you can fully compute it from scratch.
/// A credulous implementation won't reject an excessive size, but this is
/// harmless: you'll have some ordinals without nodes. Undersizing is obvious
/// and easy to reject: you won't have an ordinal for some nodes.
size: u64,
},
/// A FileNode represents a regular or executable file in a Directory or at the root.
File {
/// The blake3 digest of the file contents
digest: B3Digest,
/// The file content size
size: u64,
/// Whether the file is executable
executable: bool,
},
/// A SymlinkNode represents a symbolic link in a Directory or at the root.
Symlink {
/// The target of the symlink.
target: SymlinkTarget,
},
}

View file

@ -1,21 +0,0 @@
use crate::ValidateNodeError;
/// A SymlinkNode represents a symbolic link in a Directory or at the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymlinkNode {
/// The target of the symlink.
target: bytes::Bytes,
}
impl SymlinkNode {
pub fn new(target: bytes::Bytes) -> Result<Self, ValidateNodeError> {
if target.is_empty() || target.contains(&b'\0') {
return Err(ValidateNodeError::InvalidSymlinkTarget(target));
}
Ok(Self { target })
}
pub fn target(&self) -> &bytes::Bytes {
&self.target
}
}

View file

@ -0,0 +1,82 @@
// TODO: split out this error
use crate::ValidateNodeError;
use bstr::ByteSlice;
use std::fmt::{self, Debug, Display};
/// A wrapper type for symlink targets.
/// Internally uses a [bytes::Bytes], but disallows empty targets and those
/// containing null bytes.
#[repr(transparent)]
#[derive(Clone, PartialEq, Eq)]
pub struct SymlinkTarget {
inner: bytes::Bytes,
}
impl AsRef<[u8]> for SymlinkTarget {
fn as_ref(&self) -> &[u8] {
self.inner.as_ref()
}
}
impl From<SymlinkTarget> for bytes::Bytes {
fn from(value: SymlinkTarget) -> Self {
value.inner
}
}
impl TryFrom<bytes::Bytes> for SymlinkTarget {
type Error = ValidateNodeError;
fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> {
if value.is_empty() || value.contains(&b'\0') {
return Err(ValidateNodeError::InvalidSymlinkTarget(value));
}
Ok(Self { inner: value })
}
}
impl TryFrom<&'static [u8]> for SymlinkTarget {
type Error = ValidateNodeError;
fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> {
if value.is_empty() || value.contains(&b'\0') {
return Err(ValidateNodeError::InvalidSymlinkTarget(
bytes::Bytes::from_static(value),
));
}
Ok(Self {
inner: bytes::Bytes::from_static(value),
})
}
}
impl TryFrom<&str> for SymlinkTarget {
type Error = ValidateNodeError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
if value.is_empty() {
return Err(ValidateNodeError::InvalidSymlinkTarget(
bytes::Bytes::copy_from_slice(value.as_bytes()),
));
}
Ok(Self {
inner: bytes::Bytes::copy_from_slice(value.as_bytes()),
})
}
}
impl Debug for SymlinkTarget {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Debug::fmt(self.inner.as_bstr(), f)
}
}
impl Display for SymlinkTarget {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Display::fmt(self.inner.as_bstr(), f)
}
}