refactor(tvix/castore): add PathComponent type for checked components

This encodes a verified component on the type level. Internally, it
contains a bytes::Bytes.

The castore Path/PathBuf component() and file_name() methods now
return this type, the old ones returning bytes were renamed to
component_bytes() and component_file_name() respectively.

We can drop the directory_reject_invalid_name test - it's not possible
anymore to pass an invalid name to Directories::add.
Invalid names in the Directory proto are still being tested to be
rejected in the validate_invalid_names tests.

Change-Id: Ide4d16415dfd50b7e2d7e0c36d42a3bbeeb9b6c5
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12217
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-08-16 17:32:20 +03:00 committed by clbot
parent 8ea7d2b60e
commit 5ec93b57e6
25 changed files with 282 additions and 165 deletions

View file

@ -1,5 +1,4 @@
use bstr::ByteSlice;
use bytes::Bytes;
use std::{
collections::BTreeMap,
ffi::{OsStr, OsString},
@ -12,12 +11,15 @@ use tempfile::TempDir;
use tokio_stream::{wrappers::ReadDirStream, StreamExt};
use super::FuseDaemon;
use crate::fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST};
use crate::{
blobservice::{BlobService, MemoryBlobService},
directoryservice::{DirectoryService, MemoryDirectoryService},
fixtures, Node,
};
use crate::{
fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST},
PathComponent,
};
const BLOB_A_NAME: &str = "00000000000000000000000000000000-test";
const BLOB_B_NAME: &str = "55555555555555555555555555555555-test";
@ -37,7 +39,7 @@ fn gen_svcs() -> (Arc<dyn BlobService>, Arc<dyn DirectoryService>) {
fn do_mount<P: AsRef<Path>, BS, DS>(
blob_service: BS,
directory_service: DS,
root_nodes: BTreeMap<bytes::Bytes, Node>,
root_nodes: BTreeMap<PathComponent, Node>,
mountpoint: P,
list_root: bool,
show_xattr: bool,
@ -58,7 +60,7 @@ where
async fn populate_blob_a(
blob_service: &Arc<dyn BlobService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
let mut bw = blob_service.open_write().await;
tokio::io::copy(&mut Cursor::new(fixtures::BLOB_A.to_vec()), &mut bw)
@ -67,7 +69,7 @@ async fn populate_blob_a(
bw.close().await.expect("must succeed closing");
root_nodes.insert(
BLOB_A_NAME.into(),
BLOB_A_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::BLOB_A_DIGEST.clone(),
size: fixtures::BLOB_A.len() as u64,
@ -78,7 +80,7 @@ async fn populate_blob_a(
async fn populate_blob_b(
blob_service: &Arc<dyn BlobService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
let mut bw = blob_service.open_write().await;
tokio::io::copy(&mut Cursor::new(fixtures::BLOB_B.to_vec()), &mut bw)
@ -87,7 +89,7 @@ async fn populate_blob_b(
bw.close().await.expect("must succeed closing");
root_nodes.insert(
BLOB_B_NAME.into(),
BLOB_B_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::BLOB_B_DIGEST.clone(),
size: fixtures::BLOB_B.len() as u64,
@ -99,7 +101,7 @@ async fn populate_blob_b(
/// adds a blob containing helloworld and marks it as executable
async fn populate_blob_helloworld(
blob_service: &Arc<dyn BlobService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
let mut bw = blob_service.open_write().await;
tokio::io::copy(
@ -111,7 +113,7 @@ async fn populate_blob_helloworld(
bw.close().await.expect("must succeed closing");
root_nodes.insert(
HELLOWORLD_BLOB_NAME.into(),
HELLOWORLD_BLOB_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone(),
size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64,
@ -120,9 +122,9 @@ async fn populate_blob_helloworld(
);
}
async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_symlink(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
SYMLINK_NAME.into(),
SYMLINK_NAME.try_into().unwrap(),
Node::Symlink {
target: BLOB_A_NAME.try_into().unwrap(),
},
@ -131,9 +133,9 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
/// This writes a symlink pointing to /nix/store/somewhereelse,
/// which is the same symlink target as "aa" inside DIRECTORY_COMPLICATED.
async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_symlink2(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
SYMLINK_NAME2.into(),
SYMLINK_NAME2.try_into().unwrap(),
Node::Symlink {
target: "/nix/store/somewhereelse".try_into().unwrap(),
},
@ -143,7 +145,7 @@ async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_directory_with_keep(
blob_service: &Arc<dyn BlobService>,
directory_service: &Arc<dyn DirectoryService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
// upload empty blob
let mut bw = blob_service.open_write().await;
@ -159,7 +161,7 @@ async fn populate_directory_with_keep(
.expect("must succeed uploading");
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(),
Node::Directory {
digest: fixtures::DIRECTORY_WITH_KEEP.digest(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
@ -169,9 +171,9 @@ async fn populate_directory_with_keep(
/// Create a root node for DIRECTORY_WITH_KEEP, but don't upload the Directory
/// itself.
async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(),
Node::Directory {
digest: fixtures::DIRECTORY_WITH_KEEP.digest(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
@ -180,9 +182,9 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte
}
/// Insert BLOB_A, but don't provide the blob .keep is pointing to.
async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
BLOB_A_NAME.into(),
BLOB_A_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::BLOB_A_DIGEST.clone(),
size: fixtures::BLOB_A.len() as u64,
@ -194,7 +196,7 @@ async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>)
async fn populate_directory_complicated(
blob_service: &Arc<dyn BlobService>,
directory_service: &Arc<dyn DirectoryService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
// upload empty blob
let mut bw = blob_service.open_write().await;
@ -216,7 +218,7 @@ async fn populate_directory_complicated(
.expect("must succeed uploading");
root_nodes.insert(
DIRECTORY_COMPLICATED_NAME.into(),
DIRECTORY_COMPLICATED_NAME.try_into().unwrap(),
Node::Directory {
digest: fixtures::DIRECTORY_COMPLICATED.digest(),
size: fixtures::DIRECTORY_COMPLICATED.size(),

View file

@ -2,7 +2,7 @@
//! about inodes, which present tvix-castore nodes in a filesystem.
use std::time::Duration;
use crate::{B3Digest, Node};
use crate::{path::PathComponent, B3Digest, Node};
#[derive(Clone, Debug)]
pub enum InodeData {
@ -17,8 +17,8 @@ pub enum InodeData {
/// lookup and did fetch the data.
#[derive(Clone, Debug)]
pub enum DirectoryInodeData {
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, bytes::Bytes, Node)>), // [(child_inode, name, node)]
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, PathComponent, Node)>), // [(child_inode, name, node)]
}
impl InodeData {

View file

@ -18,10 +18,10 @@ use self::{
use crate::{
blobservice::{BlobReader, BlobService},
directoryservice::DirectoryService,
{B3Digest, Node},
path::PathComponent,
B3Digest, Node,
};
use bstr::ByteVec;
use bytes::Bytes;
use fuse_backend_rs::abi::fuse_abi::{stat64, OpenOptions};
use fuse_backend_rs::api::filesystem::{
Context, FileSystem, FsOptions, GetxattrReply, ListxattrReply, ROOT_ID,
@ -87,7 +87,7 @@ pub struct TvixStoreFs<BS, DS, RN> {
show_xattr: bool,
/// This maps a given basename in the root to the inode we allocated for the node.
root_nodes: RwLock<HashMap<Bytes, u64>>,
root_nodes: RwLock<HashMap<PathComponent, u64>>,
/// This keeps track of inodes and data alongside them.
inode_tracker: RwLock<InodeTracker>,
@ -103,7 +103,7 @@ pub struct TvixStoreFs<BS, DS, RN> {
u64,
(
Span,
Arc<Mutex<mpsc::Receiver<(usize, Result<(Bytes, Node), crate::Error>)>>>,
Arc<Mutex<mpsc::Receiver<(usize, Result<(PathComponent, Node), crate::Error>)>>>,
),
>,
>,
@ -154,7 +154,7 @@ where
/// Retrieves the inode for a given root node basename, if present.
/// This obtains a read lock on self.root_nodes.
fn get_inode_for_root_name(&self, name: &[u8]) -> Option<u64> {
fn get_inode_for_root_name(&self, name: &PathComponent) -> Option<u64> {
self.root_nodes.read().get(name).cloned()
}
@ -170,7 +170,7 @@ where
fn get_directory_children(
&self,
ino: u64,
) -> io::Result<(B3Digest, Vec<(u64, bytes::Bytes, Node)>)> {
) -> io::Result<(B3Digest, Vec<(u64, PathComponent, Node)>)> {
let data = self.inode_tracker.read().get(ino).unwrap();
match *data {
// if it's populated already, return children.
@ -200,7 +200,7 @@ where
let children = {
let mut inode_tracker = self.inode_tracker.write();
let children: Vec<(u64, Bytes, Node)> = directory
let children: Vec<(u64, PathComponent, Node)> = directory
.nodes()
.map(|(child_name, child_node)| {
let inode_data = InodeData::from_node(child_node);
@ -240,12 +240,12 @@ where
/// In the case the name can't be found, a libc::ENOENT is returned.
fn name_in_root_to_ino_and_data(
&self,
name: &std::ffi::CStr,
name: &PathComponent,
) -> io::Result<(u64, Arc<InodeData>)> {
// Look up the inode for that root node.
// If there's one, [self.inode_tracker] MUST also contain the data,
// which we can then return.
if let Some(inode) = self.get_inode_for_root_name(name.to_bytes()) {
if let Some(inode) = self.get_inode_for_root_name(name) {
return Ok((
inode,
self.inode_tracker
@ -259,7 +259,8 @@ where
// We don't have it yet, look it up in [self.root_nodes].
match self.tokio_handle.block_on({
let root_nodes_provider = self.root_nodes_provider.clone();
async move { root_nodes_provider.get_by_basename(name.to_bytes()).await }
let name = name.clone();
async move { root_nodes_provider.get_by_basename(&name).await }
}) {
// if there was an error looking up the root node, propagate up an IO error.
Err(_e) => Err(io::Error::from_raw_os_error(libc::EIO)),
@ -269,7 +270,7 @@ where
Ok(Some(root_node)) => {
// Let's check if someone else beat us to updating the inode tracker and
// root_nodes map. This avoids locking inode_tracker for writing.
if let Some(ino) = self.root_nodes.read().get(name.to_bytes()) {
if let Some(ino) = self.root_nodes.read().get(name) {
return Ok((
*ino,
self.inode_tracker.read().get(*ino).expect("must exist"),
@ -285,7 +286,7 @@ where
// self.root_nodes.
let inode_data = InodeData::from_node(&root_node);
let ino = inode_tracker.put(inode_data.clone());
root_nodes.insert(bytes::Bytes::copy_from_slice(name.to_bytes()), ino);
root_nodes.insert(name.to_owned(), ino);
Ok((ino, Arc::new(inode_data)))
}
@ -341,13 +342,17 @@ where
) -> io::Result<fuse_backend_rs::api::filesystem::Entry> {
debug!("lookup");
// convert the CStr to a PathComponent
// If it can't be converted, we definitely don't have anything here.
let name: PathComponent = name.try_into().map_err(|_| std::io::ErrorKind::NotFound)?;
// This goes from a parent inode to a node.
// - If the parent is [ROOT_ID], we need to check
// [self.root_nodes] (fetching from a [RootNode] provider if needed)
// - Otherwise, lookup the parent in [self.inode_tracker] (which must be
// a [InodeData::Directory]), and find the child with that name.
if parent == ROOT_ID {
let (ino, inode_data) = self.name_in_root_to_ino_and_data(name)?;
let (ino, inode_data) = self.name_in_root_to_ino_and_data(&name)?;
debug!(inode_data=?&inode_data, ino=ino, "Some");
return Ok(inode_data.as_fuse_entry(ino));
@ -360,7 +365,7 @@ where
// Search for that name in the list of children and return the FileAttrs.
// in the children, find the one with the desired name.
if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == name.to_bytes()) {
if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == &name) {
// lookup the child [InodeData] in [self.inode_tracker].
// We know the inodes for children have already been allocated.
let child_inode_data = self.inode_tracker.read().get(*child_ino).unwrap();
@ -479,7 +484,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &name,
name: name.as_ref(),
})?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
@ -503,7 +508,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &child_name,
name: child_name.as_ref(),
})?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
@ -569,7 +574,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &name,
name: name.as_ref(),
},
inode_data.as_fuse_entry(ino),
)?;
@ -594,7 +599,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &name,
name: name.as_ref(),
},
inode_data.as_fuse_entry(ino),
)?;

View file

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use crate::{Error, Node};
use crate::{path::PathComponent, Error, Node};
use futures::stream::BoxStream;
use tonic::async_trait;
@ -10,12 +10,12 @@ use tonic::async_trait;
pub trait RootNodes: Send + Sync {
/// Looks up a root CA node based on the basename of the node in the root
/// directory of the filesystem.
async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error>;
async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error>;
/// Lists all root CA nodes in the filesystem, as a tuple of (base)name
/// and Node.
/// An error can be returned in case listing is not allowed.
fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>>;
fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>>;
}
#[async_trait]
@ -23,13 +23,13 @@ pub trait RootNodes: Send + Sync {
/// the key is the node name.
impl<T> RootNodes for T
where
T: AsRef<BTreeMap<bytes::Bytes, Node>> + Send + Sync,
T: AsRef<BTreeMap<PathComponent, Node>> + Send + Sync,
{
async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error> {
async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error> {
Ok(self.as_ref().get(name).cloned())
}
fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>> {
fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>> {
Box::pin(tokio_stream::iter(
self.as_ref()
.iter()