refactor(tvix/store): drop calculate_nar from PathInfoService

This shouldn't be part of the PathInfoService trait.

Pretty much none of the PathInfoServices implement it, and requiring
them to implement it means they also cannot make use of this calculation
already being done by other PathInfoServices.

Move it out into its own NarCalculationService trait, defined in
tvix_store::nar, and have everyone who wants to trigger nar
calculation use nar_calculation_service directly, which is now an
additional field in TvixStoreIO for example.

Moving it outside the PathInfoService trait doesn't prevent specific
implementations from implementing it (like the GRPC client for the
`PathInfoService` does).

This is currently wired together in a bit of a hacky fashion - as of
now, everything uses the naive implementation that traverses blob and
directoryservice, rather than composing it properly. I want to leave
that up to a later CL, dealing with other parts of store composition
too.

Change-Id: I18d07ea4301d4a07651b8218bc5fe95e4e307208
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11619
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-05-10 08:59:25 +03:00 committed by clbot
parent 944a781354
commit 14766cfe1d
20 changed files with 241 additions and 187 deletions

View file

@ -11,7 +11,6 @@ use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DurationSeconds};
use tonic::async_trait;
use tracing::trace;
use tvix_castore::proto as castorepb;
use tvix_castore::Error;
/// There should not be more than 10 MiB in a single cell.
@ -330,13 +329,6 @@ impl PathInfoService for BigtablePathInfoService {
Ok(path_info)
}
async fn calculate_nar(
&self,
_root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error> {
return Err(Error::StorageError("unimplemented".into()));
}
fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> {
let mut client = self.client.clone();

View file

@ -1,5 +1,8 @@
use super::PathInfoService;
use crate::proto::{self, ListPathInfoRequest, PathInfo};
use crate::{
nar::NarCalculationService,
proto::{self, ListPathInfoRequest, PathInfo},
};
use async_stream::try_stream;
use data_encoding::BASE64;
use futures::stream::BoxStream;
@ -67,30 +70,6 @@ impl PathInfoService for GRPCPathInfoService {
Ok(path_info)
}
#[instrument(level = "trace", skip_all, fields(root_node = ?root_node))]
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error> {
let path_info = self
.grpc_client
.clone()
.calculate_nar(castorepb::Node {
node: Some(root_node.clone()),
})
.await
.map_err(|e| Error::StorageError(e.to_string()))?
.into_inner();
let nar_sha256: [u8; 32] = path_info
.nar_sha256
.to_vec()
.try_into()
.map_err(|_e| Error::StorageError("invalid digest length".to_string()))?;
Ok((path_info.nar_size, nar_sha256))
}
#[instrument(level = "trace", skip_all)]
fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> {
let mut grpc_client = self.grpc_client.clone();
@ -126,6 +105,33 @@ impl PathInfoService for GRPCPathInfoService {
}
}
#[async_trait]
impl NarCalculationService for GRPCPathInfoService {
#[instrument(level = "trace", skip_all, fields(root_node = ?root_node))]
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error> {
let path_info = self
.grpc_client
.clone()
.calculate_nar(castorepb::Node {
node: Some(root_node.clone()),
})
.await
.map_err(|e| Error::StorageError(e.to_string()))?
.into_inner();
let nar_sha256: [u8; 32] = path_info
.nar_sha256
.to_vec()
.try_into()
.map_err(|_e| Error::StorageError("invalid digest length".to_string()))?;
Ok((path_info.nar_size, nar_sha256))
}
}
#[cfg(test)]
mod tests {
use crate::pathinfoservice::tests::make_grpc_path_info_service_client;

View file

@ -1,19 +1,20 @@
use super::PathInfoService;
use crate::{nar::calculate_size_and_sha256, proto::PathInfo};
use crate::proto::PathInfo;
use futures::stream::{iter, BoxStream};
use std::{
collections::HashMap,
sync::{Arc, RwLock},
};
use tonic::async_trait;
use tvix_castore::proto as castorepb;
use tvix_castore::Error;
use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService};
/// A PathInfoService that keeps its [PathInfo] entries in a [HashMap]
/// behind an `Arc<RwLock<..>>`.
pub struct MemoryPathInfoService<BS, DS> {
// Stored entries, keyed by a 20-byte array
// (presumably the store path digest; confirm against the callers).
db: Arc<RwLock<HashMap<[u8; 20], PathInfo>>>,
// Only `calculate_nar` read this field; with NAR calculation moved out of
// the PathInfoService trait it is kept but unused, hence the lint override.
#[allow(dead_code)]
blob_service: BS,
// Same as `blob_service`: retained, but no longer read.
#[allow(dead_code)]
directory_service: DS,
}
@ -61,15 +62,6 @@ where
}
}
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error> {
calculate_size_and_sha256(root_node, &self.blob_service, &self.directory_service)
.await
.map_err(|e| Error::StorageError(e.to_string()))
}
fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> {
let db = self.db.read().unwrap();

View file

@ -12,7 +12,6 @@ mod tests;
use futures::stream::BoxStream;
use tonic::async_trait;
use tvix_castore::proto as castorepb;
use tvix_castore::Error;
use crate::proto::PathInfo;
@ -41,14 +40,6 @@ pub trait PathInfoService: Send + Sync {
/// invalid messages.
async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error>;
/// Return the NAR size and NAR sha256 digest for a given root node.
///
/// On success the result is a `(nar_size, nar_sha256)` tuple, with the
/// digest being exactly 32 bytes long.
/// This can be used to calculate NAR-based output paths,
/// and implementations are encouraged to cache it.
///
/// An [Error] is returned if the calculation fails, or if the
/// implementation does not support calculating NARs at all.
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error>;
/// Iterate over all PathInfo objects in the store.
/// Implementations can decide to disallow listing.
///
@ -72,13 +63,6 @@ where
self.as_ref().put(path_info).await
}
/// Forward the request unchanged to the service obtained through
/// `as_ref()`, returning whatever it returns.
async fn calculate_nar(
    &self,
    root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error> {
    let inner = self.as_ref();
    inner.calculate_nar(root_node).await
}
fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> {
self.as_ref().list()
}

View file

@ -33,8 +33,7 @@ use super::PathInfoService;
///
/// The client is expected to be (indirectly) using the same [BlobService] and
/// [DirectoryService], so able to fetch referred Directories and Blobs.
/// [PathInfoService::put] and [PathInfoService::calculate_nar] are not
/// implemented and return an error if called.
/// [PathInfoService::put] is not implemented and returns an error if called.
/// TODO: what about reading from nix-cache-info?
pub struct NixHTTPPathInfoService<BS, DS> {
base_url: url::Url,
@ -258,16 +257,6 @@ where
))
}
#[instrument(skip_all, fields(root_node=?root_node))]
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error> {
Err(Error::InvalidRequest(
"calculate_nar not supported for this backend".to_string(),
))
}
fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> {
Box::pin(futures::stream::once(async {
Err(Error::InvalidRequest(

View file

@ -1,5 +1,4 @@
use super::PathInfoService;
use crate::nar::calculate_size_and_sha256;
use crate::proto::PathInfo;
use async_stream::try_stream;
use data_encoding::BASE64;
@ -9,7 +8,6 @@ use std::path::Path;
use tonic::async_trait;
use tracing::instrument;
use tracing::warn;
use tvix_castore::proto as castorepb;
use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService, Error};
/// SledPathInfoService stores PathInfo in a [sled](https://github.com/spacejam/sled).
@ -19,7 +17,9 @@ use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService,
pub struct SledPathInfoService<BS, DS> {
// Handle to the sled database backing this service.
db: sled::Db,
// Only `calculate_nar` read this field; with NAR calculation moved out of
// the PathInfoService trait it is kept but unused, hence the lint override.
#[allow(dead_code)]
blob_service: BS,
// Same as `blob_service`: retained, but no longer read.
#[allow(dead_code)]
directory_service: DS,
}
@ -109,16 +109,6 @@ where
Ok(path_info)
}
#[instrument(level = "trace", skip_all, fields(root_node = ?root_node))]
async fn calculate_nar(
&self,
root_node: &castorepb::node::Node,
) -> Result<(u64, [u8; 32]), Error> {
calculate_size_and_sha256(root_node, &self.blob_service, &self.directory_service)
.await
.map_err(|e| Error::StorageError(e.to_string()))
}
fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> {
let db = self.db.clone();
let mut it = db.iter().values();

View file

@ -3,6 +3,7 @@ use std::sync::Arc;
use tonic::transport::{Endpoint, Server, Uri};
use crate::{
nar::{NarCalculationService, SimpleRenderer},
pathinfoservice::{GRPCPathInfoService, MemoryPathInfoService, PathInfoService},
proto::{
path_info_service_client::PathInfoServiceClient,
@ -25,13 +26,17 @@ pub async fn make_grpc_path_info_service_client() -> super::BSDSPS {
let blob_service = blob_service.clone();
let directory_service = directory_service.clone();
async move {
let path_info_service: Arc<dyn PathInfoService> =
Arc::from(MemoryPathInfoService::new(blob_service, directory_service));
let path_info_service: Arc<dyn PathInfoService> = Arc::from(
MemoryPathInfoService::new(blob_service.clone(), directory_service.clone()),
);
let nar_calculation_service =
Box::new(SimpleRenderer::new(blob_service, directory_service))
as Box<dyn NarCalculationService>;
// spin up a new DirectoryService
// spin up a new PathInfoService
let mut server = Server::builder();
let router = server.add_service(PathInfoServiceServer::new(
GRPCPathInfoServiceWrapper::new(path_info_service),
GRPCPathInfoServiceWrapper::new(path_info_service, nar_calculation_service),
));
router