chore(tvix/[ca]store): Drop sled support completely in favor of redb

Over the past couple of months we've been using redb instead of sled as
the default filesystem-based database in PS and DS. I am confident that
we can get rid of sled completely now, and just keep redb.

Change-Id: I11fa1e4453e280253855f8eade990b37eb6965ae
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12567
Reviewed-by: yuka <yuka@yuka.dev>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: Ilan Joselevich <personal@ilanjoselevich.com>
This commit is contained in:
Ilan Joselevich 2024-10-03 19:50:30 +02:00
parent 56b306f884
commit ab6e8d28aa
14 changed files with 18 additions and 812 deletions

View file

@ -12,11 +12,6 @@ use url::Url;
/// The following URIs are supported:
/// - `memory:`
/// Uses an in-memory implementation.
/// - `sled:`
/// Uses an in-memory sled implementation.
/// - `sled:///absolute/path/to/somewhere`
/// Uses sled, using a path on the disk for persistency. Can be only opened
/// from one process at the same time.
/// - `redb:`
/// Uses an in-memory redb implementation.
/// - `redb:///absolute/path/to/somewhere`
@ -66,8 +61,6 @@ mod tests {
use tvix_castore::directoryservice::{DirectoryService, MemoryDirectoryServiceConfig};
lazy_static! {
static ref TMPDIR_SLED_1: TempDir = TempDir::new().unwrap();
static ref TMPDIR_SLED_2: TempDir = TempDir::new().unwrap();
static ref TMPDIR_REDB_1: TempDir = TempDir::new().unwrap();
static ref TMPDIR_REDB_2: TempDir = TempDir::new().unwrap();
}
@ -77,16 +70,6 @@ mod tests {
#[rstest]
/// This uses an unsupported scheme.
#[case::unsupported_scheme("http://foo.example/test", false)]
/// This configures sled in temporary mode.
#[case::sled_temporary("sled://", true)]
/// This configures sled with /, which should fail.
#[case::sled_invalid_root("sled:///", false)]
/// This configures sled with a host, not path, which should fail.
#[case::sled_invalid_host("sled://foo.example", false)]
/// This configures sled with a valid path, which should succeed.
#[case::sled_valid_path(&format!("sled://{}", &TMPDIR_SLED_1.path().to_str().unwrap()), true)]
/// This configures sled with a host, and a valid path, which should fail.
#[case::sled_invalid_host_with_valid_path(&format!("sled://foo.example{}", &TMPDIR_SLED_2.path().to_str().unwrap()), false)]
/// This correctly sets the scheme, and doesn't set a path.
#[case::memory_valid("memory://", true)]
/// This sets a memory url host to `foo`

View file

@ -6,7 +6,6 @@ mod memory;
mod nix_http;
mod redb;
mod signing_wrapper;
mod sled;
#[cfg(any(feature = "fuse", feature = "virtiofs"))]
mod fs;
@ -32,7 +31,6 @@ pub use self::memory::{MemoryPathInfoService, MemoryPathInfoServiceConfig};
pub use self::nix_http::{NixHTTPPathInfoService, NixHTTPPathInfoServiceConfig};
pub use self::redb::{RedbPathInfoService, RedbPathInfoServiceConfig};
pub use self::signing_wrapper::{KeyFileSigningPathInfoServiceConfig, SigningPathInfoService};
pub use self::sled::{SledPathInfoService, SledPathInfoServiceConfig};
#[cfg(test)]
pub(crate) use self::signing_wrapper::test_signing_service;
@ -94,7 +92,6 @@ pub(crate) fn register_pathinfo_services(reg: &mut Registry) {
reg.register::<Box<dyn ServiceBuilder<Output = dyn PathInfoService>>, LruPathInfoServiceConfig>("lru");
reg.register::<Box<dyn ServiceBuilder<Output = dyn PathInfoService>>, MemoryPathInfoServiceConfig>("memory");
reg.register::<Box<dyn ServiceBuilder<Output = dyn PathInfoService>>, NixHTTPPathInfoServiceConfig>("nix");
reg.register::<Box<dyn ServiceBuilder<Output = dyn PathInfoService>>, SledPathInfoServiceConfig>("sled");
reg.register::<Box<dyn ServiceBuilder<Output = dyn PathInfoService>>, RedbPathInfoServiceConfig>("redb");
reg.register::<Box<dyn ServiceBuilder<Output = dyn PathInfoService>>, KeyFileSigningPathInfoServiceConfig>("keyfile-signing");
#[cfg(feature = "cloud")]

View file

@ -1,190 +0,0 @@
use super::PathInfoService;
use crate::proto::PathInfo;
use async_stream::try_stream;
use futures::stream::BoxStream;
use nix_compat::nixbase32;
use prost::Message;
use std::path::Path;
use std::sync::Arc;
use tonic::async_trait;
use tracing::{instrument, warn};
use tvix_castore::composition::{CompositionContext, ServiceBuilder};
use tvix_castore::Error;
/// SledPathInfoService stores PathInfo messages in a
/// [sled](https://github.com/spacejam/sled) database.
///
/// The PathInfo messages are stored as encoded protos, and keyed by their output hash,
/// as that's currently the only request type available.
pub struct SledPathInfoService {
/// Handle to the sled database holding the encoded PathInfo messages.
db: sled::Db,
}
impl SledPathInfoService {
pub fn new<P: AsRef<Path>>(p: P) -> Result<Self, sled::Error> {
if p.as_ref() == Path::new("/") {
return Err(sled::Error::Unsupported(
"cowardly refusing to open / with sled".to_string(),
));
}
let config = sled::Config::default()
.use_compression(false) // is a required parameter
.path(p);
let db = config.open()?;
Ok(Self { db })
}
pub fn new_temporary() -> Result<Self, sled::Error> {
let config = sled::Config::default().temporary(true);
let db = config.open()?;
Ok(Self { db })
}
}
#[async_trait]
impl PathInfoService for SledPathInfoService {
#[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))]
async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> {
let resp = tokio::task::spawn_blocking({
let db = self.db.clone();
move || db.get(digest.as_slice())
})
.await?
.map_err(|e| {
warn!("failed to retrieve PathInfo: {}", e);
Error::StorageError(format!("failed to retrieve PathInfo: {}", e))
})?;
match resp {
None => Ok(None),
Some(data) => {
let path_info = PathInfo::decode(&*data).map_err(|e| {
warn!("failed to decode stored PathInfo: {}", e);
Error::StorageError(format!("failed to decode stored PathInfo: {}", e))
})?;
Ok(Some(path_info))
}
}
}
#[instrument(level = "trace", skip_all, fields(path_info.root_node = ?path_info.node))]
async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error> {
// Call validate on the received PathInfo message.
let store_path = path_info
.validate()
.map_err(|e| Error::InvalidRequest(format!("failed to validate PathInfo: {}", e)))?;
// In case the PathInfo is valid, we were able to parse a StorePath.
// Store it in the database, keyed by its digest.
// This overwrites existing PathInfo objects.
tokio::task::spawn_blocking({
let db = self.db.clone();
let k = *store_path.digest();
let data = path_info.encode_to_vec();
move || db.insert(k, data)
})
.await?
.map_err(|e| {
warn!("failed to insert PathInfo: {}", e);
Error::StorageError(format! {
"failed to insert PathInfo: {}", e
})
})?;
Ok(path_info)
}
fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> {
let db = self.db.clone();
let mut it = db.iter().values();
Box::pin(try_stream! {
// Don't block the executor while waiting for .next(), so wrap that
// in a spawn_blocking call.
// We need to pass around it to be able to reuse it.
while let (Some(elem), new_it) = tokio::task::spawn_blocking(move || {
(it.next(), it)
}).await? {
it = new_it;
let data = elem.map_err(|e| {
warn!("failed to retrieve PathInfo: {}", e);
Error::StorageError(format!("failed to retrieve PathInfo: {}", e))
})?;
let path_info = PathInfo::decode(&*data).map_err(|e| {
warn!("failed to decode stored PathInfo: {}", e);
Error::StorageError(format!("failed to decode stored PathInfo: {}", e))
})?;
yield path_info
}
})
}
}
/// Configuration from which a `SledPathInfoService` is built.
///
/// Deserialized with serde; unknown fields are rejected.
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SledPathInfoServiceConfig {
/// Whether to open a temporary database instead of one stored at `path`.
is_temporary: bool,
#[serde(default)]
/// Path handed to sled when opening the database.
/// Required when is_temporary = false, and must be absent otherwise.
path: Option<String>,
}
impl TryFrom<url::Url> for SledPathInfoServiceConfig {
    type Error = Box<dyn std::error::Error + Send + Sync>;

    /// Derives a config from `url`.
    ///
    /// A URL with a host is rejected. An empty URL path selects a temporary
    /// database; any other path is stored verbatim and selects a
    /// non-temporary one.
    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
        // sled has no notion of a host; only an optional path is meaningful
        // (without one the database is temporary).
        if url.has_host() {
            return Err(Error::StorageError("no host allowed".to_string()).into());
        }

        // TODO: expose compression and other parameters as URL parameters?
        let path = match url.path() {
            "" => None,
            given => Some(given.to_string()),
        };

        Ok(Self {
            is_temporary: path.is_none(),
            path,
        })
    }
}
#[async_trait]
impl ServiceBuilder for SledPathInfoServiceConfig {
type Output = dyn PathInfoService;
async fn build<'a>(
&'a self,
_instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn PathInfoService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
match self {
SledPathInfoServiceConfig {
is_temporary: true,
path: None,
} => Ok(Arc::new(SledPathInfoService::new_temporary()?)),
SledPathInfoServiceConfig {
is_temporary: true,
path: Some(_),
} => Err(
Error::StorageError("Temporary SledPathInfoService can not have path".into())
.into(),
),
SledPathInfoServiceConfig {
is_temporary: false,
path: None,
} => Err(Error::StorageError("SledPathInfoService is missing path".into()).into()),
SledPathInfoServiceConfig {
is_temporary: false,
path: Some(path),
} => Ok(Arc::new(SledPathInfoService::new(path)?)),
}
}
}

View file

@ -9,7 +9,6 @@ use rstest_reuse::{self, *};
use super::PathInfoService;
use crate::pathinfoservice::redb::RedbPathInfoService;
use crate::pathinfoservice::MemoryPathInfoService;
use crate::pathinfoservice::SledPathInfoService;
use crate::proto::PathInfo;
use crate::tests::fixtures::DUMMY_PATH_DIGEST;
use tvix_castore::proto as castorepb;
@ -29,7 +28,6 @@ use self::utils::make_bigtable_path_info_service;
let (_, _, svc) = make_grpc_path_info_service_client().await;
svc
})]
#[case::sled(SledPathInfoService::new_temporary().unwrap())]
#[case::redb(RedbPathInfoService::new_temporary().unwrap())]
#[case::signing(test_signing_service())]
#[cfg_attr(all(feature = "cloud",feature="integration"), case::bigtable(make_bigtable_path_info_service().await))]