feat(tvix/build), add OciBuildService, the old way
This is just patchset 10 of CL10855, before the color_eyre changes,
rebased to the tvix_castore api.
Change-Id: If4b42412ff8568058908cda971ad7d6f2d9f9b7b
---
This provides a build service invoking runc. It can be used by using the
`oci://$path_to_some_tempdir` builder URL for now.
For now, it can be tested as such:
```
BUILD_SERVICE_ADDR=oci://$PWD/bundles target/debug/tvix
let pkgs = (import <nixpkgs> {}); in builtins.readDir pkgs.perl
```
readDir is to actually trigger IO into the store path (which triggers
the builds).
For now it fails due to missing reference scanning (see followup CLs).
Change-Id: I09b40e410114ce69966a41a0e3c33281b859e443
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12526
Autosubmit: yuka <yuka@yuka.dev>
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
parent
cf91917a9d
commit
2414c87282
14 changed files with 1270 additions and 18 deletions
|
|
@ -2,18 +2,22 @@ use super::{grpc::GRPCBuildService, BuildService, DummyBuildService};
|
|||
use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService};
|
||||
use url::Url;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use super::oci::OCIBuildService;
|
||||
|
||||
/// Constructs a new instance of a [BuildService] from an URI.
|
||||
///
|
||||
/// The following schemes are supported by the following services:
|
||||
/// - `dummy://` ([DummyBuildService])
|
||||
/// - `oci://` ([OCIBuildService])
|
||||
/// - `grpc+*://` ([GRPCBuildService])
|
||||
///
|
||||
/// As some of these [BuildService] need to talk to a [BlobService] and
|
||||
/// [DirectoryService], these also need to be passed in.
|
||||
pub async fn from_addr<BS, DS>(
|
||||
uri: &str,
|
||||
_blob_service: BS,
|
||||
_directory_service: DS,
|
||||
blob_service: BS,
|
||||
directory_service: DS,
|
||||
) -> std::io::Result<Box<dyn BuildService>>
|
||||
where
|
||||
BS: AsRef<dyn BlobService> + Send + Sync + Clone + 'static,
|
||||
|
|
@ -25,6 +29,21 @@ where
|
|||
Ok(match url.scheme() {
|
||||
// dummy doesn't care about parameters.
|
||||
"dummy" => Box::<DummyBuildService>::default(),
|
||||
#[cfg(target_os = "linux")]
|
||||
"oci" => {
|
||||
// oci wants a path in which it creates bundles.
|
||||
if url.path().is_empty() {
|
||||
Err(std::io::Error::other("oci needs a bundle dir as path"))?
|
||||
}
|
||||
|
||||
// TODO: make sandbox shell and rootless_uid_gid configurable
|
||||
|
||||
Box::new(OCIBuildService::new(
|
||||
url.path().into(),
|
||||
blob_service,
|
||||
directory_service,
|
||||
))
|
||||
}
|
||||
scheme => {
|
||||
if scheme.starts_with("grpc+") {
|
||||
let client = crate::proto::build_service_client::BuildServiceClient::new(
|
||||
|
|
@ -50,12 +69,18 @@ mod tests {
|
|||
use std::sync::Arc;
|
||||
|
||||
use super::from_addr;
|
||||
use lazy_static::lazy_static;
|
||||
use rstest::rstest;
|
||||
use tempfile::TempDir;
|
||||
use tvix_castore::{
|
||||
blobservice::{BlobService, MemoryBlobService},
|
||||
directoryservice::{DirectoryService, MemoryDirectoryService},
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref TMPDIR_OCI_1: TempDir = TempDir::new().unwrap();
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
/// This uses an unsupported scheme.
|
||||
#[case::unsupported_scheme("http://foo.example/test", false)]
|
||||
|
|
@ -73,6 +98,10 @@ mod tests {
|
|||
#[case::grpc_valid_https_host_without_port("grpc+https://localhost", true)]
|
||||
/// Correct scheme to connect to localhost over http, but with additional path, which is invalid.
|
||||
#[case::grpc_invalid_host_and_path("grpc+http://localhost/some-path", false)]
|
||||
/// This configures OCI, but doesn't specify the bundle path
|
||||
#[case::oci_missing_bundle_dir("oci://", false)]
|
||||
/// This configures OCI, specifying the bundle path
|
||||
#[case::oci_bundle_path(&format!("oci://{}", TMPDIR_OCI_1.path().to_str().unwrap()), true)]
|
||||
#[tokio::test]
|
||||
async fn test_from_addr(#[case] uri_str: &str, #[case] exp_succeed: bool) {
|
||||
let blob_service: Arc<dyn BlobService> = Arc::from(MemoryBlobService::default());
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@ mod dummy;
|
|||
mod from_addr;
|
||||
mod grpc;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
mod oci;
|
||||
|
||||
pub use dummy::DummyBuildService;
|
||||
pub use from_addr::from_addr;
|
||||
|
||||
|
|
|
|||
245
tvix/build/src/buildservice/oci.rs
Normal file
245
tvix/build/src/buildservice/oci.rs
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
use anyhow::Context;
|
||||
use bstr::BStr;
|
||||
use oci_spec::runtime::{LinuxIdMapping, LinuxIdMappingBuilder};
|
||||
use tokio::process::{Child, Command};
|
||||
use tonic::async_trait;
|
||||
use tracing::{debug, instrument, warn, Span};
|
||||
use tvix_castore::{
|
||||
blobservice::BlobService, directoryservice::DirectoryService, fs::fuse::FuseDaemon,
|
||||
import::fs::ingest_path, Node, PathComponent,
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
oci::{get_host_output_paths, make_bundle, make_spec},
|
||||
proto::{Build, BuildRequest},
|
||||
};
|
||||
use std::{collections::BTreeMap, ffi::OsStr, path::PathBuf, process::Stdio};
|
||||
|
||||
use super::BuildService;
|
||||
|
||||
const SANDBOX_SHELL: &str = env!("TVIX_BUILD_SANDBOX_SHELL");
|
||||
const MAX_CONCURRENT_BUILDS: usize = 2; // TODO: make configurable
|
||||
|
||||
pub struct OCIBuildService<BS, DS> {
|
||||
/// Root path in which all bundles are created in
|
||||
bundle_root: PathBuf,
|
||||
|
||||
/// uid mappings to set up for the workloads
|
||||
uid_mappings: Vec<LinuxIdMapping>,
|
||||
/// uid mappings to set up for the workloads
|
||||
gid_mappings: Vec<LinuxIdMapping>,
|
||||
|
||||
/// Handle to a [BlobService], used by filesystems spawned during builds.
|
||||
blob_service: BS,
|
||||
/// Handle to a [DirectoryService], used by filesystems spawned during builds.
|
||||
directory_service: DS,
|
||||
|
||||
// semaphore to track number of concurrently running builds.
|
||||
// this is necessary, as otherwise we very quickly run out of open file handles.
|
||||
concurrent_builds: tokio::sync::Semaphore,
|
||||
}
|
||||
|
||||
impl<BS, DS> OCIBuildService<BS, DS> {
|
||||
pub fn new(bundle_root: PathBuf, blob_service: BS, directory_service: DS) -> Self {
|
||||
// We map root inside the container to the uid/gid this is running at,
|
||||
// and allocate one for uid 1000 into the container from the range we
|
||||
// got in /etc/sub{u,g}id.
|
||||
// TODO: actually read uid, and /etc/subuid. Maybe only when we try to build?
|
||||
// FUTUREWORK: use different uids?
|
||||
Self {
|
||||
bundle_root,
|
||||
blob_service,
|
||||
directory_service,
|
||||
uid_mappings: vec![
|
||||
LinuxIdMappingBuilder::default()
|
||||
.host_id(1000_u32)
|
||||
.container_id(0_u32)
|
||||
.size(1_u32)
|
||||
.build()
|
||||
.unwrap(),
|
||||
LinuxIdMappingBuilder::default()
|
||||
.host_id(100000_u32)
|
||||
.container_id(1000_u32)
|
||||
.size(1_u32)
|
||||
.build()
|
||||
.unwrap(),
|
||||
],
|
||||
gid_mappings: vec![
|
||||
LinuxIdMappingBuilder::default()
|
||||
.host_id(100_u32)
|
||||
.container_id(0_u32)
|
||||
.size(1_u32)
|
||||
.build()
|
||||
.unwrap(),
|
||||
LinuxIdMappingBuilder::default()
|
||||
.host_id(100000_u32)
|
||||
.container_id(100_u32)
|
||||
.size(1_u32)
|
||||
.build()
|
||||
.unwrap(),
|
||||
],
|
||||
concurrent_builds: tokio::sync::Semaphore::new(MAX_CONCURRENT_BUILDS),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<BS, DS> BuildService for OCIBuildService<BS, DS>
|
||||
where
|
||||
BS: AsRef<dyn BlobService> + Send + Sync + Clone + 'static,
|
||||
DS: AsRef<dyn DirectoryService> + Send + Sync + Clone + 'static,
|
||||
{
|
||||
#[instrument(skip_all, err)]
|
||||
async fn do_build(&self, request: BuildRequest) -> std::io::Result<Build> {
|
||||
let _permit = self.concurrent_builds.acquire().await.unwrap();
|
||||
|
||||
let bundle_name = Uuid::new_v4();
|
||||
let bundle_path = self.bundle_root.join(bundle_name.to_string());
|
||||
|
||||
let span = Span::current();
|
||||
span.record("bundle_name", bundle_name.to_string());
|
||||
|
||||
let mut runtime_spec = make_spec(&request, true, SANDBOX_SHELL)
|
||||
.context("failed to create spec")
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
let mut linux = runtime_spec.linux().clone().unwrap();
|
||||
|
||||
// edit the spec, we need to setup uid/gid mappings.
|
||||
linux.set_uid_mappings(Some(self.uid_mappings.clone()));
|
||||
linux.set_gid_mappings(Some(self.gid_mappings.clone()));
|
||||
|
||||
runtime_spec.set_linux(Some(linux));
|
||||
|
||||
make_bundle(&request, &runtime_spec, &bundle_path)
|
||||
.context("failed to produce bundle")
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
// pre-calculate the locations we want to later ingest, in the order of
|
||||
// the original outputs.
|
||||
// If we can't find calculate that path, don't start the build in first place.
|
||||
let host_output_paths = get_host_output_paths(&request, &bundle_path)
|
||||
.context("failed to calculate host output paths")
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
// NOTE: impl Drop for FuseDaemon unmounts, so if the call is cancelled, umount.
|
||||
let _fuse_daemon = tokio::task::spawn_blocking({
|
||||
let blob_service = self.blob_service.clone();
|
||||
let directory_service = self.directory_service.clone();
|
||||
// assemble a BTreeMap of Nodes to pass into TvixStoreFs.
|
||||
let root_nodes: BTreeMap<PathComponent, Node> =
|
||||
BTreeMap::from_iter(request.inputs.iter().map(|input| {
|
||||
// We know from validation this is Some.
|
||||
input.clone().into_name_and_node().unwrap()
|
||||
}));
|
||||
|
||||
debug!(inputs=?root_nodes.keys(), "got inputs");
|
||||
|
||||
let dest = bundle_path.join("inputs");
|
||||
|
||||
move || {
|
||||
let fs = tvix_castore::fs::TvixStoreFs::new(
|
||||
blob_service,
|
||||
directory_service,
|
||||
Box::new(root_nodes),
|
||||
true,
|
||||
false,
|
||||
);
|
||||
// mount the filesystem and wait for it to be unmounted.
|
||||
// FUTUREWORK: make fuse daemon threads configurable?
|
||||
FuseDaemon::new(fs, dest, 4, true).context("failed to start fuse daemon")
|
||||
}
|
||||
})
|
||||
.await?
|
||||
.context("mounting")
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
debug!(bundle.path=?bundle_path, bundle.name=%bundle_name, "about to spawn bundle");
|
||||
|
||||
// start the bundle as another process.
|
||||
let child = spawn_bundle(bundle_path, &bundle_name.to_string())?;
|
||||
|
||||
// wait for the process to exit
|
||||
// FUTUREWORK: change the trait to allow reporting progress / logs…
|
||||
let child_output = child
|
||||
.wait_with_output()
|
||||
.await
|
||||
.context("failed to run process")
|
||||
.map_err(std::io::Error::other)?;
|
||||
|
||||
// Check the exit code
|
||||
if !child_output.status.success() {
|
||||
let stdout = BStr::new(&child_output.stdout);
|
||||
let stderr = BStr::new(&child_output.stderr);
|
||||
|
||||
warn!(stdout=%stdout, stderr=%stderr, exit_code=%child_output.status, "build failed");
|
||||
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
"nonzero exit code".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Ingest build outputs into the castore.
|
||||
// We use try_join_all here. No need to spawn new tasks, as this is
|
||||
// mostly IO bound.
|
||||
let outputs = futures::future::try_join_all(host_output_paths.into_iter().enumerate().map(
|
||||
|(i, p)| {
|
||||
let output_path = request.outputs[i].clone();
|
||||
async move {
|
||||
debug!(host.path=?p, output.path=?output_path, "ingesting path");
|
||||
|
||||
let output_node = ingest_path::<_, _, _, &[u8]>(
|
||||
self.blob_service.clone(),
|
||||
&self.directory_service,
|
||||
p,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("Unable to ingest output: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok::<_, std::io::Error>(tvix_castore::proto::Node::from_name_and_node(
|
||||
"".into(),
|
||||
output_node,
|
||||
))
|
||||
}
|
||||
},
|
||||
))
|
||||
.await?;
|
||||
|
||||
Ok(Build {
|
||||
build_request: Some(request.clone()),
|
||||
outputs,
|
||||
outputs_needles: vec![], // TODO refscanning
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawns runc with the bundle at bundle_path.
|
||||
/// On success, returns the child.
|
||||
#[instrument(err)]
|
||||
fn spawn_bundle(
|
||||
bundle_path: impl AsRef<OsStr> + std::fmt::Debug,
|
||||
bundle_name: &str,
|
||||
) -> std::io::Result<Child> {
|
||||
let mut command = Command::new("runc");
|
||||
|
||||
command
|
||||
.args(&[
|
||||
"run".into(),
|
||||
"--bundle".into(),
|
||||
bundle_path.as_ref().to_os_string(),
|
||||
bundle_name.into(),
|
||||
])
|
||||
.stderr(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stdin(Stdio::null());
|
||||
|
||||
command.spawn()
|
||||
}
|
||||
|
|
@ -1,2 +1,3 @@
|
|||
pub mod buildservice;
|
||||
mod oci;
|
||||
pub mod proto;
|
||||
|
|
|
|||
133
tvix/build/src/oci/bundle.rs
Normal file
133
tvix/build/src/oci/bundle.rs
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
//! Module to create an OCI runtime bundle for a given [BuildRequest].
|
||||
use std::{
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use super::scratch_name;
|
||||
use crate::proto::BuildRequest;
|
||||
use anyhow::{bail, Context};
|
||||
use tracing::{debug, instrument};
|
||||
|
||||
/// Produce an OCI bundle in a given path.
|
||||
/// Check [make_spec] for a description about the paths produced.
|
||||
#[instrument(err)]
|
||||
pub(crate) fn make_bundle<'a>(
|
||||
request: &BuildRequest,
|
||||
runtime_spec: &oci_spec::runtime::Spec,
|
||||
path: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
fs::create_dir_all(path).context("failed to create bundle path")?;
|
||||
|
||||
let spec_json = serde_json::to_string(runtime_spec).context("failed to render spec to json")?;
|
||||
fs::write(path.join("config.json"), spec_json).context("failed to write config.json")?;
|
||||
|
||||
fs::create_dir_all(path.join("inputs")).context("failed to create inputs dir")?;
|
||||
|
||||
let root_path = path.join("root");
|
||||
|
||||
fs::create_dir_all(&root_path).context("failed to create root path dir")?;
|
||||
fs::create_dir_all(root_path.join("etc")).context("failed to create root/etc dir")?;
|
||||
|
||||
// TODO: populate /etc/{group,passwd}. It's a mess?
|
||||
|
||||
let scratch_root = path.join("scratch");
|
||||
fs::create_dir_all(&scratch_root).context("failed to create scratch/ dir")?;
|
||||
|
||||
// for each scratch path, calculate its name inside scratch, and ensure the
|
||||
// directory exists.
|
||||
for p in request.scratch_paths.iter() {
|
||||
let scratch_path = scratch_root.join(scratch_name(p));
|
||||
debug!(scratch_path=?scratch_path, path=?p, "about to create scratch dir");
|
||||
fs::create_dir_all(scratch_path).context("Unable to create scratch dir")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Determine the path of all outputs specified in a [BuildRequest]
|
||||
/// as seen from the host, for post-build ingestion.
|
||||
/// This lookup needs to take scratch paths into consideration, as the build
|
||||
/// root is not writable on its own.
|
||||
/// If a path can't be determined, an error is returned.
|
||||
pub(crate) fn get_host_output_paths(
|
||||
request: &BuildRequest,
|
||||
bundle_path: &Path,
|
||||
) -> anyhow::Result<Vec<PathBuf>> {
|
||||
let scratch_root = bundle_path.join("scratch");
|
||||
|
||||
let mut host_output_paths: Vec<PathBuf> = Vec::with_capacity(request.outputs.len());
|
||||
|
||||
for output_path in request.outputs.iter() {
|
||||
// calculate the location of the path.
|
||||
if let Some((mp, relpath)) =
|
||||
find_path_in_scratchs(output_path, request.scratch_paths.as_slice())
|
||||
{
|
||||
host_output_paths.push(scratch_root.join(scratch_name(mp)).join(relpath));
|
||||
} else {
|
||||
bail!("unable to find path {}", output_path);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(host_output_paths)
|
||||
}
|
||||
|
||||
/// Looks up which of the given `mountpoints` contains `search_path`.
///
/// Returns the matching mountpoint together with the remainder of
/// `search_path` relative to it. A mountpoint only matches if it is followed
/// by a `/` in `search_path`; a path equal to the mountpoint does not match.
///
/// The list is walked back to front and the first hit wins, so `mountpoints`
/// is expected to be sorted for the most specific mountpoint to be found.
fn find_path_in_scratchs<'a, 'b>(
    search_path: &'a str,
    mountpoints: &'b [String],
) -> Option<(&'b str, &'a str)> {
    for mountpoint in mountpoints.iter().rev() {
        let relative = search_path
            .strip_prefix(mountpoint.as_str())
            .and_then(|rest| rest.strip_prefix('/'));

        if let Some(relative) = relative {
            return Some((mountpoint.as_str(), relative));
        }
    }

    None
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    use rstest::rstest;

    use crate::{oci::scratch_name, proto::BuildRequest};

    use super::{find_path_in_scratchs, get_host_output_paths};

    /// Checks the mountpoint lookup against a few (sorted) mountpoint lists.
    #[rstest]
    #[case::simple("nix/store/aaaa", &["nix/store".into()], Some(("nix/store", "aaaa")))]
    #[case::prefix_no_sep("nix/store/aaaa", &["nix/sto".into()], None)]
    #[case::not_found("nix/store/aaaa", &["build".into()], None)]
    /// With nested mountpoints, the one sorted last is picked.
    #[case::nested("nix/store/aaaa", &["nix".into(), "nix/store".into()], Some(("nix/store", "aaaa")))]
    /// A path equal to the mountpoint is not considered to be inside of it.
    #[case::exact_match("nix/store", &["nix/store".into()], None)]
    fn test_find_path_in_scratchs(
        #[case] search_path: &str,
        #[case] mountpoints: &[String],
        #[case] expected: Option<(&str, &str)>,
    ) {
        assert_eq!(find_path_in_scratchs(search_path, mountpoints), expected);
    }

    /// An output below the `nix/store` scratch path must resolve into the
    /// directory allocated for that scratch path inside the bundle.
    #[test]
    fn test_get_host_output_paths_simple() {
        let request = BuildRequest {
            outputs: vec!["nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo".into()],
            scratch_paths: vec!["build".into(), "nix/store".into()],
            ..Default::default()
        };

        let paths =
            get_host_output_paths(&request, Path::new("bundle-root")).expect("must succeed");

        let mut expected_path = PathBuf::new();
        expected_path.push("bundle-root");
        expected_path.push("scratch");
        expected_path.push(scratch_name("nix/store"));
        expected_path.push("fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo");

        assert_eq!(vec![expected_path], paths)
    }
}
|
||||
13
tvix/build/src/oci/mod.rs
Normal file
13
tvix/build/src/oci/mod.rs
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
mod bundle;
|
||||
mod spec;
|
||||
|
||||
pub(crate) use bundle::get_host_output_paths;
|
||||
pub(crate) use bundle::make_bundle;
|
||||
pub(crate) use spec::make_spec;
|
||||
|
||||
/// For a given scratch path, return the scratch_name that's allocated.
|
||||
// We currently use use lower hex encoding of the b3 digest of the scratch
|
||||
// path, so we don't need to globally allocate and pass down some uuids.
|
||||
pub(crate) fn scratch_name(scratch_path: &str) -> String {
|
||||
data_encoding::BASE32.encode(blake3::hash(scratch_path.as_bytes()).as_bytes())
|
||||
}
|
||||
319
tvix/build/src/oci/spec.rs
Normal file
319
tvix/build/src/oci/spec.rs
Normal file
|
|
@ -0,0 +1,319 @@
|
|||
//! Module to create a OCI runtime spec for a given [BuildRequest].
|
||||
use crate::proto::BuildRequest;
|
||||
use oci_spec::{
|
||||
runtime::{Capability, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType},
|
||||
OciSpecError,
|
||||
};
|
||||
use std::{collections::HashSet, path::Path};
|
||||
use tvix_castore::proto as castorepb;
|
||||
|
||||
use super::scratch_name;
|
||||
|
||||
/// For a given [BuildRequest], return an OCI runtime spec.
|
||||
///
|
||||
/// While there's no IO occuring in this function, the generated spec contains
|
||||
/// path references relative to the "bundle location".
|
||||
/// Due to overlayfs requiring its layers to be absolute paths, we also need a
|
||||
/// [bundle_dir] parameter, pointing to the location of the bundle dir itself.
|
||||
///
|
||||
/// The paths used in the spec are the following (relative to a "bundle root"):
|
||||
///
|
||||
/// - `inputs`, a directory where the castore nodes specified the build request
|
||||
/// inputs are supposed to be populated.
|
||||
/// - `outputs`, a directory where all writes to the store_dir during the build
|
||||
/// are directed to.
|
||||
/// - `root`, a minimal skeleton of files that'll be present at /.
|
||||
/// - `scratch`, a directory containing other directories which will be
|
||||
/// bind-mounted read-write into the container and used as scratch space
|
||||
/// during the build.
|
||||
/// No assumptions should be made about what's inside this directory.
|
||||
///
|
||||
/// Generating these paths, and populating contents, like a skeleton root
|
||||
/// is up to another function, this function doesn't do filesystem IO.
|
||||
pub(crate) fn make_spec(
|
||||
request: &BuildRequest,
|
||||
rootless: bool,
|
||||
sandbox_shell: &str,
|
||||
) -> Result<oci_spec::runtime::Spec, oci_spec::OciSpecError> {
|
||||
// TODO: add BuildRequest validations. BuildRequest must contain strings as inputs
|
||||
|
||||
let allow_network = request
|
||||
.constraints
|
||||
.as_ref()
|
||||
.is_some_and(|c| c.network_access);
|
||||
|
||||
// Assemble ro_host_mounts. Start with constraints.available_ro_paths.
|
||||
let mut ro_host_mounts = request
|
||||
.constraints
|
||||
.as_ref()
|
||||
.map(|constraints| {
|
||||
constraints
|
||||
.available_ro_paths
|
||||
.iter()
|
||||
.map(|e| (e.as_str(), e.as_str()))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// If provide_bin_sh is set, mount sandbox_shell to /bin/sh
|
||||
if request
|
||||
.constraints
|
||||
.as_ref()
|
||||
.is_some_and(|c| c.provide_bin_sh)
|
||||
{
|
||||
ro_host_mounts.push((sandbox_shell, "/bin/sh"))
|
||||
}
|
||||
|
||||
oci_spec::runtime::SpecBuilder::default()
|
||||
.process(configure_process(
|
||||
&request.command_args,
|
||||
&request.working_dir,
|
||||
request
|
||||
.environment_vars
|
||||
.iter()
|
||||
.map(|e| {
|
||||
(
|
||||
e.key.as_str(),
|
||||
// TODO: decide what to do with non-bytes env values
|
||||
String::from_utf8(e.value.to_vec()).expect("invalid string in env"),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
rootless,
|
||||
)?)
|
||||
.linux(configure_linux(allow_network, rootless)?)
|
||||
.root(
|
||||
oci_spec::runtime::RootBuilder::default()
|
||||
.path("root")
|
||||
.readonly(true)
|
||||
.build()?,
|
||||
)
|
||||
.hostname("localhost")
|
||||
.mounts(configure_mounts(
|
||||
rootless,
|
||||
allow_network,
|
||||
request.scratch_paths.iter().map(|e| e.as_str()),
|
||||
request.inputs.iter(),
|
||||
&request.inputs_dir, // TODO: validate
|
||||
ro_host_mounts,
|
||||
)?)
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Return the Process part of the OCI Runtime spec.
|
||||
/// This configures the command, it's working dir, env and terminal setup.
|
||||
/// It also takes care of setting rlimits and capabilities.
|
||||
/// Capabilities are a bit more complicated in case rootless building is requested.
|
||||
fn configure_process<'a>(
|
||||
command_args: &[String],
|
||||
cwd: &String,
|
||||
env: impl IntoIterator<Item = (&'a str, String)>,
|
||||
rootless: bool,
|
||||
) -> Result<oci_spec::runtime::Process, oci_spec::OciSpecError> {
|
||||
let spec_builder = oci_spec::runtime::ProcessBuilder::default()
|
||||
.args(command_args)
|
||||
.env(
|
||||
env.into_iter()
|
||||
.map(|(k, v)| format!("{}={}", k, v))
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.terminal(true)
|
||||
.user(
|
||||
oci_spec::runtime::UserBuilder::default()
|
||||
.uid(1000u32)
|
||||
.gid(100u32)
|
||||
.build()?,
|
||||
)
|
||||
.cwd(Path::new("/").join(cwd)) // relative to the bundle root, but at least runc wants it to also be absolute.
|
||||
.capabilities({
|
||||
let caps: HashSet<Capability> = if !rootless {
|
||||
HashSet::from([Capability::AuditWrite, Capability::Kill])
|
||||
} else {
|
||||
HashSet::from([
|
||||
Capability::AuditWrite,
|
||||
Capability::Chown,
|
||||
Capability::DacOverride,
|
||||
Capability::Fowner,
|
||||
Capability::Fsetid,
|
||||
Capability::Kill,
|
||||
Capability::Mknod,
|
||||
Capability::NetBindService,
|
||||
Capability::NetRaw,
|
||||
Capability::Setfcap,
|
||||
Capability::Setgid,
|
||||
Capability::Setpcap,
|
||||
Capability::Setuid,
|
||||
Capability::SysChroot,
|
||||
])
|
||||
};
|
||||
|
||||
oci_spec::runtime::LinuxCapabilitiesBuilder::default()
|
||||
.bounding(caps.clone())
|
||||
.effective(caps.clone())
|
||||
.inheritable(caps.clone())
|
||||
.permitted(caps.clone())
|
||||
.ambient(caps)
|
||||
.build()?
|
||||
})
|
||||
.rlimits([oci_spec::runtime::LinuxRlimitBuilder::default()
|
||||
.typ(oci_spec::runtime::LinuxRlimitType::RlimitNofile)
|
||||
.hard(1024_u64)
|
||||
.soft(1024_u64)
|
||||
.build()?])
|
||||
.no_new_privileges(true);
|
||||
|
||||
spec_builder.build()
|
||||
}
|
||||
|
||||
/// Return the Linux part of the OCI Runtime spec.
|
||||
/// This configures various namespaces, masked and read-only paths.
|
||||
fn configure_linux(
|
||||
allow_network: bool,
|
||||
rootless: bool,
|
||||
) -> Result<oci_spec::runtime::Linux, OciSpecError> {
|
||||
let mut linux = oci_spec::runtime::Linux::default();
|
||||
|
||||
// explicitly set namespaces, depending on allow_network.
|
||||
linux.set_namespaces(Some({
|
||||
let mut namespace_types = vec![
|
||||
LinuxNamespaceType::Pid,
|
||||
LinuxNamespaceType::Ipc,
|
||||
LinuxNamespaceType::Uts,
|
||||
LinuxNamespaceType::Mount,
|
||||
LinuxNamespaceType::Cgroup,
|
||||
];
|
||||
if !allow_network {
|
||||
namespace_types.push(LinuxNamespaceType::Network)
|
||||
}
|
||||
if rootless {
|
||||
namespace_types.push(LinuxNamespaceType::User)
|
||||
}
|
||||
|
||||
namespace_types
|
||||
.into_iter()
|
||||
.map(|e| LinuxNamespaceBuilder::default().typ(e).build())
|
||||
.collect::<Result<Vec<LinuxNamespace>, _>>()?
|
||||
}));
|
||||
|
||||
linux.set_masked_paths(Some(
|
||||
[
|
||||
"/proc/kcore",
|
||||
"/proc/latency_stats",
|
||||
"/proc/timer_list",
|
||||
"/proc/timer_stats",
|
||||
"/proc/sched_debug",
|
||||
"/sys/firmware",
|
||||
]
|
||||
.into_iter()
|
||||
.map(|e| e.to_string())
|
||||
.collect::<Vec<_>>(),
|
||||
));
|
||||
|
||||
linux.set_readonly_paths(Some(
|
||||
[
|
||||
"/proc/asound",
|
||||
"/proc/bus",
|
||||
"/proc/fs",
|
||||
"/proc/irq",
|
||||
"/proc/sys",
|
||||
"/proc/sysrq-trigger",
|
||||
]
|
||||
.into_iter()
|
||||
.map(|e| e.to_string())
|
||||
.collect::<Vec<_>>(),
|
||||
));
|
||||
|
||||
Ok(linux)
|
||||
}
|
||||
|
||||
/// Return the Mounts part of the OCI Runtime spec.
|
||||
/// It first sets up the standard mounts, then scratch paths, bind mounts for
|
||||
/// all inputs, and finally read-only paths from the hosts.
|
||||
fn configure_mounts<'a>(
|
||||
rootless: bool,
|
||||
allow_network: bool,
|
||||
scratch_paths: impl IntoIterator<Item = &'a str>,
|
||||
inputs: impl Iterator<Item = &'a castorepb::Node>,
|
||||
inputs_dir: &str,
|
||||
ro_host_mounts: impl IntoIterator<Item = (&'a str, &'a str)>,
|
||||
) -> Result<Vec<oci_spec::runtime::Mount>, oci_spec::OciSpecError> {
|
||||
let mut mounts: Vec<_> = if rootless {
|
||||
oci_spec::runtime::get_rootless_mounts()
|
||||
} else {
|
||||
oci_spec::runtime::get_default_mounts()
|
||||
};
|
||||
|
||||
mounts.push(configure_mount(
|
||||
"tmpfs",
|
||||
"/tmp",
|
||||
"tmpfs",
|
||||
&["nosuid", "noatime", "mode=700"],
|
||||
)?);
|
||||
|
||||
// For each scratch path, create a bind mount entry.
|
||||
let scratch_root = Path::new("scratch"); // relative path
|
||||
for scratch_path in scratch_paths.into_iter() {
|
||||
let src = scratch_root.join(scratch_name(scratch_path));
|
||||
mounts.push(configure_mount(
|
||||
src.to_str().unwrap(),
|
||||
Path::new("/").join(scratch_path).to_str().unwrap(),
|
||||
"none",
|
||||
&["rbind", "rw"],
|
||||
)?);
|
||||
}
|
||||
|
||||
// For each input, create a bind mount from inputs/$name into $inputs_dir/$name.
|
||||
for input in inputs {
|
||||
let (input_name, _input) = input
|
||||
.clone()
|
||||
.into_name_and_node()
|
||||
.expect("invalid input name");
|
||||
|
||||
let input_name = std::str::from_utf8(input_name.as_ref()).expect("invalid input name");
|
||||
mounts.push(configure_mount(
|
||||
Path::new("inputs").join(input_name).to_str().unwrap(),
|
||||
Path::new("/")
|
||||
.join(inputs_dir)
|
||||
.join(input_name)
|
||||
.to_str()
|
||||
.unwrap(),
|
||||
"none",
|
||||
&[
|
||||
"rbind", "ro",
|
||||
// "nosuid" is required, otherwise mounting will just fail with
|
||||
// a generic permission error.
|
||||
// See https://github.com/wllenyj/containerd/commit/42a386c8164bef16d59590c61ab00806f854d8fd
|
||||
"nosuid", "nodev",
|
||||
],
|
||||
)?);
|
||||
}
|
||||
|
||||
// Process ro_host_mounts
|
||||
for (src, dst) in ro_host_mounts.into_iter() {
|
||||
mounts.push(configure_mount(src, dst, "none", &["rbind", "ro"])?);
|
||||
}
|
||||
|
||||
// In case network is enabled, also mount in /etc/{resolv.conf,services,hosts}
|
||||
if allow_network {
|
||||
for p in ["/etc/resolv.conf", "/etc/services", "/etc/hosts"] {
|
||||
mounts.push(configure_mount(p, p, "none", &["rbind", "ro"])?);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(mounts)
|
||||
}
|
||||
|
||||
/// Helper function to produce a mount.
|
||||
fn configure_mount(
|
||||
source: &str,
|
||||
destination: &str,
|
||||
typ: &str,
|
||||
options: &[&str],
|
||||
) -> Result<oci_spec::runtime::Mount, oci_spec::OciSpecError> {
|
||||
oci_spec::runtime::MountBuilder::default()
|
||||
.destination(destination.to_string())
|
||||
.typ(typ.to_string())
|
||||
.source(source.to_string())
|
||||
.options(options.iter().map(|e| e.to_string()).collect::<Vec<_>>())
|
||||
.build()
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue