This is just patchset 10 of CL10855, before the color_eyre changes,
rebased to the tvix_castore api.
Change-Id: If4b42412ff8568058908cda971ad7d6f2d9f9b7b
---
This provides a build service invoking runc. It can be used by using the
`oci://$path_to_some_tempdir` builder URL for now.
For now, it can be tested as such:
```
BUILD_SERVICE_ADDR=oci://$PWD/bundles target/debug/tvix
let pkgs = (import <nixpkgs> {}); in builtins.readDir pkgs.perl
```
readDir is to actually trigger IO into the store path (which triggers
the builds).
For now it fails due to missing reference scanning (see followup CLs).
Change-Id: I09b40e410114ce69966a41a0e3c33281b859e443
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12526
Autosubmit: yuka <yuka@yuka.dev>
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
319 lines
11 KiB
Rust
319 lines
11 KiB
Rust
//! Module to create a OCI runtime spec for a given [BuildRequest].
|
|
use crate::proto::BuildRequest;
|
|
use oci_spec::{
|
|
runtime::{Capability, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType},
|
|
OciSpecError,
|
|
};
|
|
use std::{collections::HashSet, path::Path};
|
|
use tvix_castore::proto as castorepb;
|
|
|
|
use super::scratch_name;
|
|
|
|
/// For a given [BuildRequest], return an OCI runtime spec.
|
|
///
|
|
/// While there's no IO occuring in this function, the generated spec contains
|
|
/// path references relative to the "bundle location".
|
|
/// Due to overlayfs requiring its layers to be absolute paths, we also need a
|
|
/// [bundle_dir] parameter, pointing to the location of the bundle dir itself.
|
|
///
|
|
/// The paths used in the spec are the following (relative to a "bundle root"):
|
|
///
|
|
/// - `inputs`, a directory where the castore nodes specified the build request
|
|
/// inputs are supposed to be populated.
|
|
/// - `outputs`, a directory where all writes to the store_dir during the build
|
|
/// are directed to.
|
|
/// - `root`, a minimal skeleton of files that'll be present at /.
|
|
/// - `scratch`, a directory containing other directories which will be
|
|
/// bind-mounted read-write into the container and used as scratch space
|
|
/// during the build.
|
|
/// No assumptions should be made about what's inside this directory.
|
|
///
|
|
/// Generating these paths, and populating contents, like a skeleton root
|
|
/// is up to another function, this function doesn't do filesystem IO.
|
|
pub(crate) fn make_spec(
|
|
request: &BuildRequest,
|
|
rootless: bool,
|
|
sandbox_shell: &str,
|
|
) -> Result<oci_spec::runtime::Spec, oci_spec::OciSpecError> {
|
|
// TODO: add BuildRequest validations. BuildRequest must contain strings as inputs
|
|
|
|
let allow_network = request
|
|
.constraints
|
|
.as_ref()
|
|
.is_some_and(|c| c.network_access);
|
|
|
|
// Assemble ro_host_mounts. Start with constraints.available_ro_paths.
|
|
let mut ro_host_mounts = request
|
|
.constraints
|
|
.as_ref()
|
|
.map(|constraints| {
|
|
constraints
|
|
.available_ro_paths
|
|
.iter()
|
|
.map(|e| (e.as_str(), e.as_str()))
|
|
.collect::<Vec<_>>()
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
// If provide_bin_sh is set, mount sandbox_shell to /bin/sh
|
|
if request
|
|
.constraints
|
|
.as_ref()
|
|
.is_some_and(|c| c.provide_bin_sh)
|
|
{
|
|
ro_host_mounts.push((sandbox_shell, "/bin/sh"))
|
|
}
|
|
|
|
oci_spec::runtime::SpecBuilder::default()
|
|
.process(configure_process(
|
|
&request.command_args,
|
|
&request.working_dir,
|
|
request
|
|
.environment_vars
|
|
.iter()
|
|
.map(|e| {
|
|
(
|
|
e.key.as_str(),
|
|
// TODO: decide what to do with non-bytes env values
|
|
String::from_utf8(e.value.to_vec()).expect("invalid string in env"),
|
|
)
|
|
})
|
|
.collect::<Vec<_>>(),
|
|
rootless,
|
|
)?)
|
|
.linux(configure_linux(allow_network, rootless)?)
|
|
.root(
|
|
oci_spec::runtime::RootBuilder::default()
|
|
.path("root")
|
|
.readonly(true)
|
|
.build()?,
|
|
)
|
|
.hostname("localhost")
|
|
.mounts(configure_mounts(
|
|
rootless,
|
|
allow_network,
|
|
request.scratch_paths.iter().map(|e| e.as_str()),
|
|
request.inputs.iter(),
|
|
&request.inputs_dir, // TODO: validate
|
|
ro_host_mounts,
|
|
)?)
|
|
.build()
|
|
}
|
|
|
|
/// Return the Process part of the OCI Runtime spec.
|
|
/// This configures the command, it's working dir, env and terminal setup.
|
|
/// It also takes care of setting rlimits and capabilities.
|
|
/// Capabilities are a bit more complicated in case rootless building is requested.
|
|
fn configure_process<'a>(
|
|
command_args: &[String],
|
|
cwd: &String,
|
|
env: impl IntoIterator<Item = (&'a str, String)>,
|
|
rootless: bool,
|
|
) -> Result<oci_spec::runtime::Process, oci_spec::OciSpecError> {
|
|
let spec_builder = oci_spec::runtime::ProcessBuilder::default()
|
|
.args(command_args)
|
|
.env(
|
|
env.into_iter()
|
|
.map(|(k, v)| format!("{}={}", k, v))
|
|
.collect::<Vec<_>>(),
|
|
)
|
|
.terminal(true)
|
|
.user(
|
|
oci_spec::runtime::UserBuilder::default()
|
|
.uid(1000u32)
|
|
.gid(100u32)
|
|
.build()?,
|
|
)
|
|
.cwd(Path::new("/").join(cwd)) // relative to the bundle root, but at least runc wants it to also be absolute.
|
|
.capabilities({
|
|
let caps: HashSet<Capability> = if !rootless {
|
|
HashSet::from([Capability::AuditWrite, Capability::Kill])
|
|
} else {
|
|
HashSet::from([
|
|
Capability::AuditWrite,
|
|
Capability::Chown,
|
|
Capability::DacOverride,
|
|
Capability::Fowner,
|
|
Capability::Fsetid,
|
|
Capability::Kill,
|
|
Capability::Mknod,
|
|
Capability::NetBindService,
|
|
Capability::NetRaw,
|
|
Capability::Setfcap,
|
|
Capability::Setgid,
|
|
Capability::Setpcap,
|
|
Capability::Setuid,
|
|
Capability::SysChroot,
|
|
])
|
|
};
|
|
|
|
oci_spec::runtime::LinuxCapabilitiesBuilder::default()
|
|
.bounding(caps.clone())
|
|
.effective(caps.clone())
|
|
.inheritable(caps.clone())
|
|
.permitted(caps.clone())
|
|
.ambient(caps)
|
|
.build()?
|
|
})
|
|
.rlimits([oci_spec::runtime::LinuxRlimitBuilder::default()
|
|
.typ(oci_spec::runtime::LinuxRlimitType::RlimitNofile)
|
|
.hard(1024_u64)
|
|
.soft(1024_u64)
|
|
.build()?])
|
|
.no_new_privileges(true);
|
|
|
|
spec_builder.build()
|
|
}
|
|
|
|
/// Return the Linux part of the OCI Runtime spec.
|
|
/// This configures various namespaces, masked and read-only paths.
|
|
fn configure_linux(
|
|
allow_network: bool,
|
|
rootless: bool,
|
|
) -> Result<oci_spec::runtime::Linux, OciSpecError> {
|
|
let mut linux = oci_spec::runtime::Linux::default();
|
|
|
|
// explicitly set namespaces, depending on allow_network.
|
|
linux.set_namespaces(Some({
|
|
let mut namespace_types = vec![
|
|
LinuxNamespaceType::Pid,
|
|
LinuxNamespaceType::Ipc,
|
|
LinuxNamespaceType::Uts,
|
|
LinuxNamespaceType::Mount,
|
|
LinuxNamespaceType::Cgroup,
|
|
];
|
|
if !allow_network {
|
|
namespace_types.push(LinuxNamespaceType::Network)
|
|
}
|
|
if rootless {
|
|
namespace_types.push(LinuxNamespaceType::User)
|
|
}
|
|
|
|
namespace_types
|
|
.into_iter()
|
|
.map(|e| LinuxNamespaceBuilder::default().typ(e).build())
|
|
.collect::<Result<Vec<LinuxNamespace>, _>>()?
|
|
}));
|
|
|
|
linux.set_masked_paths(Some(
|
|
[
|
|
"/proc/kcore",
|
|
"/proc/latency_stats",
|
|
"/proc/timer_list",
|
|
"/proc/timer_stats",
|
|
"/proc/sched_debug",
|
|
"/sys/firmware",
|
|
]
|
|
.into_iter()
|
|
.map(|e| e.to_string())
|
|
.collect::<Vec<_>>(),
|
|
));
|
|
|
|
linux.set_readonly_paths(Some(
|
|
[
|
|
"/proc/asound",
|
|
"/proc/bus",
|
|
"/proc/fs",
|
|
"/proc/irq",
|
|
"/proc/sys",
|
|
"/proc/sysrq-trigger",
|
|
]
|
|
.into_iter()
|
|
.map(|e| e.to_string())
|
|
.collect::<Vec<_>>(),
|
|
));
|
|
|
|
Ok(linux)
|
|
}
|
|
|
|
/// Return the Mounts part of the OCI Runtime spec.
|
|
/// It first sets up the standard mounts, then scratch paths, bind mounts for
|
|
/// all inputs, and finally read-only paths from the hosts.
|
|
fn configure_mounts<'a>(
|
|
rootless: bool,
|
|
allow_network: bool,
|
|
scratch_paths: impl IntoIterator<Item = &'a str>,
|
|
inputs: impl Iterator<Item = &'a castorepb::Node>,
|
|
inputs_dir: &str,
|
|
ro_host_mounts: impl IntoIterator<Item = (&'a str, &'a str)>,
|
|
) -> Result<Vec<oci_spec::runtime::Mount>, oci_spec::OciSpecError> {
|
|
let mut mounts: Vec<_> = if rootless {
|
|
oci_spec::runtime::get_rootless_mounts()
|
|
} else {
|
|
oci_spec::runtime::get_default_mounts()
|
|
};
|
|
|
|
mounts.push(configure_mount(
|
|
"tmpfs",
|
|
"/tmp",
|
|
"tmpfs",
|
|
&["nosuid", "noatime", "mode=700"],
|
|
)?);
|
|
|
|
// For each scratch path, create a bind mount entry.
|
|
let scratch_root = Path::new("scratch"); // relative path
|
|
for scratch_path in scratch_paths.into_iter() {
|
|
let src = scratch_root.join(scratch_name(scratch_path));
|
|
mounts.push(configure_mount(
|
|
src.to_str().unwrap(),
|
|
Path::new("/").join(scratch_path).to_str().unwrap(),
|
|
"none",
|
|
&["rbind", "rw"],
|
|
)?);
|
|
}
|
|
|
|
// For each input, create a bind mount from inputs/$name into $inputs_dir/$name.
|
|
for input in inputs {
|
|
let (input_name, _input) = input
|
|
.clone()
|
|
.into_name_and_node()
|
|
.expect("invalid input name");
|
|
|
|
let input_name = std::str::from_utf8(input_name.as_ref()).expect("invalid input name");
|
|
mounts.push(configure_mount(
|
|
Path::new("inputs").join(input_name).to_str().unwrap(),
|
|
Path::new("/")
|
|
.join(inputs_dir)
|
|
.join(input_name)
|
|
.to_str()
|
|
.unwrap(),
|
|
"none",
|
|
&[
|
|
"rbind", "ro",
|
|
// "nosuid" is required, otherwise mounting will just fail with
|
|
// a generic permission error.
|
|
// See https://github.com/wllenyj/containerd/commit/42a386c8164bef16d59590c61ab00806f854d8fd
|
|
"nosuid", "nodev",
|
|
],
|
|
)?);
|
|
}
|
|
|
|
// Process ro_host_mounts
|
|
for (src, dst) in ro_host_mounts.into_iter() {
|
|
mounts.push(configure_mount(src, dst, "none", &["rbind", "ro"])?);
|
|
}
|
|
|
|
// In case network is enabled, also mount in /etc/{resolv.conf,services,hosts}
|
|
if allow_network {
|
|
for p in ["/etc/resolv.conf", "/etc/services", "/etc/hosts"] {
|
|
mounts.push(configure_mount(p, p, "none", &["rbind", "ro"])?);
|
|
}
|
|
}
|
|
|
|
Ok(mounts)
|
|
}
|
|
|
|
/// Helper function to produce a mount.
|
|
fn configure_mount(
|
|
source: &str,
|
|
destination: &str,
|
|
typ: &str,
|
|
options: &[&str],
|
|
) -> Result<oci_spec::runtime::Mount, oci_spec::OciSpecError> {
|
|
oci_spec::runtime::MountBuilder::default()
|
|
.destination(destination.to_string())
|
|
.typ(typ.to_string())
|
|
.source(source.to_string())
|
|
.options(options.iter().map(|e| e.to_string()).collect::<Vec<_>>())
|
|
.build()
|
|
}
|