refactor(tvix/build): add stricter BuildRequest type
Change-Id: I2950c76bbc2227952e583426bfb3ed34e8da6d2d Reviewed-on: https://cl.tvl.fyi/c/depot/+/12625 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
This commit is contained in:
parent
1c1eb68678
commit
a247b25097
3 changed files with 315 additions and 3 deletions
131
tvix/build/src/buildservice/build_request.rs
Normal file
131
tvix/build/src/buildservice/build_request.rs
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
use std::collections::{BTreeMap, HashSet};
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use bytes::Bytes;
|
||||||
|
use tvix_castore::{Node, PathComponent};
|
||||||
|
/// A BuildRequest describes the request of something to be run on the builder.
|
||||||
|
/// It is distinct from an actual \[Build\] that has already happened, or might be
|
||||||
|
/// currently ongoing.
|
||||||
|
///
|
||||||
|
/// A BuildRequest can be seen as a more normalized version of a Derivation
|
||||||
|
/// (parsed from A-Term), "writing out" some of the Nix-internal details about
|
||||||
|
/// how e.g. environment variables in the build are set.
|
||||||
|
///
|
||||||
|
/// Nix has some impurities when building a Derivation, for example the --cores option
|
||||||
|
/// ends up as an environment variable in the build, that's not part of the ATerm.
|
||||||
|
///
|
||||||
|
/// As of now, we serialize this into the BuildRequest, so builders can stay dumb.
|
||||||
|
/// This might change in the future.
|
||||||
|
///
|
||||||
|
/// There's also a big difference when it comes to how inputs are modelled:
|
||||||
|
///
|
||||||
|
/// * Nix only uses store path (strings) to describe the inputs.
|
||||||
|
/// As store paths can be input-addressed, a certain store path can contain
|
||||||
|
/// different contents (as not all store paths are binary reproducible).
|
||||||
|
/// This requires that for every input-addressed input, the builder has access
|
||||||
|
/// to either the input's deriver (and needs to build it) or else a trusted
|
||||||
|
/// source for the built input.
|
||||||
|
/// to upload input-addressed paths, requiring the trusted users concept.
|
||||||
|
/// * tvix-build records a list of tvix.castore.v1.Node as inputs.
|
||||||
|
/// These map from the store path base name to their contents, relieving the
|
||||||
|
/// builder from having to "trust" any input-addressed paths, contrary to Nix.
|
||||||
|
///
|
||||||
|
/// While this approach gives a better hermeticity, it has one downside:
|
||||||
|
/// A BuildRequest can only be sent once the contents of all its inputs are known.
|
||||||
|
///
|
||||||
|
/// As of now, we're okay to accept this, but it prevents uploading an
|
||||||
|
/// entirely-non-IFD subgraph of BuildRequests eagerly.
|
||||||
|
#[derive(Clone, PartialEq)]
|
||||||
|
pub struct BuildRequest {
|
||||||
|
/// The list of all root nodes that should be visible in `inputs_dir` at the
|
||||||
|
/// time of the build.
|
||||||
|
/// As all references are content-addressed, no additional signatures are
|
||||||
|
/// needed to substitute / make these available in the build environment.
|
||||||
|
pub inputs: BTreeMap<PathComponent, Node>,
|
||||||
|
/// The command (and its args) executed as the build script.
|
||||||
|
/// In the case of a Nix derivation, this is usually
|
||||||
|
/// \["/path/to/some-bash/bin/bash", "-e", "/path/to/some/builder.sh"\].
|
||||||
|
pub command_args: Vec<String>,
|
||||||
|
/// The working dir of the command, relative to the build root.
|
||||||
|
/// "build", in the case of Nix.
|
||||||
|
/// This MUST be a clean relative path, without any ".", "..", or superfluous
|
||||||
|
/// slashes.
|
||||||
|
pub working_dir: PathBuf,
|
||||||
|
/// A list of "scratch" paths, relative to the build root.
|
||||||
|
/// These will be write-able during the build.
|
||||||
|
/// \[build, nix/store\] in the case of Nix.
|
||||||
|
/// These MUST be clean relative paths, without any ".", "..", or superfluous
|
||||||
|
/// slashes, and sorted.
|
||||||
|
pub scratch_paths: Vec<PathBuf>,
|
||||||
|
/// The path where the castore input nodes will be located at,
|
||||||
|
/// "nix/store" in case of Nix.
|
||||||
|
/// Builds might also write into here (Nix builds do that).
|
||||||
|
/// This MUST be a clean relative path, without any ".", "..", or superfluous
|
||||||
|
/// slashes.
|
||||||
|
pub inputs_dir: PathBuf,
|
||||||
|
/// The list of output paths the build is expected to produce,
|
||||||
|
/// relative to the root.
|
||||||
|
/// If the path is not produced, the build is considered to have failed.
|
||||||
|
/// These MUST be clean relative paths, without any ".", "..", or superfluous
|
||||||
|
/// slashes, and sorted.
|
||||||
|
pub outputs: Vec<PathBuf>,
|
||||||
|
/// The list of environment variables and their values that should be set
|
||||||
|
/// inside the build environment.
|
||||||
|
/// This includes both environment vars set inside the derivation, as well as
|
||||||
|
/// more "ephemeral" ones like NIX_BUILD_CORES, controlled by the `--cores`
|
||||||
|
/// CLI option of `nix-build`.
|
||||||
|
/// For now, we consume this as an option when turning a Derivation into a BuildRequest,
|
||||||
|
/// similar to how Nix has a `--cores` option.
|
||||||
|
/// We don't want to bleed these very nix-specific sandbox impl details into
|
||||||
|
/// (dumber) builders if we don't have to.
|
||||||
|
/// Environment variables are sorted by their keys.
|
||||||
|
pub environment_vars: Vec<EnvVar>,
|
||||||
|
/// A set of constraints that need to be satisfied on a build host before a
|
||||||
|
/// Build can be started.
|
||||||
|
pub constraints: HashSet<BuildConstraints>,
|
||||||
|
/// Additional (small) files and their contents that should be placed into the
|
||||||
|
/// build environment, but outside inputs_dir.
|
||||||
|
/// Used for passAsFile and structuredAttrs in Nix.
|
||||||
|
pub additional_files: Vec<AdditionalFile>,
|
||||||
|
/// If this is an non-empty list, all paths in `outputs` are scanned for these.
|
||||||
|
/// For Nix, `refscan_needles` would be populated with the nixbase32 hash parts of
|
||||||
|
/// every input store path and output store path. The latter is necessary to scan
|
||||||
|
/// for references between multi-output derivations.
|
||||||
|
pub refscan_needles: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq)]
|
||||||
|
pub struct EnvVar {
|
||||||
|
/// name of the environment variable. Must not contain =.
|
||||||
|
pub key: String,
|
||||||
|
pub value: Bytes,
|
||||||
|
}
|
||||||
|
/// BuildConstraints represents certain conditions that must be fulfilled
|
||||||
|
/// inside the build environment to be able to build this.
|
||||||
|
/// Constraints can be things like required architecture and minimum amount of memory.
|
||||||
|
/// The required input paths are *not* represented in here, because it
|
||||||
|
/// wouldn't be hermetic enough - see the comment around inputs too.
|
||||||
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
|
pub enum BuildConstraints {
|
||||||
|
/// The system that's needed to execute the build.
|
||||||
|
/// Must not be empty.
|
||||||
|
System(String),
|
||||||
|
/// The amount of memory required to be available for the build, in bytes.
|
||||||
|
MinMemory(u64),
|
||||||
|
/// An absolute path that need to be available in the build
|
||||||
|
/// environment, like `/dev/kvm`.
|
||||||
|
/// This is distinct from the castore nodes in inputs.
|
||||||
|
/// These MUST be clean absolute paths, without any ".", "..", or superfluous
|
||||||
|
/// slashes, and sorted.
|
||||||
|
AvailableReadOnlyPath(PathBuf),
|
||||||
|
/// Whether the build should be able to access the network.
|
||||||
|
NetworkAccess,
|
||||||
|
/// Whether to provide a /bin/sh inside the build environment, usually a static bash.
|
||||||
|
ProvideBinSh,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq)]
|
||||||
|
pub struct AdditionalFile {
|
||||||
|
pub path: PathBuf,
|
||||||
|
pub contents: Bytes,
|
||||||
|
}
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
use tonic::async_trait;
|
use tonic::async_trait;
|
||||||
|
|
||||||
use crate::proto::{Build, BuildRequest};
|
use crate::proto::{self, Build};
|
||||||
|
|
||||||
|
pub mod build_request;
|
||||||
|
pub use crate::buildservice::build_request::*;
|
||||||
mod dummy;
|
mod dummy;
|
||||||
mod from_addr;
|
mod from_addr;
|
||||||
mod grpc;
|
mod grpc;
|
||||||
|
|
@ -15,5 +17,5 @@ pub use from_addr::from_addr;
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait BuildService: Send + Sync {
|
pub trait BuildService: Send + Sync {
|
||||||
/// TODO: document
|
/// TODO: document
|
||||||
async fn do_build(&self, request: BuildRequest) -> std::io::Result<Build>;
|
async fn do_build(&self, request: proto::BuildRequest) -> std::io::Result<Build>;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
|
use std::collections::{BTreeMap, HashSet};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use tvix_castore::DirectoryError;
|
use tvix_castore::{DirectoryError, Node, PathComponent};
|
||||||
|
|
||||||
mod grpc_buildservice_wrapper;
|
mod grpc_buildservice_wrapper;
|
||||||
|
|
||||||
|
|
@ -201,6 +202,101 @@ impl BuildRequest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl TryFrom<BuildRequest> for crate::buildservice::BuildRequest {
|
||||||
|
type Error = ValidateBuildRequestError;
|
||||||
|
fn try_from(value: BuildRequest) -> Result<Self, Self::Error> {
|
||||||
|
// validate input names. Make sure they're sorted
|
||||||
|
|
||||||
|
let mut last_name: bytes::Bytes = "".into();
|
||||||
|
let mut inputs: BTreeMap<PathComponent, Node> = BTreeMap::new();
|
||||||
|
for (i, node) in value.inputs.iter().enumerate() {
|
||||||
|
let (name, node) = node
|
||||||
|
.clone()
|
||||||
|
.into_name_and_node()
|
||||||
|
.map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?;
|
||||||
|
|
||||||
|
if name.as_ref() <= last_name.as_ref() {
|
||||||
|
return Err(ValidateBuildRequestError::InputNodesNotSorted);
|
||||||
|
} else {
|
||||||
|
inputs.insert(name.clone(), node);
|
||||||
|
last_name = name.into();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate working_dir
|
||||||
|
if !is_clean_relative_path(&value.working_dir) {
|
||||||
|
Err(ValidateBuildRequestError::InvalidWorkingDir)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate scratch paths
|
||||||
|
for (i, p) in value.scratch_paths.iter().enumerate() {
|
||||||
|
if !is_clean_relative_path(p) {
|
||||||
|
Err(ValidateBuildRequestError::InvalidScratchPath(i))?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !is_sorted(value.scratch_paths.iter().map(|e| e.as_bytes())) {
|
||||||
|
Err(ValidateBuildRequestError::ScratchPathsNotSorted)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate inputs_dir
|
||||||
|
if !is_clean_relative_path(&value.inputs_dir) {
|
||||||
|
Err(ValidateBuildRequestError::InvalidInputsDir)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate outputs
|
||||||
|
for (i, p) in value.outputs.iter().enumerate() {
|
||||||
|
if !is_clean_relative_path(p) {
|
||||||
|
Err(ValidateBuildRequestError::InvalidOutputPath(i))?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !is_sorted(value.outputs.iter().map(|e| e.as_bytes())) {
|
||||||
|
Err(ValidateBuildRequestError::OutputsNotSorted)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate environment_vars.
|
||||||
|
for (i, e) in value.environment_vars.iter().enumerate() {
|
||||||
|
if e.key.is_empty() || e.key.contains('=') {
|
||||||
|
Err(ValidateBuildRequestError::InvalidEnvVar(i))?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !is_sorted(value.environment_vars.iter().map(|e| e.key.as_bytes())) {
|
||||||
|
Err(ValidateBuildRequestError::EnvVarNotSorted)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate build constraints
|
||||||
|
let constraints = value
|
||||||
|
.constraints
|
||||||
|
.map_or(Ok(HashSet::new()), |constraints| {
|
||||||
|
constraints
|
||||||
|
.try_into()
|
||||||
|
.map_err(ValidateBuildRequestError::InvalidBuildConstraints)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// validate additional_files
|
||||||
|
for (i, additional_file) in value.additional_files.iter().enumerate() {
|
||||||
|
if !is_clean_relative_path(&additional_file.path) {
|
||||||
|
Err(ValidateBuildRequestError::InvalidAdditionalFilePath(i))?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !is_sorted(value.additional_files.iter().map(|e| e.path.as_bytes())) {
|
||||||
|
Err(ValidateBuildRequestError::AdditionalFilesNotSorted)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
inputs,
|
||||||
|
command_args: value.command_args,
|
||||||
|
working_dir: PathBuf::from(value.working_dir),
|
||||||
|
scratch_paths: value.scratch_paths.iter().map(PathBuf::from).collect(),
|
||||||
|
inputs_dir: PathBuf::from(value.inputs_dir),
|
||||||
|
outputs: value.outputs.iter().map(PathBuf::from).collect(),
|
||||||
|
environment_vars: value.environment_vars.into_iter().map(Into::into).collect(),
|
||||||
|
constraints,
|
||||||
|
additional_files: value.additional_files.into_iter().map(Into::into).collect(),
|
||||||
|
refscan_needles: value.refscan_needles,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Errors that occur during the validation of
|
/// Errors that occur during the validation of
|
||||||
/// [build_request::BuildConstraints] messages.
|
/// [build_request::BuildConstraints] messages.
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
|
@ -235,7 +331,90 @@ impl build_request::BuildConstraints {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<build_request::EnvVar> for crate::buildservice::EnvVar {
|
||||||
|
fn from(value: build_request::EnvVar) -> Self {
|
||||||
|
Self {
|
||||||
|
key: value.key,
|
||||||
|
value: value.value,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<crate::buildservice::EnvVar> for build_request::EnvVar {
|
||||||
|
fn from(value: crate::buildservice::EnvVar) -> Self {
|
||||||
|
Self {
|
||||||
|
key: value.key,
|
||||||
|
value: value.value,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<build_request::AdditionalFile> for crate::buildservice::AdditionalFile {
|
||||||
|
fn from(value: build_request::AdditionalFile) -> Self {
|
||||||
|
Self {
|
||||||
|
path: PathBuf::from(value.path),
|
||||||
|
contents: value.contents,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<crate::buildservice::AdditionalFile> for build_request::AdditionalFile {
|
||||||
|
fn from(value: crate::buildservice::AdditionalFile) -> Self {
|
||||||
|
Self {
|
||||||
|
path: value
|
||||||
|
.path
|
||||||
|
.to_str()
|
||||||
|
.expect("Tvix bug: expected a valid path")
|
||||||
|
.to_string(),
|
||||||
|
contents: value.contents,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<build_request::BuildConstraints> for HashSet<crate::buildservice::BuildConstraints> {
|
||||||
|
type Error = ValidateBuildConstraintsError;
|
||||||
|
fn try_from(value: build_request::BuildConstraints) -> Result<Self, Self::Error> {
|
||||||
|
use crate::buildservice::BuildConstraints;
|
||||||
|
|
||||||
|
// validate system
|
||||||
|
if value.system.is_empty() {
|
||||||
|
Err(ValidateBuildConstraintsError::InvalidSystem)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut build_constraints = HashSet::from([
|
||||||
|
BuildConstraints::System(value.system),
|
||||||
|
BuildConstraints::MinMemory(value.min_memory),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// validate available_ro_paths
|
||||||
|
for (i, p) in value.available_ro_paths.iter().enumerate() {
|
||||||
|
if !is_clean_absolute_path(p) {
|
||||||
|
Err(ValidateBuildConstraintsError::InvalidAvailableRoPaths(i))?
|
||||||
|
} else {
|
||||||
|
build_constraints.insert(BuildConstraints::AvailableReadOnlyPath(PathBuf::from(p)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !is_sorted(value.available_ro_paths.iter().map(|e| e.as_bytes())) {
|
||||||
|
Err(ValidateBuildConstraintsError::AvailableRoPathsNotSorted)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if value.network_access {
|
||||||
|
build_constraints.insert(BuildConstraints::NetworkAccess);
|
||||||
|
}
|
||||||
|
if value.provide_bin_sh {
|
||||||
|
build_constraints.insert(BuildConstraints::ProvideBinSh);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(build_constraints)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
// TODO: add testcases for constraints special cases. The default cases in the protos
|
||||||
|
// should result in the constraints not being added. For example min_memory 0 can be omitted.
|
||||||
|
// Also interesting testcases are "merging semantics". MimMemory(1) and MinMemory(100) will
|
||||||
|
// result in mim_memory 100, multiple AvailableReadOnlyPaths need to be merged. Contradicting
|
||||||
|
// system constraints need to fail somewhere (maybe an assertion, as only buggy code can construct it)
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{is_clean_path, is_clean_relative_path};
|
use super::{is_clean_path, is_clean_relative_path};
|
||||||
use rstest::rstest;
|
use rstest::rstest;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue