refactor(snix/build): use stronger typed BuildResult type

This changes the BuildService trait to return a typed `BuildResult`,
which bundles the refscan info alongside the castore nodes.
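For orientation, this is the new Rust-side surface in condensed form, taken
from the diffs below (doc comments trimmed; the crate's existing
`BuildRequest` type and the castore `Node` type are assumed, so this is an
excerpt rather than a standalone program):

use std::collections::BTreeSet;

use snix_castore::Node; // assumed import path for the castore node type
use tonic::async_trait;

#[async_trait]
pub trait BuildService: Send + Sync {
    /// Executes the request, returning a typed result instead of a proto message.
    async fn do_build(&self, request: BuildRequest) -> std::io::Result<BuildResult>;
}

/// Describes the result of a [BuildRequest].
#[derive(Debug, Clone, PartialEq)]
pub struct BuildResult {
    /// The original BuildRequest.
    pub build_request: BuildRequest,
    /// One entry per output, in the order specified in the request.
    pub outputs: Vec<BuildOutput>,
}

/// Per-output information in a [BuildResult].
#[derive(Debug, Clone, PartialEq)]
pub struct BuildOutput {
    /// The castore node describing the output's contents.
    pub node: Node,
    /// Indexes into [BuildRequest::refscan_needles] found in this output.
    pub output_needles: BTreeSet<u64>,
}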

The proto type is renamed to BuildResponse, to better map to gRPC
semantics.

In proto land, we no longer send a name for outputs, neither the full path
nor its last component, as there has never been a guarantee that it is a
valid PathComponent. The output entry is now required to be anonymous.
The path of an output can be retrieved by looking at the original
BuildRequest.
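
To make that lookup concrete, here is a small self-contained sketch of the
zip-by-position pattern the snix-store IO glue in this commit uses. The
structs are toy stand-ins with hypothetical field types (the castore node is
omitted), not the real snix types:

use std::collections::BTreeSet;
use std::path::PathBuf;

// Toy stand-ins, only to demonstrate the lookup; the real types live in
// snix-build's buildservice module and carry more fields (e.g. the castore node).
struct BuildRequest {
    outputs: Vec<PathBuf>,        // requested output paths, in order
    refscan_needles: Vec<String>, // reference-scan needles, indexed by position
}

struct BuildOutput {
    output_needles: BTreeSet<u64>, // indexes into the request's refscan_needles
}

struct BuildResult {
    build_request: BuildRequest,
    outputs: Vec<BuildOutput>, // same order as build_request.outputs
}

fn main() {
    let result = BuildResult {
        build_request: BuildRequest {
            outputs: vec![PathBuf::from("/nix/store/xxxxxxxx-example-out")],
            refscan_needles: vec!["yyyyyyyy-example-dep".to_string()],
        },
        outputs: vec![BuildOutput {
            output_needles: BTreeSet::from([0]),
        }],
    };

    // Outputs are anonymous; their paths come from zipping with the request,
    // and needle indexes map back into the request's refscan_needles.
    for (output, path) in result.outputs.iter().zip(&result.build_request.outputs) {
        let refs: Vec<&str> = output
            .output_needles
            .iter()
            .map(|idx| result.build_request.refscan_needles[*idx as usize].as_str())
            .collect();
        println!("{} references {:?}", path.display(), refs);
    }
}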

Change-Id: If5ce3a009cd3dd6bb6505cd51d5f4deda261ea85
Reviewed-on: https://cl.snix.dev/c/snix/+/30387
Autosubmit: Florian Klink <flokli@flokli.de>
Tested-by: besadii
Reviewed-by: Vova Kryachko <v.kryachko@gmail.com>
Florian Klink 2025-05-03 21:48:41 +03:00 committed by clbot
parent 48807c90ec
commit 9caaa09765
10 changed files with 162 additions and 100 deletions

@@ -154,23 +154,25 @@ message BuildRequest {
   // TODO: allow describing something like "preferLocal", to influence composition?
 }
 
-// A Build is (one possible) outcome of executing a [BuildRequest].
-message Build {
+// A BuildResponse is (one possible) outcome of executing a [BuildRequest].
+message BuildResponse {
   // The orginal build request producing the build.
   BuildRequest build_request = 1; // <- TODO: define hashing scheme for BuildRequest, refer to it by hash?
 
   // The outputs that were produced after successfully building.
-  // They are sorted by their names.
-  repeated snix.castore.v1.Entry outputs = 2;
+  // They are provided in the same order as specified in the [BuildRequest].
+  repeated Output outputs = 2;
 
-  message OutputNeedles {
-    // The numbers are indexing into `refscan_needles` originally specified in the BuildRequest.
-    repeated uint64 needles = 1;
+  message Output {
+    // Output entry produced by the build. It may not contain a name,
+    // as not all OS paths can be represented as castore paths.
+    // The path this was ingested from can be looked up in the original build request.
+    snix.castore.v1.Entry output = 1;
+
+    // Indexes into the found [BuildRequest::refscan_needles] in this output.
+    repeated uint64 needles = 2;
   }
 
-  // Contains the same number of elements as the `outputs` field.
-  repeated OutputNeedles outputs_needles = 3;
-
   // TODO: where did this run, how long, logs,
 }

@@ -10,5 +10,5 @@ import "snix/build/protos/build.proto";
 option go_package = "snix.dev/build/proto;buildv1";
 
 service BuildService {
-  rpc DoBuild(BuildRequest) returns (Build);
+  rpc DoBuild(BuildRequest) returns (BuildResponse);
 }

@@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, HashSet};
+use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::path::PathBuf;
 
 use bytes::Bytes;

@@ -129,3 +129,23 @@ pub struct AdditionalFile {
     pub path: PathBuf,
     pub contents: Bytes,
 }
+
+/// Describes the result of a [BuildRequest].
+#[derive(Debug, Clone, PartialEq)]
+pub struct BuildResult {
+    /// The original BuildRequest.
+    pub build_request: BuildRequest,
+    /// The outputs that were produced after successfully building.
+    // They are sorted by the order specified in the build request.
+    pub outputs: Vec<BuildOutput>,
+}
+
+/// Specific information about an individual output in [BuildResult].
+#[derive(Debug, Clone, PartialEq)]
+pub struct BuildOutput {
+    /// The castore node describing the contents.
+    pub node: Node,
+    /// Indexes into the found [BuildRequest::refscan_needles] in that output.
+    pub output_needles: BTreeSet<u64>,
+}

@@ -2,8 +2,7 @@ use tonic::async_trait;
 use tracing::instrument;
 
 use super::BuildService;
-use crate::buildservice::BuildRequest;
-use crate::proto;
+use crate::buildservice::{BuildRequest, BuildResult};
 
 #[derive(Default)]
 pub struct DummyBuildService {}

@@ -11,7 +10,7 @@ pub struct DummyBuildService {}
 #[async_trait]
 impl BuildService for DummyBuildService {
     #[instrument(skip(self), ret, err)]
-    async fn do_build(&self, _request: BuildRequest) -> std::io::Result<proto::Build> {
+    async fn do_build(&self, _request: BuildRequest) -> std::io::Result<BuildResult> {
         Err(std::io::Error::new(
             std::io::ErrorKind::Other,
             "builds are not supported with DummyBuildService",

@@ -3,7 +3,7 @@ use tonic::{async_trait, transport::Channel};
 use crate::buildservice::BuildRequest;
 use crate::proto::{self, build_service_client::BuildServiceClient};
 
-use super::BuildService;
+use super::{BuildResult, BuildService};
 
 pub struct GRPCBuildService {
     client: BuildServiceClient<Channel>,

@@ -18,12 +18,14 @@ impl GRPCBuildService {
 #[async_trait]
 impl BuildService for GRPCBuildService {
-    async fn do_build(&self, request: BuildRequest) -> std::io::Result<proto::Build> {
+    async fn do_build(&self, request: BuildRequest) -> std::io::Result<BuildResult> {
         let mut client = self.client.clone();
-        client
+        let resp = client
             .do_build(Into::<proto::BuildRequest>::into(request))
             .await
-            .map(|resp| resp.into_inner())
-            .map_err(std::io::Error::other)
+            .map_err(std::io::Error::other)?
+            .into_inner();
+
+        Ok::<BuildResult, _>(resp.try_into().map_err(std::io::Error::other)?)
     }
 }

@@ -1,7 +1,5 @@
 use tonic::async_trait;
 
-use crate::proto;
-
 pub mod build_request;
 pub use crate::buildservice::build_request::*;
 mod dummy;

@@ -17,5 +15,5 @@ pub use from_addr::from_addr;
 #[async_trait]
 pub trait BuildService: Send + Sync {
     /// TODO: document
-    async fn do_build(&self, request: BuildRequest) -> std::io::Result<proto::Build>;
+    async fn do_build(&self, request: BuildRequest) -> std::io::Result<BuildResult>;
 }

@@ -12,11 +12,8 @@ use tonic::async_trait;
 use tracing::{Span, debug, instrument, warn};
 use uuid::Uuid;
 
-use crate::buildservice::BuildRequest;
-use crate::{
-    oci::{get_host_output_paths, make_bundle, make_spec},
-    proto::{self, build::OutputNeedles},
-};
+use crate::buildservice::{BuildOutput, BuildRequest, BuildResult};
+use crate::oci::{get_host_output_paths, make_bundle, make_spec};
 use std::{ffi::OsStr, path::PathBuf, process::Stdio};
 
 use super::BuildService;

@@ -60,7 +57,7 @@ where
     DS: DirectoryService + Clone + 'static,
 {
     #[instrument(skip_all, err)]
-    async fn do_build(&self, request: BuildRequest) -> std::io::Result<proto::Build> {
+    async fn do_build(&self, request: BuildRequest) -> std::io::Result<BuildResult> {
         let _permit = self.concurrent_builds.acquire().await.unwrap();
 
         let bundle_name = Uuid::new_v4();

@@ -144,61 +141,46 @@ where
         // Ingest build outputs into the castore.
         // We use try_join_all here. No need to spawn new tasks, as this is
         // mostly IO bound.
-        let (outputs, outputs_needles) = futures::future::try_join_all(
-            host_output_paths.into_iter().enumerate().map(|(i, p)| {
+        let outputs = futures::future::try_join_all(host_output_paths.into_iter().enumerate().map(
+            |(i, host_output_path)| {
                 let output_path = request.outputs[i].clone();
                 let patterns = patterns.clone();
 
                 async move {
-                    debug!(host.path=?p, output.path=?output_path, "ingesting path");
+                    debug!(host.path=?host_output_path, output.path=?output_path, "ingesting path");
                     let scanner = ReferenceScanner::new(patterns);
-                    let output_node = ingest_path(
-                        self.blob_service.clone(),
-                        &self.directory_service,
-                        p,
-                        Some(&scanner),
-                    )
-                    .await
-                    .map_err(|e| {
-                        std::io::Error::new(
-                            std::io::ErrorKind::InvalidData,
-                            format!("Unable to ingest output: {}", e),
-                        )
-                    })?;
-
-                    let needles = OutputNeedles {
-                        needles: scanner
+                    Ok::<_, std::io::Error>(BuildOutput {
+                        node: ingest_path(
+                            self.blob_service.clone(),
+                            &self.directory_service,
+                            host_output_path,
+                            Some(&scanner),
+                        )
+                        .await
+                        .map_err(|e| {
+                            std::io::Error::new(
+                                std::io::ErrorKind::InvalidData,
+                                format!("Unable to ingest output: {}", e),
+                            )
+                        })?,
+                        output_needles: scanner
                             .matches()
                             .into_iter()
                             .enumerate()
                             .filter(|(_, val)| *val)
                             .map(|(idx, _)| idx as u64)
                             .collect(),
-                    };
-
-                    Ok::<_, std::io::Error>((
-                        snix_castore::proto::Entry::from_name_and_node(
-                            output_path
-                                .file_name()
-                                .and_then(|s| s.to_str())
-                                .map(|s| s.to_string())
-                                .unwrap_or("".into())
-                                .into(),
-                            output_node,
-                        ),
-                        needles,
-                    ))
+                    })
                 }
-            }),
-        )
-        .await?
-        .into_iter()
-        .unzip();
+            },
+        ))
+        .await?;
 
-        Ok(proto::Build {
-            build_request: Some(request.into()),
+        Ok(BuildResult {
+            build_request: request,
             outputs,
-            outputs_needles,
         })
     }
 }

@@ -2,7 +2,7 @@ use crate::buildservice::BuildService;
 use std::ops::Deref;
 use tonic::async_trait;
 
-use super::{Build, BuildRequest};
+use super::{BuildRequest, BuildResponse};
 
 /// Implements the gRPC server trait ([crate::proto::build_service_server::BuildService]
 /// for anything implementing [BuildService].

@@ -26,11 +26,11 @@ where
     async fn do_build(
         &self,
         request: tonic::Request<BuildRequest>,
-    ) -> Result<tonic::Response<Build>, tonic::Status> {
+    ) -> Result<tonic::Response<BuildResponse>, tonic::Status> {
         let request = TryInto::<crate::buildservice::BuildRequest>::try_into(request.into_inner())
             .map_err(|err| tonic::Status::new(tonic::Code::InvalidArgument, err.to_string()))?;
         match self.inner.do_build(request).await {
-            Ok(resp) => Ok(tonic::Response::new(resp)),
+            Ok(resp) => Ok(tonic::Response::new(resp.into())),
             Err(e) => Err(tonic::Status::internal(e.to_string())),
         }
     }

@@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, HashSet};
+use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::path::{Path, PathBuf};
 
 use itertools::Itertools;

@@ -8,6 +8,8 @@ mod grpc_buildservice_wrapper;
 pub use grpc_buildservice_wrapper::GRPCBuildServiceWrapper;
 
+use crate::buildservice::BuildResult;
+
 tonic::include_proto!("snix.build.v1");
 
 #[cfg(feature = "tonic-reflection")]

@@ -59,6 +61,19 @@ pub enum ValidateBuildRequestError {
     AdditionalFilesNotSorted,
 }
 
+/// Errors that occur during the validation of [BuildResult] messages.
+#[derive(Debug, thiserror::Error)]
+pub enum ValidateBuildResultError {
+    #[error("request field is unpopulated")]
+    MissingRequestField,
+    #[error("request is invalid")]
+    InvalidBuildRequest(ValidateBuildRequestError),
+    #[error("output entry {0} missing")]
+    MissingOutputEntry(usize),
+    #[error("output entry {0} invalid")]
+    InvalidOutputEntry(usize),
+}
+
 /// Checks a path to be without any '..' components, and clean (no superfluous
 /// slashes).
 fn is_clean_path<P: AsRef<Path>>(p: P) -> bool {

@@ -264,6 +279,56 @@ impl TryFrom<BuildRequest> for crate::buildservice::BuildRequest {
     }
 }
 
+impl From<BuildResult> for BuildResponse {
+    fn from(value: BuildResult) -> Self {
+        Self {
+            build_request: Some(value.build_request.into()),
+            outputs: value
+                .outputs
+                .into_iter()
+                .map(|output| build_response::Output {
+                    output: Some(snix_castore::proto::Entry::from_name_and_node(
+                        "".into(),
+                        output.node,
+                    )),
+                    needles: output.output_needles.into_iter().collect(),
+                })
+                .collect(),
+        }
+    }
+}
+
+impl TryFrom<BuildResponse> for BuildResult {
+    type Error = ValidateBuildResultError;
+
+    fn try_from(value: BuildResponse) -> Result<Self, Self::Error> {
+        Ok(Self {
+            build_request: value
+                .build_request
+                .ok_or(ValidateBuildResultError::MissingRequestField)?
+                .try_into()
+                .map_err(ValidateBuildResultError::InvalidBuildRequest)?,
+            outputs: value
+                .outputs
+                .into_iter()
+                .enumerate()
+                .map(|(i, output)| {
+                    let node = output
+                        .output
+                        .ok_or(ValidateBuildResultError::MissingOutputEntry(i))?
+                        .try_into_anonymous_node()
+                        .map_err(|_| ValidateBuildResultError::InvalidOutputEntry(i))?;
+
+                    Ok::<_, ValidateBuildResultError>(crate::buildservice::BuildOutput {
+                        node,
+                        output_needles: BTreeSet::from_iter(output.needles),
+                    })
+                })
+                .try_collect()?,
+        })
+    }
+}
+
 /// Errors that occur during the validation of
 /// [build_request::BuildConstraints] messages.
 #[derive(Debug, thiserror::Error)]

@@ -115,7 +115,6 @@ impl SnixStoreIO {
             .get(*store_path.digest())
             .await?
         {
-            // TODO: use stricter typed BuildRequest here.
             Some(path_info) => path_info,
             // If there's no PathInfo found, this normally means we have to
             // trigger the build (and insert into PathInfoService, after

@@ -195,6 +194,8 @@ impl SnixStoreIO {
                 // synthesize the build request.
                 let build_request = derivation_to_build_request(&drv, &resolved_inputs)?;
 
+                let build_request_outputs = build_request.outputs.clone();
+
                 // create a build
                 let build_result = self
                     .build_service

@@ -204,39 +205,31 @@ impl SnixStoreIO {
                     .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))?;
 
                 let mut out_path_info: Option<PathInfo> = None;
 
                 // For each output, insert a PathInfo.
-                for ((output, output_needles), drv_output) in build_result
-                    .outputs
-                    .iter()
-                    // Important: build outputs must match drv outputs order,
-                    // otherwise the outputs will be mixed up
-                    // see https://git.snix.dev/snix/snix/issues/89
-                    .zip(build_result.outputs_needles.iter())
-                    .zip(drv.outputs.iter())
+                for (output, output_path) in
+                    build_result.outputs.into_iter().zip(build_request_outputs)
                 {
-                    let (output_name, output_node) = output
-                        .clone()
-                        .try_into_name_and_node()
-                        .expect("invalid node");
+                    // convert output_path to StorePath
+                    let output_store_path: StorePath<String> = {
+                        use std::os::unix::ffi::OsStrExt;
+
+                        StorePath::from_bytes(output_path.as_path().as_os_str().as_bytes())
+                            .map_err(|e| {
+                                std::io::Error::new(std::io::ErrorKind::InvalidData, e)
+                            })?
+                    };
 
                     // calculate the nar representation
                     let (nar_size, nar_sha256) = self
                         .nar_calculation_service
-                        .calculate_nar(&output_node)
+                        .calculate_nar(&output.node)
                         .await?;
 
                     // assemble the PathInfo to persist
                     let path_info = PathInfo {
-                        store_path: drv_output
-                            .1
-                            .path
-                            .as_ref()
-                            .ok_or(std::io::Error::new(
-                                std::io::ErrorKind::Other,
-                                "Snix bug: missing output store path",
-                            ))?
-                            .to_owned(),
-                        node: output_node,
+                        store_path: output_store_path.clone(),
+                        node: output.node,
                         references: {
                             let all_possible_refs: Vec<_> = drv
                                 .outputs

@@ -244,8 +237,8 @@ impl SnixStoreIO {
                                 .filter_map(|output| output.path.as_ref())
                                 .chain(resolved_inputs.keys())
                                 .collect();
-                            let mut references: Vec<_> = output_needles
-                                .needles
+                            let mut references: Vec<_> = output
+                                .output_needles
                                 .iter()
                                 // Map each output needle index back to the refscan_needle
                                 .map(|idx| {

@@ -286,7 +279,8 @@ impl SnixStoreIO {
                         .put(path_info.clone())
                         .await
                         .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))?;
-                    if output_name.as_ref() == store_path.to_string().as_bytes() {
+
+                    if store_path == &output_store_path {
                         out_path_info = Some(path_info);
                     }
                 }