From 9caaa09765ef46a17cf5494f67171a5393c31f13 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 3 May 2025 21:48:41 +0300 Subject: [PATCH] refactor(snix/build): use stronger typed BuildResult type This changes the BuildService trait to return a typed `BuildResult`, which bundles the refscan info alongside the castore nodes. The proto type is renamed to BuildResponse, to better map to gRPC semantics. In proto land, we don't send the name for outputs anymore, be it the full path or the last component, as there's never been a guarantee this is a valid PathComponent. That entry is now required to be anonymous. The path of an output can be retrieved by looking at the original BuildRequest. Change-Id: If5ce3a009cd3dd6bb6505cd51d5f4deda261ea85 Reviewed-on: https://cl.snix.dev/c/snix/+/30387 Autosubmit: Florian Klink Tested-by: besadii Reviewed-by: Vova Kryachko --- snix/build/protos/build.proto | 22 +++--- snix/build/protos/rpc_build.proto | 2 +- snix/build/src/buildservice/build_request.rs | 22 +++++- snix/build/src/buildservice/dummy.rs | 5 +- snix/build/src/buildservice/grpc.rs | 12 +-- snix/build/src/buildservice/mod.rs | 4 +- snix/build/src/buildservice/oci.rs | 74 +++++++------------ .../src/proto/grpc_buildservice_wrapper.rs | 6 +- snix/build/src/proto/mod.rs | 67 ++++++++++++++++- snix/glue/src/snix_store_io.rs | 48 ++++++------ 10 files changed, 162 insertions(+), 100 deletions(-) diff --git a/snix/build/protos/build.proto b/snix/build/protos/build.proto index b399d9164..01f35be40 100644 --- a/snix/build/protos/build.proto +++ b/snix/build/protos/build.proto @@ -154,23 +154,25 @@ message BuildRequest { // TODO: allow describing something like "preferLocal", to influence composition? } -// A Build is (one possible) outcome of executing a [BuildRequest]. -message Build { +// A BuildResponse is (one possible) outcome of executing a [BuildRequest]. +message BuildResponse { // The orginal build request producing the build. BuildRequest build_request = 1; // <- TODO: define hashing scheme for BuildRequest, refer to it by hash? // The outputs that were produced after successfully building. - // They are sorted by their names. - repeated snix.castore.v1.Entry outputs = 2; + // They are provided in the same order as specified in the [BuildRequest]. + repeated Output outputs = 2; - message OutputNeedles { - // The numbers are indexing into `refscan_needles` originally specified in the BuildRequest. - repeated uint64 needles = 1; + message Output { + // Output entry produced by the build. It may not contain a name, + // as not all OS paths can be represented as castore paths. + // The path this was ingested from can be looked up in the original build request. + snix.castore.v1.Entry output = 1; + + // Indexes into the found [BuildRequest::refscan_needles] in this output. + repeated uint64 needles = 2; } - // Contains the same number of elements as the `outputs` field. - repeated OutputNeedles outputs_needles = 3; - // TODO: where did this run, how long, logs, … } diff --git a/snix/build/protos/rpc_build.proto b/snix/build/protos/rpc_build.proto index b51defbd1..969e1d51c 100644 --- a/snix/build/protos/rpc_build.proto +++ b/snix/build/protos/rpc_build.proto @@ -10,5 +10,5 @@ import "snix/build/protos/build.proto"; option go_package = "snix.dev/build/proto;buildv1"; service BuildService { - rpc DoBuild(BuildRequest) returns (Build); + rpc DoBuild(BuildRequest) returns (BuildResponse); } diff --git a/snix/build/src/buildservice/build_request.rs b/snix/build/src/buildservice/build_request.rs index e829dcca5..c4fd6e56f 100644 --- a/snix/build/src/buildservice/build_request.rs +++ b/snix/build/src/buildservice/build_request.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::path::PathBuf; use bytes::Bytes; @@ -129,3 +129,23 @@ pub struct AdditionalFile { pub path: PathBuf, pub contents: Bytes, } + +/// Describes the result of a [BuildRequest]. +#[derive(Debug, Clone, PartialEq)] +pub struct BuildResult { + /// The original BuildRequest. + pub build_request: BuildRequest, + + /// The outputs that were produced after successfully building. + // They are sorted by the order specified in the build request. + pub outputs: Vec, +} + +/// Specific information about an individual output in [BuildResult]. +#[derive(Debug, Clone, PartialEq)] +pub struct BuildOutput { + /// The castore node describing the contents. + pub node: Node, + /// Indexes into the found [BuildRequest::refscan_needles] in that output. + pub output_needles: BTreeSet, +} diff --git a/snix/build/src/buildservice/dummy.rs b/snix/build/src/buildservice/dummy.rs index 336820137..5d0e5163b 100644 --- a/snix/build/src/buildservice/dummy.rs +++ b/snix/build/src/buildservice/dummy.rs @@ -2,8 +2,7 @@ use tonic::async_trait; use tracing::instrument; use super::BuildService; -use crate::buildservice::BuildRequest; -use crate::proto; +use crate::buildservice::{BuildRequest, BuildResult}; #[derive(Default)] pub struct DummyBuildService {} @@ -11,7 +10,7 @@ pub struct DummyBuildService {} #[async_trait] impl BuildService for DummyBuildService { #[instrument(skip(self), ret, err)] - async fn do_build(&self, _request: BuildRequest) -> std::io::Result { + async fn do_build(&self, _request: BuildRequest) -> std::io::Result { Err(std::io::Error::new( std::io::ErrorKind::Other, "builds are not supported with DummyBuildService", diff --git a/snix/build/src/buildservice/grpc.rs b/snix/build/src/buildservice/grpc.rs index 14f06f0ee..ca735866e 100644 --- a/snix/build/src/buildservice/grpc.rs +++ b/snix/build/src/buildservice/grpc.rs @@ -3,7 +3,7 @@ use tonic::{async_trait, transport::Channel}; use crate::buildservice::BuildRequest; use crate::proto::{self, build_service_client::BuildServiceClient}; -use super::BuildService; +use super::{BuildResult, BuildService}; pub struct GRPCBuildService { client: BuildServiceClient, @@ -18,12 +18,14 @@ impl GRPCBuildService { #[async_trait] impl BuildService for GRPCBuildService { - async fn do_build(&self, request: BuildRequest) -> std::io::Result { + async fn do_build(&self, request: BuildRequest) -> std::io::Result { let mut client = self.client.clone(); - client + let resp = client .do_build(Into::::into(request)) .await - .map(|resp| resp.into_inner()) - .map_err(std::io::Error::other) + .map_err(std::io::Error::other)? + .into_inner(); + + Ok::(resp.try_into().map_err(std::io::Error::other)?) } } diff --git a/snix/build/src/buildservice/mod.rs b/snix/build/src/buildservice/mod.rs index b12db6b95..dfb480076 100644 --- a/snix/build/src/buildservice/mod.rs +++ b/snix/build/src/buildservice/mod.rs @@ -1,7 +1,5 @@ use tonic::async_trait; -use crate::proto; - pub mod build_request; pub use crate::buildservice::build_request::*; mod dummy; @@ -17,5 +15,5 @@ pub use from_addr::from_addr; #[async_trait] pub trait BuildService: Send + Sync { /// TODO: document - async fn do_build(&self, request: BuildRequest) -> std::io::Result; + async fn do_build(&self, request: BuildRequest) -> std::io::Result; } diff --git a/snix/build/src/buildservice/oci.rs b/snix/build/src/buildservice/oci.rs index 16fe593ac..c93859aed 100644 --- a/snix/build/src/buildservice/oci.rs +++ b/snix/build/src/buildservice/oci.rs @@ -12,11 +12,8 @@ use tonic::async_trait; use tracing::{Span, debug, instrument, warn}; use uuid::Uuid; -use crate::buildservice::BuildRequest; -use crate::{ - oci::{get_host_output_paths, make_bundle, make_spec}, - proto::{self, build::OutputNeedles}, -}; +use crate::buildservice::{BuildOutput, BuildRequest, BuildResult}; +use crate::oci::{get_host_output_paths, make_bundle, make_spec}; use std::{ffi::OsStr, path::PathBuf, process::Stdio}; use super::BuildService; @@ -60,7 +57,7 @@ where DS: DirectoryService + Clone + 'static, { #[instrument(skip_all, err)] - async fn do_build(&self, request: BuildRequest) -> std::io::Result { + async fn do_build(&self, request: BuildRequest) -> std::io::Result { let _permit = self.concurrent_builds.acquire().await.unwrap(); let bundle_name = Uuid::new_v4(); @@ -144,61 +141,46 @@ where // Ingest build outputs into the castore. // We use try_join_all here. No need to spawn new tasks, as this is // mostly IO bound. - let (outputs, outputs_needles) = futures::future::try_join_all( - host_output_paths.into_iter().enumerate().map(|(i, p)| { + let outputs = futures::future::try_join_all(host_output_paths.into_iter().enumerate().map( + |(i, host_output_path)| { let output_path = request.outputs[i].clone(); let patterns = patterns.clone(); async move { - debug!(host.path=?p, output.path=?output_path, "ingesting path"); + debug!(host.path=?host_output_path, output.path=?output_path, "ingesting path"); let scanner = ReferenceScanner::new(patterns); - let output_node = ingest_path( - self.blob_service.clone(), - &self.directory_service, - p, - Some(&scanner), - ) - .await - .map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Unable to ingest output: {}", e), - ) - })?; - let needles = OutputNeedles { - needles: scanner + Ok::<_, std::io::Error>(BuildOutput { + node: ingest_path( + self.blob_service.clone(), + &self.directory_service, + host_output_path, + Some(&scanner), + ) + .await + .map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unable to ingest output: {}", e), + ) + })?, + + output_needles: scanner .matches() .into_iter() .enumerate() .filter(|(_, val)| *val) .map(|(idx, _)| idx as u64) .collect(), - }; - - Ok::<_, std::io::Error>(( - snix_castore::proto::Entry::from_name_and_node( - output_path - .file_name() - .and_then(|s| s.to_str()) - .map(|s| s.to_string()) - .unwrap_or("".into()) - .into(), - output_node, - ), - needles, - )) + }) } - }), - ) - .await? - .into_iter() - .unzip(); + }, + )) + .await?; - Ok(proto::Build { - build_request: Some(request.into()), + Ok(BuildResult { + build_request: request, outputs, - outputs_needles, }) } } diff --git a/snix/build/src/proto/grpc_buildservice_wrapper.rs b/snix/build/src/proto/grpc_buildservice_wrapper.rs index 8a2d36ac5..abc31f328 100644 --- a/snix/build/src/proto/grpc_buildservice_wrapper.rs +++ b/snix/build/src/proto/grpc_buildservice_wrapper.rs @@ -2,7 +2,7 @@ use crate::buildservice::BuildService; use std::ops::Deref; use tonic::async_trait; -use super::{Build, BuildRequest}; +use super::{BuildRequest, BuildResponse}; /// Implements the gRPC server trait ([crate::proto::build_service_server::BuildService] /// for anything implementing [BuildService]. @@ -26,11 +26,11 @@ where async fn do_build( &self, request: tonic::Request, - ) -> Result, tonic::Status> { + ) -> Result, tonic::Status> { let request = TryInto::::try_into(request.into_inner()) .map_err(|err| tonic::Status::new(tonic::Code::InvalidArgument, err.to_string()))?; match self.inner.do_build(request).await { - Ok(resp) => Ok(tonic::Response::new(resp)), + Ok(resp) => Ok(tonic::Response::new(resp.into())), Err(e) => Err(tonic::Status::internal(e.to_string())), } } diff --git a/snix/build/src/proto/mod.rs b/snix/build/src/proto/mod.rs index ec06450d1..82a9c2bff 100644 --- a/snix/build/src/proto/mod.rs +++ b/snix/build/src/proto/mod.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::path::{Path, PathBuf}; use itertools::Itertools; @@ -8,6 +8,8 @@ mod grpc_buildservice_wrapper; pub use grpc_buildservice_wrapper::GRPCBuildServiceWrapper; +use crate::buildservice::BuildResult; + tonic::include_proto!("snix.build.v1"); #[cfg(feature = "tonic-reflection")] @@ -59,6 +61,19 @@ pub enum ValidateBuildRequestError { AdditionalFilesNotSorted, } +/// Errors that occur during the validation of [BuildResult] messages. +#[derive(Debug, thiserror::Error)] +pub enum ValidateBuildResultError { + #[error("request field is unpopulated")] + MissingRequestField, + #[error("request is invalid")] + InvalidBuildRequest(ValidateBuildRequestError), + #[error("output entry {0} missing")] + MissingOutputEntry(usize), + #[error("output entry {0} invalid")] + InvalidOutputEntry(usize), +} + /// Checks a path to be without any '..' components, and clean (no superfluous /// slashes). fn is_clean_path>(p: P) -> bool { @@ -264,6 +279,56 @@ impl TryFrom for crate::buildservice::BuildRequest { } } +impl From for BuildResponse { + fn from(value: BuildResult) -> Self { + Self { + build_request: Some(value.build_request.into()), + outputs: value + .outputs + .into_iter() + .map(|output| build_response::Output { + output: Some(snix_castore::proto::Entry::from_name_and_node( + "".into(), + output.node, + )), + needles: output.output_needles.into_iter().collect(), + }) + .collect(), + } + } +} + +impl TryFrom for BuildResult { + type Error = ValidateBuildResultError; + + fn try_from(value: BuildResponse) -> Result { + Ok(Self { + build_request: value + .build_request + .ok_or(ValidateBuildResultError::MissingRequestField)? + .try_into() + .map_err(ValidateBuildResultError::InvalidBuildRequest)?, + outputs: value + .outputs + .into_iter() + .enumerate() + .map(|(i, output)| { + let node = output + .output + .ok_or(ValidateBuildResultError::MissingOutputEntry(i))? + .try_into_anonymous_node() + .map_err(|_| ValidateBuildResultError::InvalidOutputEntry(i))?; + + Ok::<_, ValidateBuildResultError>(crate::buildservice::BuildOutput { + node, + output_needles: BTreeSet::from_iter(output.needles), + }) + }) + .try_collect()?, + }) + } +} + /// Errors that occur during the validation of /// [build_request::BuildConstraints] messages. #[derive(Debug, thiserror::Error)] diff --git a/snix/glue/src/snix_store_io.rs b/snix/glue/src/snix_store_io.rs index f7306c1b6..970be3b5a 100644 --- a/snix/glue/src/snix_store_io.rs +++ b/snix/glue/src/snix_store_io.rs @@ -115,7 +115,6 @@ impl SnixStoreIO { .get(*store_path.digest()) .await? { - // TODO: use stricter typed BuildRequest here. Some(path_info) => path_info, // If there's no PathInfo found, this normally means we have to // trigger the build (and insert into PathInfoService, after @@ -195,6 +194,8 @@ impl SnixStoreIO { // synthesize the build request. let build_request = derivation_to_build_request(&drv, &resolved_inputs)?; + let build_request_outputs = build_request.outputs.clone(); + // create a build let build_result = self .build_service @@ -204,39 +205,31 @@ impl SnixStoreIO { .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))?; let mut out_path_info: Option = None; + // For each output, insert a PathInfo. - for ((output, output_needles), drv_output) in build_result - .outputs - .iter() - // Important: build outputs must match drv outputs order, - // otherwise the outputs will be mixed up - // see https://git.snix.dev/snix/snix/issues/89 - .zip(build_result.outputs_needles.iter()) - .zip(drv.outputs.iter()) + for (output, output_path) in + build_result.outputs.into_iter().zip(build_request_outputs) { - let (output_name, output_node) = output - .clone() - .try_into_name_and_node() - .expect("invalid node"); + // convert output_path to StorePath + let output_store_path: StorePath = { + use std::os::unix::ffi::OsStrExt; + + StorePath::from_bytes(output_path.as_path().as_os_str().as_bytes()) + .map_err(|e| { + std::io::Error::new(std::io::ErrorKind::InvalidData, e) + })? + }; // calculate the nar representation let (nar_size, nar_sha256) = self .nar_calculation_service - .calculate_nar(&output_node) + .calculate_nar(&output.node) .await?; // assemble the PathInfo to persist let path_info = PathInfo { - store_path: drv_output - .1 - .path - .as_ref() - .ok_or(std::io::Error::new( - std::io::ErrorKind::Other, - "Snix bug: missing output store path", - ))? - .to_owned(), - node: output_node, + store_path: output_store_path.clone(), + node: output.node, references: { let all_possible_refs: Vec<_> = drv .outputs @@ -244,8 +237,8 @@ impl SnixStoreIO { .filter_map(|output| output.path.as_ref()) .chain(resolved_inputs.keys()) .collect(); - let mut references: Vec<_> = output_needles - .needles + let mut references: Vec<_> = output + .output_needles .iter() // Map each output needle index back to the refscan_needle .map(|idx| { @@ -286,7 +279,8 @@ impl SnixStoreIO { .put(path_info.clone()) .await .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))?; - if output_name.as_ref() == store_path.to_string().as_bytes() { + + if store_path == &output_store_path { out_path_info = Some(path_info); } }