diff --git a/snix/glue/src/snix_build.rs b/snix/glue/src/snix_build.rs
index 3efa0c93f..72c48e293 100644
--- a/snix/glue/src/snix_build.rs
+++ b/snix/glue/src/snix_build.rs
@@ -1,7 +1,7 @@
 //! This module contains glue code translating from
 //! [nix_compat::derivation::Derivation] to [snix_build::buildservice::BuildRequest].
 
-use std::collections::{BTreeMap, HashSet};
+use std::collections::{BTreeMap, HashSet, VecDeque};
 use std::path::PathBuf;
 
 use bytes::Bytes;
@@ -10,6 +10,8 @@ use sha2::{Digest, Sha256};
 use snix_build::buildservice::{AdditionalFile, BuildConstraints, BuildRequest, EnvVar};
 use snix_castore::Node;
 
+use crate::known_paths::KnownPaths;
+
 /// These are the environment variables that Nix sets in its sandbox for every
 /// build.
 const NIX_ENVIRONMENT_VARS: [(&str, &str); 12] = [
@@ -27,30 +29,115 @@ const NIX_ENVIRONMENT_VARS: [(&str, &str); 12] = [
     ("TMPDIR", "/build"),
 ];
 
-/// Get an iterator of store paths whose nixbase32 hashes will be the needles for refscanning
-/// Importantly, the returned order will match the one used by [derivation_to_build_request]
-/// so users may use this function to map back from the found needles to a store path
-pub(crate) fn get_refscan_needles(
-    derivation: &Derivation,
-) -> impl Iterator<Item = &StorePath<String>> {
-    derivation
-        .outputs
-        .values()
-        .filter_map(|output| output.path.as_ref())
-        .chain(derivation.input_sources.iter())
-        .chain(derivation.input_derivations.keys())
+/// BFS queue needs to track both leaf store paths and
+/// input derivation outputs.
+#[derive(Eq, Hash, PartialEq, Clone)]
+enum DerivationQueueItem<'a> {
+    /// Leaf input of a derivation
+    InputSource(&'a StorePath<String>),
+    /// Derivation input that can transitively produce more paths
+    /// that are needed for a given output.
+    InputDerivation {
+        drv_path: &'a StorePath<String>,
+        output: &'a String,
+    },
+}
+
+/// Iterator that yields store paths in a breadth-first order.
+/// It is used to get all inputs for a derivation and the needles for refscanning.
+struct BfsDerivationInputs<'a> {
+    queue: VecDeque<DerivationQueueItem<'a>>,
+    visited: HashSet<DerivationQueueItem<'a>>,
+    known_paths: &'a KnownPaths,
+}
+
+impl<'a> Iterator for BfsDerivationInputs<'a> {
+    type Item = &'a StorePath<String>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some(item) = self.queue.pop_front() {
+            if !self.visited.insert(item.clone()) {
+                continue;
+            }
+            match item {
+                DerivationQueueItem::InputSource(path) => {
+                    return Some(path);
+                }
+                DerivationQueueItem::InputDerivation { drv_path, output } => {
+                    let drv = self
+                        .known_paths
+                        .get_drv_by_drvpath(drv_path)
+                        .expect("drv Bug!!!");
+                    let output_path = drv
+                        .outputs
+                        .get(output)
+                        .expect("No output bug!")
+                        .path
+                        .as_ref()
+                        .expect("output has no store path");
+                    if self
+                        .visited
+                        .insert(DerivationQueueItem::InputSource(output_path))
+                    {
+                        self.queue.extend(
+                            drv.input_sources
+                                .iter()
+                                .map(DerivationQueueItem::InputSource)
+                                .chain(drv.input_derivations.iter().flat_map(
+                                    |(drv_path, outs)| {
+                                        outs.iter().map(move |output| {
+                                            DerivationQueueItem::InputDerivation {
+                                                drv_path,
+                                                output,
+                                            }
+                                        })
+                                    },
+                                )),
+                        );
+                    }
+                    return Some(output_path);
+                }
+            }
+        }
+        None
+    }
+}
+
+/// Get an iterator of a transitive input closure for a derivation.
+/// It's used for input propagation into the build and nixbase32 needle propagation
+/// for build output refscanning.
+pub(crate) fn get_all_inputs<'a>(
+    derivation: &'a Derivation,
+    known_paths: &'a KnownPaths,
+) -> impl Iterator<Item = &'a StorePath<String>> {
+    BfsDerivationInputs {
+        queue: derivation
+            .input_sources
+            .iter()
+            .map(DerivationQueueItem::InputSource)
+            .chain(
+                derivation
+                    .input_derivations
+                    .iter()
+                    .flat_map(|(drv_path, outs)| {
+                        outs.iter()
+                            .map(move |output| DerivationQueueItem::InputDerivation {
+                                drv_path,
+                                output,
+                            })
+                    }),
+            )
+            .collect(),
+        visited: HashSet::new(),
+        known_paths,
+    }
 }
 
 /// Takes a [Derivation] and turns it into a [buildservice::BuildRequest].
-/// It assumes the Derivation has been validated.
-/// It needs two lookup functions:
-/// - one translating input sources to a castore node
-///   (`fn_input_sources_to_node`)
-/// - one translating a tuple of drv path and (a subset of their) output names to
-///   castore nodes of the selected outpus (`fn_input_drvs_to_output_nodes`).
+/// It assumes the Derivation has been validated, and all referenced output paths are present in `inputs`.
 pub(crate) fn derivation_to_build_request(
     derivation: &Derivation,
-    inputs: BTreeMap<StorePath<String>, Node>,
+    inputs: &BTreeMap<StorePath<String>, Node>,
 ) -> std::io::Result<BuildRequest> {
 
     debug_assert!(derivation.validate(true).is_ok(), "drv must validate");
@@ -105,23 +192,20 @@ pub(crate) fn derivation_to_build_request(
     Ok(BuildRequest {
         // Importantly, this must match the order of get_refscan_needles, since users may use that
        // function to map back from the found needles to a store path
-        refscan_needles: get_refscan_needles(derivation)
+        refscan_needles: derivation
+            .outputs
+            .values()
+            .filter_map(|output| output.path.as_ref())
             .map(|path| nixbase32::encode(path.digest()))
+            .chain(inputs.keys().map(|path| nixbase32::encode(path.digest())))
             .collect(),
         command_args,
 
-        outputs: {
-            // produce output_paths, which is the absolute path of each output (sorted)
-            let mut output_paths: Vec<PathBuf> = derivation
-                .outputs
-                .values()
-                .map(|e| PathBuf::from(e.path_str()[1..].to_owned()))
-                .collect();
-
-            // Sort the outputs. We can use sort_unstable, as these are unique strings.
-            output_paths.sort_unstable();
-            output_paths
-        },
+        outputs: derivation
+            .outputs
+            .values()
+            .map(|e| PathBuf::from(e.path_str()[1..].to_owned()))
+            .collect(),
 
         // Turn this into a sorted-by-key Vec.
         environment_vars: environment_vars
@@ -129,14 +213,14 @@
             .map(|(key, value)| EnvVar { key, value })
             .collect(),
         inputs: inputs
-            .into_iter()
+            .iter()
             .map(|(path, node)| {
                 (
                     path.to_string()
                         .as_str()
                         .try_into()
                         .expect("Snix bug: unable to convert store path basename to PathComponent"),
-                    node,
+                    node.clone(),
                 )
             })
             .collect(),
@@ -223,6 +307,7 @@ mod test {
 
     use snix_build::buildservice::{AdditionalFile, BuildConstraints, BuildRequest, EnvVar};
 
+    use crate::known_paths::KnownPaths;
    use crate::snix_build::NIX_ENVIRONMENT_VARS;
 
     use super::derivation_to_build_request;
@@ -239,11 +324,25 @@ mod test {
     fn test_derivation_to_build_request() {
         let aterm_bytes = include_bytes!("tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv");
 
-        let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse");
+        let dep_drv_bytes = include_bytes!("tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv");
+
+        let derivation1 = Derivation::from_aterm_bytes(aterm_bytes).expect("drv1 must parse");
+        let drv_path1 =
+            StorePath::<String>::from_bytes("ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv".as_bytes())
+                .expect("drv path1 must parse");
+        let derivation2 = Derivation::from_aterm_bytes(dep_drv_bytes).expect("drv2 must parse");
+        let drv_path2 =
+            StorePath::<String>::from_bytes("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv".as_bytes())
+                .expect("drv path2 must parse");
+
+        let mut known_paths = KnownPaths::default();
+
+        known_paths.add_derivation(drv_path2, derivation2);
+        known_paths.add_derivation(drv_path1, derivation1.clone());
 
         let build_request = derivation_to_build_request(
-            &derivation,
-            BTreeMap::from([(
+            &derivation1,
+            &BTreeMap::from([(
                 StorePath::<String>::from_bytes(&INPUT_NODE_FOO_NAME.clone()).unwrap(),
                 INPUT_NODE_FOO.clone(),
             )]),
@@ -291,7 +390,7 @@ mod test {
             )]),
             inputs_dir: "nix/store".into(),
             constraints: HashSet::from([
-                BuildConstraints::System(derivation.system.clone()),
+                BuildConstraints::System(derivation1.system.clone()),
                 BuildConstraints::ProvideBinSh
             ]),
             additional_files: vec![],
@@ -299,7 +398,7 @@ mod test {
             scratch_paths: vec!["build".into(), "nix/store".into()],
             refscan_needles: vec![
                 "fhaj6gmwns62s6ypkcldbaj2ybvkhx3p".into(),
-                "ss2p4wmxijn652haqyd7dckxwl4c7hxx".into()
+                "mp57d33657rf34lzvlbpfa1gjfv5gmpg".into()
             ],
         },
         build_request
@@ -313,7 +412,7 @@ mod test {
         let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse");
 
         let build_request =
-            derivation_to_build_request(&derivation, BTreeMap::from([])).expect("must succeed");
+            derivation_to_build_request(&derivation, &BTreeMap::from([])).expect("must succeed");
 
         let mut expected_environment_vars = vec![
             EnvVar {
@@ -382,7 +481,7 @@ mod test {
         let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse");
 
         let build_request =
-            derivation_to_build_request(&derivation, BTreeMap::from([])).expect("must succeed");
+            derivation_to_build_request(&derivation, &BTreeMap::from([])).expect("must succeed");
 
         let mut expected_environment_vars = vec![
             // Note how bar and baz are not present in the env anymore,
diff --git a/snix/glue/src/snix_store_io.rs b/snix/glue/src/snix_store_io.rs
index 830991f63..43b69a8a6 100644
--- a/snix/glue/src/snix_store_io.rs
+++ b/snix/glue/src/snix_store_io.rs
@@ -177,70 +177,19 @@ impl SnixStoreIO {
                     span.pb_set_style(&snix_tracing::PB_SPINNER_STYLE);
                     span.pb_set_message(&format!("⏳Waiting for inputs {}", &store_path));
 
+                    // Maps from the index in refscan_needles to the full store path
+                    // Used to map back to the actual store path from the found needles
+                    // Importantly, this must match the order of the needles generated in derivation_to_build_request
+                    let inputs =
+                        crate::snix_build::get_all_inputs(&drv, &self.known_paths.borrow())
+                            .map(StorePath::to_owned)
+                            .collect::<Vec<_>>();
+
                     // derivation_to_build_request needs castore nodes for all inputs.
                     // Provide them, which means, here is where we recursively build
                     // all dependencies.
-                    let mut inputs: BTreeMap<StorePath<String>, Node> =
-                        futures::stream::iter(drv.input_derivations.iter())
-                            .map(|(input_drv_path, output_names)| {
-                                // look up the derivation object
-                                let input_drv = {
-                                    let known_paths = self.known_paths.borrow();
-                                    known_paths
-                                        .get_drv_by_drvpath(input_drv_path)
-                                        .unwrap_or_else(|| {
-                                            panic!("{} not found", input_drv_path)
-                                        })
-                                        .to_owned()
-                                };
-
-                                // convert output names to actual paths
-                                let output_paths: Vec<StorePath<String>> = output_names
-                                    .iter()
-                                    .map(|output_name| {
-                                        input_drv
-                                            .outputs
-                                            .get(output_name)
-                                            .expect("missing output_name")
-                                            .path
-                                            .as_ref()
-                                            .expect("missing output path")
-                                            .clone()
-                                    })
-                                    .collect();
-
-                                // For each output, ask for the castore node.
-                                // We're in a per-derivation context, so if they're
-                                // not built yet they'll all get built together.
-                                // If they don't need to build, we can however still
-                                // substitute all in parallel (if they don't need to
-                                // be built) - so we turn this into a stream of streams.
-                                // It's up to the builder to deduplicate same build requests.
-                                futures::stream::iter(output_paths.into_iter()).map(
-                                    |output_path| async move {
-                                        let node = self
-                                            .store_path_to_node(&output_path, Path::new(""))
-                                            .await?;
-
-                                        if let Some(node) = node {
-                                            Ok((output_path, node))
-                                        } else {
-                                            Err(io::Error::other("no node produced"))
-                                        }
-                                    },
-                                )
-                            })
-                            .flatten()
-                            .buffer_unordered(
-                                1, /* TODO: increase again once we prevent redundant fetches */
-                            ) // TODO: make configurable
-                            .try_collect()
-                            .await?;
-
-                    // FUTUREWORK: merge these who things together
-                    // add input sources
-                    let input_sources: BTreeMap<_, _> =
-                        futures::stream::iter(drv.input_sources.iter())
+                    let resolved_inputs: BTreeMap<StorePath<String>, Node> =
+                        futures::stream::iter(inputs.iter())
                             .then(|input_source| {
                                 Box::pin({
                                     let input_source = input_source.clone();
@@ -259,16 +208,10 @@
                             .try_collect()
                             .await?;
 
-                    inputs.extend(input_sources);
-
                     span.pb_set_message(&format!("🔨Building {}", &store_path));
 
-                    // TODO: check if input sources are sufficiently dealth with,
-                    // I think yes, they must be imported into the store by other
-                    // operations, so dealt with in the Some(…) match arm
-
                     // synthesize the build request.
-                    let build_request = derivation_to_build_request(&drv, inputs)?;
+                    let build_request = derivation_to_build_request(&drv, &resolved_inputs)?;
 
                     // create a build
                     let build_result = self
@@ -278,16 +221,13 @@
                         .await
                         .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))?;
 
-                    // Maps from the index in refscan_needles to the full store path
-                    // Used to map back to the actual store path from the found needles
-                    // Importantly, this must match the order of the needles generated in derivation_to_build_request
-                    let refscan_needles =
-                        crate::snix_build::get_refscan_needles(&drv).collect::<Vec<_>>();
-
                     // For each output, insert a PathInfo.
                     for ((output, output_needles), drv_output) in build_result
                         .outputs
                         .iter()
+                        // Important: build outputs must match drv outputs order,
+                        // otherwise the outputs will be mixed up
+                        // see https://git.snix.dev/snix/snix/issues/89
                         .zip(build_result.outputs_needles.iter())
                         .zip(drv.outputs.iter())
                     {
@@ -296,20 +236,6 @@
                             .try_into_name_and_node()
                             .expect("invalid node");
 
-                        let output_needles: Vec<_> = output_needles
-                            .needles
-                            .iter()
-                            // Map each output needle index back to the refscan_needle
-                            .map(|idx| {
-                                refscan_needles
-                                    .get(*idx as usize)
-                                    .ok_or(std::io::Error::new(
-                                        std::io::ErrorKind::Other,
-                                        "invalid build response",
-                                    ))
-                            })
-                            .collect::<Result<_, _>>()?;
-
                         // calculate the nar representation
                         let (nar_size, nar_sha256) = self
                             .nar_calculation_service
@@ -328,10 +254,31 @@
                             ))?
                             .to_owned(),
                             node: output_node,
-                            references: output_needles
-                                .iter()
-                                .map(|s| (**s).to_owned())
-                                .collect(),
+                            references: {
+                                let all_possible_refs: Vec<_> = drv
+                                    .outputs
+                                    .values()
+                                    .filter_map(|output| output.path.as_ref())
+                                    .chain(resolved_inputs.keys())
+                                    .collect();
+                                let mut references: Vec<_> = output_needles
+                                    .needles
+                                    .iter()
+                                    // Map each output needle index back to the refscan_needle
+                                    .map(|idx| {
+                                        all_possible_refs
+                                            .get(*idx as usize)
+                                            .map(|it| (*it).clone())
+                                            .ok_or(std::io::Error::new(
+                                                std::io::ErrorKind::Other,
+                                                "invalid build response",
+                                            ))
+                                    })
+                                    .collect::<Result<_, _>>()?;
+                                // Produce references sorted by name for consistency with nix narinfos
+                                references.sort();
+                                references
+                            },
                             nar_size,
                             nar_sha256,
                             signatures: vec![],