diff --git a/snix/eval/benches/eval.rs b/snix/eval/benches/eval.rs index 835b073be..35a8082cb 100644 --- a/snix/eval/benches/eval.rs +++ b/snix/eval/benches/eval.rs @@ -36,6 +36,17 @@ fn eval_merge_attrs(c: &mut Criterion) { interpret(black_box(&expr)); }) }); + + c.bench_function("merge small attrs with large attrs", |b| { + let large_attrs = format!( + "{{{}}}", + (0..10000).map(|n| format!("a{n} = {n};")).join(" ") + ); + let expr = format!("{{ c = 3 }} // {large_attrs}"); + b.iter(move || { + interpret(black_box(&expr)); + }) + }); } fn eval_intersect_attrs(c: &mut Criterion) { diff --git a/snix/eval/src/builtins/mod.rs b/snix/eval/src/builtins/mod.rs index 379cac61b..9efb2820f 100644 --- a/snix/eval/src/builtins/mod.rs +++ b/snix/eval/src/builtins/mod.rs @@ -86,7 +86,7 @@ mod pure_builtins { use bstr::{BString, ByteSlice, B}; use itertools::Itertools; use os_str_bytes::OsStringBytes; - use rustc_hash::FxHashSet; + use rustc_hash::{FxHashMap, FxHashSet}; use crate::{value::PointerEquality, AddContext, NixContext, NixContextElement}; @@ -789,58 +789,14 @@ mod pure_builtins { if left_set.is_empty() { return Ok(Value::attrs(NixAttrs::empty())); } - let mut left_iter = left_set.iter_sorted(); let right_set = y.to_attrs()?; + if right_set.is_empty() { return Ok(Value::attrs(NixAttrs::empty())); } - let mut right_iter = right_set.iter_sorted(); - let mut out: BTreeMap = BTreeMap::new(); - - // Both iterators have at least one entry - let mut left = left_iter.next().unwrap(); - let mut right = right_iter.next().unwrap(); - - // Calculate the intersection of two attribute sets by iterating them - // simultaneously in lexicographic order, similar to a merge sort. - // - // Only when keys match are the key and value clones actually allocated. - // - // We opted for this implementation over simpler ones because of the - // heavy use of this function in nixpkgs. - loop { - match left.0.cmp(right.0) { - Ordering::Equal => { - out.insert(right.0.clone(), right.1.clone()); - - left = match left_iter.next() { - Some(x) => x, - None => break, - }; - - right = match right_iter.next() { - Some(x) => x, - None => break, - }; - } - Ordering::Less => { - left = match left_iter.next() { - Some(x) => x, - None => break, - }; - } - Ordering::Greater => { - right = match right_iter.next() { - Some(x) => x, - None => break, - }; - } - } - } - - Ok(Value::attrs(out.into())) + Ok(Value::attrs(left_set.intersect(&right_set))) } #[builtin("isAttrs")] @@ -949,7 +905,7 @@ mod pure_builtins { #[builtin("listToAttrs")] async fn builtin_list_to_attrs(co: GenCo, list: Value) -> Result { let list = list.to_list()?; - let mut map = BTreeMap::new(); + let mut map = FxHashMap::default(); for val in list { let attrs = try_value!(generators::request_force(&co, val).await).to_attrs()?; let name = try_value!( @@ -960,7 +916,7 @@ mod pure_builtins { // Map entries earlier in the list take precedence over entries later in the list map.entry(name).or_insert(value); } - Ok(Value::attrs(NixAttrs::from_iter(map.into_iter()))) + Ok(Value::attrs(NixAttrs::from(map))) } #[builtin("map")] @@ -986,7 +942,7 @@ mod pure_builtins { attrs: Value, ) -> Result { let attrs = attrs.to_attrs()?; - let mut out: BTreeMap = BTreeMap::new(); + let mut out = FxHashMap::default(); // the best span we can get… let span = generators::request_span(&co).await; diff --git a/snix/eval/src/tests/snix_tests/eval-okay-attrs-comp-order.exp b/snix/eval/src/tests/snix_tests/eval-okay-attrs-comp-order.exp new file mode 100644 index 000000000..c82ddd8a8 --- /dev/null +++ b/snix/eval/src/tests/snix_tests/eval-okay-attrs-comp-order.exp @@ -0,0 +1 @@ +[ false false true ] diff --git a/snix/eval/src/tests/snix_tests/eval-okay-attrs-comp-order.nix b/snix/eval/src/tests/snix_tests/eval-okay-attrs-comp-order.nix new file mode 100644 index 000000000..a3cc4f45d --- /dev/null +++ b/snix/eval/src/tests/snix_tests/eval-okay-attrs-comp-order.nix @@ -0,0 +1,45 @@ +with import ./lib.nix; +let + makeSet = + name: value: + (builtins.listToAttrs ( + map + (n: { + name = "${name}${builtins.toString n}"; + value = value; + }) + (range 1 100) + )); + + # Comparison happens in lexical order, depth first. + # So it should return with false, before encountering any of the error fields + early1 = { + a = { + a = true; + b = throw "error"; + }; + } // makeSet "error" (throw "err"); + + early2 = { + a = { + a = false; + b = throw "error"; + }; + } // makeSet "error" (throw "err"); + + # Hits error first + error1 = { + a = throw "error"; + } // makeSet "foo" true; + + error2 = { + a = throw "error"; + } // makeSet "foo" false; + +in +[ + (early1 == early2) + (builtins.tryEval (error1 == error2)).success + # Compares name first, so doesn't hit the error + ({ foo = throw "err"; } != { bar = throw "err"; }) +] diff --git a/snix/eval/src/tests/snix_tests/eval-okay-attrs-eq.exp b/snix/eval/src/tests/snix_tests/eval-okay-attrs-eq.exp new file mode 100644 index 000000000..ec04aab6a --- /dev/null +++ b/snix/eval/src/tests/snix_tests/eval-okay-attrs-eq.exp @@ -0,0 +1 @@ +[ true true true false ] diff --git a/snix/eval/src/tests/snix_tests/eval-okay-attrs-eq.nix b/snix/eval/src/tests/snix_tests/eval-okay-attrs-eq.nix new file mode 100644 index 000000000..607770ecd --- /dev/null +++ b/snix/eval/src/tests/snix_tests/eval-okay-attrs-eq.nix @@ -0,0 +1,48 @@ +let + # makes the two sets have different capacity internally + a = { + a = 1; + b = 2; + c = 3; + d = 4; + e = 5; + }; + b = + { + a = 1; + b = 2; + c = 3; + } + // { + d = 4; + e = 5; + }; + + nested1 = { + foo = { + bar = "bar"; + }; + b = 1; + }; + + nested2 = { + foo.bar = "bar"; + b = 1; + }; + +in +[ + ( + { + foo = "foo"; + bar = 10; + } == { + bar = 10; + foo = "foo"; + } + ) + (a == b) + (nested1 == nested2) + # just so theres a negativ case + (a == nested1) +] diff --git a/snix/eval/src/value/arbitrary.rs b/snix/eval/src/value/arbitrary.rs index 49b9f2eea..6dd9e07bd 100644 --- a/snix/eval/src/value/arbitrary.rs +++ b/snix/eval/src/value/arbitrary.rs @@ -42,7 +42,7 @@ impl Arbitrary for NixAttrs { .prop_map(|(name, value)| AttrsRep::KV { name, value }.into()), // Map representation btree_map(NixString::arbitrary(), Value::arbitrary_with(args), 0..100) - .prop_map(|map| AttrsRep::Map(map).into()) + .prop_map(|map| AttrsRep::Map(map.into_iter().collect()).into()) ] .boxed() } diff --git a/snix/eval/src/value/attrs.rs b/snix/eval/src/value/attrs.rs index 8df69abd4..f43018523 100644 --- a/snix/eval/src/value/attrs.rs +++ b/snix/eval/src/value/attrs.rs @@ -6,12 +6,14 @@ //! Due to this, construction and management of attribute sets has //! some peculiarities that are encapsulated within this module. use std::borrow::Borrow; -use std::collections::{btree_map, BTreeMap}; +use std::collections::{hash_map, BTreeMap}; use std::iter::FromIterator; use std::rc::Rc; use std::sync::LazyLock; use bstr::BStr; +use itertools::Itertools as _; +use rustc_hash::FxHashMap; use serde::de::{Deserializer, Error, Visitor}; use serde::Deserialize; @@ -33,7 +35,7 @@ pub(super) enum AttrsRep { #[default] Empty, - Map(BTreeMap), + Map(FxHashMap), /// Warning: this represents a **two**-attribute attrset, with /// attribute names "name" and "value", like `{name="foo"; @@ -98,6 +100,12 @@ where impl From> for NixAttrs { fn from(map: BTreeMap) -> Self { + AttrsRep::Map(map.into_iter().collect()).into() + } +} + +impl From> for NixAttrs { + fn from(map: FxHashMap) -> Self { AttrsRep::Map(map).into() } } @@ -204,28 +212,36 @@ impl NixAttrs { (AttrsRep::KV { name, value }, AttrsRep::Map(mut m)) => { match m.entry(NAME.clone()) { - btree_map::Entry::Vacant(e) => { + hash_map::Entry::Vacant(e) => { e.insert(name); } - btree_map::Entry::Occupied(_) => { /* name from `m` has precedence */ } + hash_map::Entry::Occupied(_) => { /* name from `m` has precedence */ } }; match m.entry(VALUE.clone()) { - btree_map::Entry::Vacant(e) => { + hash_map::Entry::Vacant(e) => { e.insert(value); } - btree_map::Entry::Occupied(_) => { /* value from `m` has precedence */ } + hash_map::Entry::Occupied(_) => { /* value from `m` has precedence */ } }; AttrsRep::Map(m).into() } // Plain merge of maps. - (AttrsRep::Map(mut m1), AttrsRep::Map(m2)) => { - m1.extend(m2); - AttrsRep::Map(m1).into() + (AttrsRep::Map(mut m1), AttrsRep::Map(mut m2)) => { + let map = if m1.len() >= m2.len() { + m1.extend(m2); + m1 + } else { + for (key, val) in m1.into_iter() { + m2.entry(key).or_insert(val); + } + m2 + }; + AttrsRep::Map(map).into() } // Cases handled above by the borrowing match: @@ -298,13 +314,49 @@ impl NixAttrs { /// Same as iter(), but marks call sites which rely on the /// iteration being lexicographic. pub fn iter_sorted(&self) -> Iter> { - self.iter() + Iter(match self.0.as_ref() { + AttrsRep::Empty => KeyValue::Empty, + AttrsRep::Map(map) => { + let sorted = map.iter().sorted_by_key(|x| x.0); + KeyValue::Sorted(sorted) + } + AttrsRep::KV { + ref name, + ref value, + } => KeyValue::KV { + name, + value, + at: IterKV::default(), + }, + }) } /// Same as [IntoIterator::into_iter], but marks call sites which rely on the /// iteration being lexicographic. pub fn into_iter_sorted(self) -> OwnedAttrsIterator { - self.into_iter() + let iter = match Rc::::try_unwrap(self.0) { + Ok(attrs) => match attrs { + AttrsRep::Empty => IntoIterRepr::Empty, + AttrsRep::Map(map) => { + IntoIterRepr::Finite(map.into_iter().sorted_by(|(a, _), (b, _)| a.cmp(b))) + } + AttrsRep::KV { name, value } => IntoIterRepr::Finite( + vec![(NAME.clone(), name), (VALUE.clone(), value)].into_iter(), + ), + }, + Err(rc) => match rc.as_ref() { + AttrsRep::Empty => IntoIterRepr::Empty, + AttrsRep::Map(map) => IntoIterRepr::Finite( + map.iter() + .map(|(k, v)| (k.clone(), v.clone())) + .sorted_by(|(a, _), (b, _)| a.cmp(b)), + ), + AttrsRep::KV { name, value } => IntoIterRepr::Finite( + vec![(NAME.clone(), name.clone()), (VALUE.clone(), value.clone())].into_iter(), + ), + }, + }; + OwnedAttrsIterator(iter) } /// Construct an iterator over all the keys of the attribute set @@ -321,7 +373,11 @@ impl NixAttrs { /// Same as [Self::keys], but marks call sites which rely on the /// iteration being lexicographic. pub fn keys_sorted(&self) -> Keys { - self.keys() + Keys(match self.0.as_ref() { + AttrsRep::Map(map) => KeysInner::Sorted(map.keys().sorted()), + AttrsRep::Empty => KeysInner::Empty, + AttrsRep::KV { .. } => KeysInner::KV(IterKV::default()), + }) } /// Implement construction logic of an attribute set, to encapsulate @@ -349,7 +405,7 @@ impl NixAttrs { } } - let mut attrs_map = BTreeMap::new(); + let mut attrs_map = FxHashMap::with_capacity_and_hasher(count, rustc_hash::FxBuildHasher); for _ in 0..count { let value = stack_slice.pop().unwrap(); @@ -379,6 +435,65 @@ impl NixAttrs { pub(crate) fn from_kv(name: Value, value: Value) -> Self { AttrsRep::KV { name, value }.into() } + + /// Calculate the intersection of the attribute sets. + /// The right side value is used when the keys match. + pub(crate) fn intersect(&self, other: &Self) -> NixAttrs { + match (self.0.as_ref(), other.0.as_ref()) { + (AttrsRep::Empty, _) | (_, AttrsRep::Empty) => AttrsRep::Empty.into(), + (AttrsRep::Map(lhs), AttrsRep::Map(rhs)) => { + let mut out = FxHashMap::with_capacity_and_hasher( + std::cmp::min(lhs.len(), rhs.len()), + rustc_hash::FxBuildHasher, + ); + if lhs.len() < rhs.len() { + for key in lhs.keys() { + if let Some(val) = rhs.get(key) { + out.insert(key.clone(), val.clone()); + } + } + } else { + for (key, val) in rhs.iter() { + if lhs.contains_key(key) { + out.insert(key.clone(), val.clone()); + } + } + }; + out.into() + } + (AttrsRep::Map(map), AttrsRep::KV { name, value }) => { + let mut out = FxHashMap::with_capacity_and_hasher(2, rustc_hash::FxBuildHasher); + if map.contains_key(NAME.as_bstr()) { + out.insert(NAME.clone(), name.clone()); + } + if map.contains_key(VALUE.as_bstr()) { + out.insert(VALUE.clone(), value.clone()); + } + + if out.is_empty() { + NixAttrs::empty() + } else { + out.into() + } + } + (AttrsRep::KV { .. }, AttrsRep::Map(map)) => { + let mut out = FxHashMap::with_capacity_and_hasher(2, rustc_hash::FxBuildHasher); + if let Some(name) = map.get(NAME.as_bstr()) { + out.insert(NAME.clone(), name.clone()); + } + if let Some(value) = map.get(VALUE.as_bstr()) { + out.insert(VALUE.clone(), value.clone()); + } + + if out.is_empty() { + NixAttrs::empty() + } else { + out.into() + } + } + (AttrsRep::KV { .. }, AttrsRep::KV { .. }) => other.clone(), + } + } } impl IntoIterator for NixAttrs { @@ -431,16 +546,16 @@ fn attempt_optimise_kv(slice: &mut [Value]) -> Option { /// Set an attribute on an in-construction attribute set, while /// checking against duplicate keys. fn set_attr( - map: &mut BTreeMap, + map: &mut FxHashMap, key: NixString, value: Value, ) -> Result<(), ErrorKind> { match map.entry(key) { - btree_map::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { + hash_map::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { key: entry.key().to_string(), }), - btree_map::Entry::Vacant(entry) => { + hash_map::Entry::Vacant(entry) => { entry.insert(value); Ok(()) } @@ -478,7 +593,9 @@ pub enum KeyValue<'a> { at: IterKV, }, - Map(btree_map::Iter<'a, NixString, Value>), + Map(hash_map::Iter<'a, NixString, Value>), + + Sorted(std::vec::IntoIter<(&'a NixString, &'a Value)>), } /// Iterator over a Nix attribute set. @@ -494,7 +611,6 @@ impl<'a> Iterator for Iter> { match &mut self.0 { KeyValue::Map(inner) => inner.next(), KeyValue::Empty => None, - KeyValue::KV { name, value, at } => match at { IterKV::Name => { at.next(); @@ -508,6 +624,7 @@ impl<'a> Iterator for Iter> { IterKV::Done => None, }, + KeyValue::Sorted(inner) => inner.next(), } } } @@ -518,6 +635,7 @@ impl ExactSizeIterator for Iter> { KeyValue::Empty => 0, KeyValue::KV { .. } => 2, KeyValue::Map(inner) => inner.len(), + KeyValue::Sorted(inner) => inner.len(), } } } @@ -525,7 +643,8 @@ impl ExactSizeIterator for Iter> { enum KeysInner<'a> { Empty, KV(IterKV), - Map(btree_map::Keys<'a, NixString, Value>), + Map(hash_map::Keys<'a, NixString, Value>), + Sorted(std::vec::IntoIter<&'a NixString>), } pub struct Keys<'a>(KeysInner<'a>); @@ -546,6 +665,7 @@ impl<'a> Iterator for Keys<'a> { } KeysInner::KV(IterKV::Done) => None, KeysInner::Map(m) => m.next(), + KeysInner::Sorted(v) => v.next(), } } } @@ -566,6 +686,7 @@ impl ExactSizeIterator for Keys<'_> { KeysInner::Empty => 0, KeysInner::KV(_) => 2, KeysInner::Map(m) => m.len(), + KeysInner::Sorted(v) => v.len(), } } } @@ -574,7 +695,7 @@ impl ExactSizeIterator for Keys<'_> { pub enum IntoIterRepr { Empty, Finite(std::vec::IntoIter<(NixString, Value)>), - Map(btree_map::IntoIter), + Map(hash_map::IntoIter), } /// Wrapper type which hides the internal implementation details from @@ -609,7 +730,8 @@ impl DoubleEndedIterator for OwnedAttrsIterator { match &mut self.0 { IntoIterRepr::Empty => None, IntoIterRepr::Finite(inner) => inner.next_back(), - IntoIterRepr::Map(inner) => inner.next_back(), + // hashmaps have arbitary iteration order, so reversing it would not make a difference + IntoIterRepr::Map(inner) => inner.next(), } } } diff --git a/snix/eval/src/value/mod.rs b/snix/eval/src/value/mod.rs index 998226fe6..feb2508be 100644 --- a/snix/eval/src/value/mod.rs +++ b/snix/eval/src/value/mod.rs @@ -283,7 +283,7 @@ impl Value { } Value::Attrs(attrs) => { - for (_, val) in attrs.into_iter().rev() { + for (_, val) in attrs.into_iter_sorted().rev() { vals.push(val); } continue;