refactor(eval): switch NixAttrs implementation to HashMap

Using hashmap seems to give a decent speedup overall.

hello outpath           time:   [528.01 ms 529.17 ms 530.64 ms]
                        change: [-22.932% -22.563% -22.181%] (p = 0.00 < 0.05)
                        Performance has improved.

firefox outpath         time:   [4.7647 s 4.8149 s 4.8917 s]
                        change: [-21.251% -20.408% -18.914%] (p = 0.00 < 0.05)
                        Performance has improved.

But it slows down derivation parsing by about 1-1.5%
Added another attr merge benchmark that helped me while profiling,
not sure if we want to keep that.

Change-Id: Icb9f1e2d40bbb7150af1b8df192bf3c860bae79b
Reviewed-on: https://cl.snix.dev/c/snix/+/30309
Tested-by: besadii
Reviewed-by: Florian Klink <flokli@flokli.de>
This commit is contained in:
Starnick4444 2025-04-11 11:06:09 +02:00 committed by Bence Nemes
parent 8903fbb975
commit e97cf628a3
9 changed files with 257 additions and 73 deletions

View file

@ -36,6 +36,17 @@ fn eval_merge_attrs(c: &mut Criterion) {
interpret(black_box(&expr));
})
});
c.bench_function("merge small attrs with large attrs", |b| {
let large_attrs = format!(
"{{{}}}",
(0..10000).map(|n| format!("a{n} = {n};")).join(" ")
);
let expr = format!("{{ c = 3 }} // {large_attrs}");
b.iter(move || {
interpret(black_box(&expr));
})
});
}
fn eval_intersect_attrs(c: &mut Criterion) {

View file

@ -86,7 +86,7 @@ mod pure_builtins {
use bstr::{BString, ByteSlice, B};
use itertools::Itertools;
use os_str_bytes::OsStringBytes;
use rustc_hash::FxHashSet;
use rustc_hash::{FxHashMap, FxHashSet};
use crate::{value::PointerEquality, AddContext, NixContext, NixContextElement};
@ -789,58 +789,14 @@ mod pure_builtins {
if left_set.is_empty() {
return Ok(Value::attrs(NixAttrs::empty()));
}
let mut left_iter = left_set.iter_sorted();
let right_set = y.to_attrs()?;
if right_set.is_empty() {
return Ok(Value::attrs(NixAttrs::empty()));
}
let mut right_iter = right_set.iter_sorted();
let mut out: BTreeMap<NixString, Value> = BTreeMap::new();
// Both iterators have at least one entry
let mut left = left_iter.next().unwrap();
let mut right = right_iter.next().unwrap();
// Calculate the intersection of two attribute sets by iterating them
// simultaneously in lexicographic order, similar to a merge sort.
//
// Only when keys match are the key and value clones actually allocated.
//
// We opted for this implementation over simpler ones because of the
// heavy use of this function in nixpkgs.
loop {
match left.0.cmp(right.0) {
Ordering::Equal => {
out.insert(right.0.clone(), right.1.clone());
left = match left_iter.next() {
Some(x) => x,
None => break,
};
right = match right_iter.next() {
Some(x) => x,
None => break,
};
}
Ordering::Less => {
left = match left_iter.next() {
Some(x) => x,
None => break,
};
}
Ordering::Greater => {
right = match right_iter.next() {
Some(x) => x,
None => break,
};
}
}
}
Ok(Value::attrs(out.into()))
Ok(Value::attrs(left_set.intersect(&right_set)))
}
#[builtin("isAttrs")]
@ -949,7 +905,7 @@ mod pure_builtins {
#[builtin("listToAttrs")]
async fn builtin_list_to_attrs(co: GenCo, list: Value) -> Result<Value, ErrorKind> {
let list = list.to_list()?;
let mut map = BTreeMap::new();
let mut map = FxHashMap::default();
for val in list {
let attrs = try_value!(generators::request_force(&co, val).await).to_attrs()?;
let name = try_value!(
@ -960,7 +916,7 @@ mod pure_builtins {
// Map entries earlier in the list take precedence over entries later in the list
map.entry(name).or_insert(value);
}
Ok(Value::attrs(NixAttrs::from_iter(map.into_iter())))
Ok(Value::attrs(NixAttrs::from(map)))
}
#[builtin("map")]
@ -986,7 +942,7 @@ mod pure_builtins {
attrs: Value,
) -> Result<Value, ErrorKind> {
let attrs = attrs.to_attrs()?;
let mut out: BTreeMap<NixString, Value> = BTreeMap::new();
let mut out = FxHashMap::default();
// the best span we can get…
let span = generators::request_span(&co).await;

View file

@ -0,0 +1 @@
[ false false true ]

View file

@ -0,0 +1,45 @@
with import ./lib.nix;
let
makeSet =
name: value:
(builtins.listToAttrs (
map
(n: {
name = "${name}${builtins.toString n}";
value = value;
})
(range 1 100)
));
# Comparison happens in lexical order, depth first.
# So it should return with false, before encountering any of the error fields
early1 = {
a = {
a = true;
b = throw "error";
};
} // makeSet "error" (throw "err");
early2 = {
a = {
a = false;
b = throw "error";
};
} // makeSet "error" (throw "err");
# Hits error first
error1 = {
a = throw "error";
} // makeSet "foo" true;
error2 = {
a = throw "error";
} // makeSet "foo" false;
in
[
(early1 == early2)
(builtins.tryEval (error1 == error2)).success
# Compares name first, so doesn't hit the error
({ foo = throw "err"; } != { bar = throw "err"; })
]

View file

@ -0,0 +1 @@
[ true true true false ]

View file

@ -0,0 +1,48 @@
let
# makes the two sets have different capacity internally
a = {
a = 1;
b = 2;
c = 3;
d = 4;
e = 5;
};
b =
{
a = 1;
b = 2;
c = 3;
}
// {
d = 4;
e = 5;
};
nested1 = {
foo = {
bar = "bar";
};
b = 1;
};
nested2 = {
foo.bar = "bar";
b = 1;
};
in
[
(
{
foo = "foo";
bar = 10;
} == {
bar = 10;
foo = "foo";
}
)
(a == b)
(nested1 == nested2)
# just so theres a negativ case
(a == nested1)
]

View file

@ -42,7 +42,7 @@ impl Arbitrary for NixAttrs {
.prop_map(|(name, value)| AttrsRep::KV { name, value }.into()),
// Map representation
btree_map(NixString::arbitrary(), Value::arbitrary_with(args), 0..100)
.prop_map(|map| AttrsRep::Map(map).into())
.prop_map(|map| AttrsRep::Map(map.into_iter().collect()).into())
]
.boxed()
}

View file

@ -6,12 +6,14 @@
//! Due to this, construction and management of attribute sets has
//! some peculiarities that are encapsulated within this module.
use std::borrow::Borrow;
use std::collections::{btree_map, BTreeMap};
use std::collections::{hash_map, BTreeMap};
use std::iter::FromIterator;
use std::rc::Rc;
use std::sync::LazyLock;
use bstr::BStr;
use itertools::Itertools as _;
use rustc_hash::FxHashMap;
use serde::de::{Deserializer, Error, Visitor};
use serde::Deserialize;
@ -33,7 +35,7 @@ pub(super) enum AttrsRep {
#[default]
Empty,
Map(BTreeMap<NixString, Value>),
Map(FxHashMap<NixString, Value>),
/// Warning: this represents a **two**-attribute attrset, with
/// attribute names "name" and "value", like `{name="foo";
@ -98,6 +100,12 @@ where
impl From<BTreeMap<NixString, Value>> for NixAttrs {
fn from(map: BTreeMap<NixString, Value>) -> Self {
AttrsRep::Map(map.into_iter().collect()).into()
}
}
impl From<FxHashMap<NixString, Value>> for NixAttrs {
fn from(map: FxHashMap<NixString, Value>) -> Self {
AttrsRep::Map(map).into()
}
}
@ -204,28 +212,36 @@ impl NixAttrs {
(AttrsRep::KV { name, value }, AttrsRep::Map(mut m)) => {
match m.entry(NAME.clone()) {
btree_map::Entry::Vacant(e) => {
hash_map::Entry::Vacant(e) => {
e.insert(name);
}
btree_map::Entry::Occupied(_) => { /* name from `m` has precedence */ }
hash_map::Entry::Occupied(_) => { /* name from `m` has precedence */ }
};
match m.entry(VALUE.clone()) {
btree_map::Entry::Vacant(e) => {
hash_map::Entry::Vacant(e) => {
e.insert(value);
}
btree_map::Entry::Occupied(_) => { /* value from `m` has precedence */ }
hash_map::Entry::Occupied(_) => { /* value from `m` has precedence */ }
};
AttrsRep::Map(m).into()
}
// Plain merge of maps.
(AttrsRep::Map(mut m1), AttrsRep::Map(m2)) => {
(AttrsRep::Map(mut m1), AttrsRep::Map(mut m2)) => {
let map = if m1.len() >= m2.len() {
m1.extend(m2);
AttrsRep::Map(m1).into()
m1
} else {
for (key, val) in m1.into_iter() {
m2.entry(key).or_insert(val);
}
m2
};
AttrsRep::Map(map).into()
}
// Cases handled above by the borrowing match:
@ -298,13 +314,49 @@ impl NixAttrs {
/// Same as iter(), but marks call sites which rely on the
/// iteration being lexicographic.
pub fn iter_sorted(&self) -> Iter<KeyValue<'_>> {
self.iter()
Iter(match self.0.as_ref() {
AttrsRep::Empty => KeyValue::Empty,
AttrsRep::Map(map) => {
let sorted = map.iter().sorted_by_key(|x| x.0);
KeyValue::Sorted(sorted)
}
AttrsRep::KV {
ref name,
ref value,
} => KeyValue::KV {
name,
value,
at: IterKV::default(),
},
})
}
/// Same as [IntoIterator::into_iter], but marks call sites which rely on the
/// iteration being lexicographic.
pub fn into_iter_sorted(self) -> OwnedAttrsIterator {
self.into_iter()
let iter = match Rc::<AttrsRep>::try_unwrap(self.0) {
Ok(attrs) => match attrs {
AttrsRep::Empty => IntoIterRepr::Empty,
AttrsRep::Map(map) => {
IntoIterRepr::Finite(map.into_iter().sorted_by(|(a, _), (b, _)| a.cmp(b)))
}
AttrsRep::KV { name, value } => IntoIterRepr::Finite(
vec![(NAME.clone(), name), (VALUE.clone(), value)].into_iter(),
),
},
Err(rc) => match rc.as_ref() {
AttrsRep::Empty => IntoIterRepr::Empty,
AttrsRep::Map(map) => IntoIterRepr::Finite(
map.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.sorted_by(|(a, _), (b, _)| a.cmp(b)),
),
AttrsRep::KV { name, value } => IntoIterRepr::Finite(
vec![(NAME.clone(), name.clone()), (VALUE.clone(), value.clone())].into_iter(),
),
},
};
OwnedAttrsIterator(iter)
}
/// Construct an iterator over all the keys of the attribute set
@ -321,7 +373,11 @@ impl NixAttrs {
/// Same as [Self::keys], but marks call sites which rely on the
/// iteration being lexicographic.
pub fn keys_sorted(&self) -> Keys {
self.keys()
Keys(match self.0.as_ref() {
AttrsRep::Map(map) => KeysInner::Sorted(map.keys().sorted()),
AttrsRep::Empty => KeysInner::Empty,
AttrsRep::KV { .. } => KeysInner::KV(IterKV::default()),
})
}
/// Implement construction logic of an attribute set, to encapsulate
@ -349,7 +405,7 @@ impl NixAttrs {
}
}
let mut attrs_map = BTreeMap::new();
let mut attrs_map = FxHashMap::with_capacity_and_hasher(count, rustc_hash::FxBuildHasher);
for _ in 0..count {
let value = stack_slice.pop().unwrap();
@ -379,6 +435,65 @@ impl NixAttrs {
pub(crate) fn from_kv(name: Value, value: Value) -> Self {
AttrsRep::KV { name, value }.into()
}
/// Calculate the intersection of the attribute sets.
/// The right side value is used when the keys match.
pub(crate) fn intersect(&self, other: &Self) -> NixAttrs {
match (self.0.as_ref(), other.0.as_ref()) {
(AttrsRep::Empty, _) | (_, AttrsRep::Empty) => AttrsRep::Empty.into(),
(AttrsRep::Map(lhs), AttrsRep::Map(rhs)) => {
let mut out = FxHashMap::with_capacity_and_hasher(
std::cmp::min(lhs.len(), rhs.len()),
rustc_hash::FxBuildHasher,
);
if lhs.len() < rhs.len() {
for key in lhs.keys() {
if let Some(val) = rhs.get(key) {
out.insert(key.clone(), val.clone());
}
}
} else {
for (key, val) in rhs.iter() {
if lhs.contains_key(key) {
out.insert(key.clone(), val.clone());
}
}
};
out.into()
}
(AttrsRep::Map(map), AttrsRep::KV { name, value }) => {
let mut out = FxHashMap::with_capacity_and_hasher(2, rustc_hash::FxBuildHasher);
if map.contains_key(NAME.as_bstr()) {
out.insert(NAME.clone(), name.clone());
}
if map.contains_key(VALUE.as_bstr()) {
out.insert(VALUE.clone(), value.clone());
}
if out.is_empty() {
NixAttrs::empty()
} else {
out.into()
}
}
(AttrsRep::KV { .. }, AttrsRep::Map(map)) => {
let mut out = FxHashMap::with_capacity_and_hasher(2, rustc_hash::FxBuildHasher);
if let Some(name) = map.get(NAME.as_bstr()) {
out.insert(NAME.clone(), name.clone());
}
if let Some(value) = map.get(VALUE.as_bstr()) {
out.insert(VALUE.clone(), value.clone());
}
if out.is_empty() {
NixAttrs::empty()
} else {
out.into()
}
}
(AttrsRep::KV { .. }, AttrsRep::KV { .. }) => other.clone(),
}
}
}
impl IntoIterator for NixAttrs {
@ -431,16 +546,16 @@ fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> {
/// Set an attribute on an in-construction attribute set, while
/// checking against duplicate keys.
fn set_attr(
map: &mut BTreeMap<NixString, Value>,
map: &mut FxHashMap<NixString, Value>,
key: NixString,
value: Value,
) -> Result<(), ErrorKind> {
match map.entry(key) {
btree_map::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey {
hash_map::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey {
key: entry.key().to_string(),
}),
btree_map::Entry::Vacant(entry) => {
hash_map::Entry::Vacant(entry) => {
entry.insert(value);
Ok(())
}
@ -478,7 +593,9 @@ pub enum KeyValue<'a> {
at: IterKV,
},
Map(btree_map::Iter<'a, NixString, Value>),
Map(hash_map::Iter<'a, NixString, Value>),
Sorted(std::vec::IntoIter<(&'a NixString, &'a Value)>),
}
/// Iterator over a Nix attribute set.
@ -494,7 +611,6 @@ impl<'a> Iterator for Iter<KeyValue<'a>> {
match &mut self.0 {
KeyValue::Map(inner) => inner.next(),
KeyValue::Empty => None,
KeyValue::KV { name, value, at } => match at {
IterKV::Name => {
at.next();
@ -508,6 +624,7 @@ impl<'a> Iterator for Iter<KeyValue<'a>> {
IterKV::Done => None,
},
KeyValue::Sorted(inner) => inner.next(),
}
}
}
@ -518,6 +635,7 @@ impl ExactSizeIterator for Iter<KeyValue<'_>> {
KeyValue::Empty => 0,
KeyValue::KV { .. } => 2,
KeyValue::Map(inner) => inner.len(),
KeyValue::Sorted(inner) => inner.len(),
}
}
}
@ -525,7 +643,8 @@ impl ExactSizeIterator for Iter<KeyValue<'_>> {
enum KeysInner<'a> {
Empty,
KV(IterKV),
Map(btree_map::Keys<'a, NixString, Value>),
Map(hash_map::Keys<'a, NixString, Value>),
Sorted(std::vec::IntoIter<&'a NixString>),
}
pub struct Keys<'a>(KeysInner<'a>);
@ -546,6 +665,7 @@ impl<'a> Iterator for Keys<'a> {
}
KeysInner::KV(IterKV::Done) => None,
KeysInner::Map(m) => m.next(),
KeysInner::Sorted(v) => v.next(),
}
}
}
@ -566,6 +686,7 @@ impl ExactSizeIterator for Keys<'_> {
KeysInner::Empty => 0,
KeysInner::KV(_) => 2,
KeysInner::Map(m) => m.len(),
KeysInner::Sorted(v) => v.len(),
}
}
}
@ -574,7 +695,7 @@ impl ExactSizeIterator for Keys<'_> {
pub enum IntoIterRepr {
Empty,
Finite(std::vec::IntoIter<(NixString, Value)>),
Map(btree_map::IntoIter<NixString, Value>),
Map(hash_map::IntoIter<NixString, Value>),
}
/// Wrapper type which hides the internal implementation details from
@ -609,7 +730,8 @@ impl DoubleEndedIterator for OwnedAttrsIterator {
match &mut self.0 {
IntoIterRepr::Empty => None,
IntoIterRepr::Finite(inner) => inner.next_back(),
IntoIterRepr::Map(inner) => inner.next_back(),
// hashmaps have arbitary iteration order, so reversing it would not make a difference
IntoIterRepr::Map(inner) => inner.next(),
}
}
}

View file

@ -283,7 +283,7 @@ impl Value {
}
Value::Attrs(attrs) => {
for (_, val) in attrs.into_iter().rev() {
for (_, val) in attrs.into_iter_sorted().rev() {
vals.push(val);
}
continue;