feat(users/edef/weave): use FxHashSet and dedupe early

Deduping early saves a fair bit of memory, but the extra hashing is
costly.

To offset that cost, we switch to FxHash: we don't need a DoS-resistant
hash, but we do need it to be *fast*.

Change-Id: Ic6b7010874c417862baa9b882593208c8dd1d5e6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12648
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
edef 2024-10-19 13:58:53 +00:00
parent 06d2536eec
commit 201d8f0cf2
4 changed files with 33 additions and 4 deletions

View file

@@ -8,8 +8,9 @@
use anyhow::Result;
use hashbrown::{hash_table, HashTable};
use rayon::prelude::*;
use rustc_hash::FxHashSet;
use std::{
collections::{BTreeMap, HashSet},
collections::BTreeMap,
fs::File,
ops::Index,
sync::atomic::{AtomicU32, Ordering},
@@ -54,7 +55,8 @@ fn main() -> Result<()> {
eprintln!("{DONE}");
}
let mut todo = HashSet::with_capacity(roots.len());
let mut todo = FxHashSet::default();
todo.reserve(roots.len());
{
let mut unknown_roots = 0usize;
for (_, idx) in roots.table {
@@ -99,14 +101,14 @@ fn main() -> Result<()> {
.par_iter()
.flat_map(|&parent| {
if parent == INDEX_NULL {
return vec![];
return FxHashSet::default();
}
ri_array[parent as usize]
.iter()
.cloned()
.filter(|child| !seen.contains(child))
.collect::<Vec<u32>>()
.collect::<FxHashSet<u32>>()
})
.collect();