From 4de1129acbd749e9605695259d83ad6383a6108d Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Fri, 30 May 2025 00:24:50 +0300 Subject: [PATCH] refactor(snix/eval): cache regexes for builtins Some analysis of what is going on in nixpkgs revealed that we recompiled the same regexes many times, and there aren't that many different regexes anyways. This was actually visible in flamegraphs; now it's not anymore. Fixes https://git.snix.dev/snix/snix/issues/151 Change-Id: Ia04b1833fec083017aebac99cdae7e91148966c4 Reviewed-on: https://cl.tvl.fyi/c/depot/+/13464 Reviewed-by: sterni Tested-by: BuildkiteCI (cherry picked from commit aa1eca36c3b3c18d96ba3081d7053b4c639e2f17) Reviewed-on: https://cl.snix.dev/c/snix/+/30587 Tested-by: besadii Autosubmit: Florian Klink Reviewed-by: Bence Nemes --- snix/eval/src/builtins/mod.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/snix/eval/src/builtins/mod.rs b/snix/eval/src/builtins/mod.rs index 5f271bee6..96a2734f2 100644 --- a/snix/eval/src/builtins/mod.rs +++ b/snix/eval/src/builtins/mod.rs @@ -7,10 +7,12 @@ use bstr::{ByteSlice, ByteVec}; use builtin_macros::builtins; use genawaiter::rc::Gen; use regex::Regex; +use rustc_hash::FxHashMap; use std::cmp::{self, Ordering}; use std::collections::BTreeMap; use std::collections::VecDeque; use std::path::PathBuf; +use std::sync::{Mutex, OnceLock}; use crate::arithmetic_op; use crate::value::PointerEquality; @@ -79,6 +81,22 @@ pub async fn coerce_value_to_path( } } +static REGEX_CACHE: OnceLock<Mutex<FxHashMap<String, Regex>>> = OnceLock::new(); + +fn cached_regex(pattern: &str) -> Result<Regex, regex::Error> { + let cache = REGEX_CACHE.get_or_init(|| Mutex::new(Default::default())); + let mut map = cache.lock().unwrap(); + + match map.get(pattern) { + Some(regex) => Ok(regex.clone()), + None => { + let regex = Regex::new(pattern)?; + map.insert(pattern.to_string(), regex.clone()); + Ok(regex) + } + } +} + #[builtins] mod pure_builtins { use std::ffi::OsString; @@ -973,7 +991,9 @@ mod
pure_builtins { return Ok(re); } let re = re.to_str()?; - let re: Regex = Regex::new(&format!("^{}$", re.to_str()?)).unwrap(); + let re: Regex = + cached_regex(&format!("^{}$", re.to_str()?)).expect("TODO(tazjin): propagate error"); + match re.captures(s.to_str()?) { Some(caps) => Ok(Value::List( caps.iter() @@ -1193,7 +1213,7 @@ mod pure_builtins { let s = str.to_contextful_str()?; let text = s.to_str()?; let re = regex.to_str()?; - let re = Regex::new(re.to_str()?).unwrap(); + let re = cached_regex(re.to_str()?).unwrap(); let mut capture_locations = re.capture_locations(); let num_captures = capture_locations.len(); let mut ret = Vec::new();