fix(tvix/eval): Replace inner NixString repr with Box<Bstr>

Storing a full BString here incurs the extra overhead of the capacity
for the inner byte-vector, which we basically never use as Nix strings
are immutable (and we don't do any mutation / sharing analysis).
Switching to a Box<BStr> cuts us from 72 bytes to 64 bytes per
string (and there are a lot of strings!)

Change-Id: I11f34c14a08fa02759f260b1c78b2a2b981714e4
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10794
Autosubmit: aspen <root@gws.fyi>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Aspen Smith 2024-02-10 11:29:58 -05:00 committed by clbot
parent 24a089f87d
commit e3c92ac3b4
5 changed files with 92 additions and 65 deletions

View file

@ -3,7 +3,7 @@
//! See //tvix/eval/docs/builtins.md for a some context on the
//! available builtins in Nix.
use bstr::ByteVec;
use bstr::{ByteSlice, ByteVec};
use builtin_macros::builtins;
use genawaiter::rc::Gen;
use imbl::OrdMap;
@ -67,7 +67,7 @@ pub async fn coerce_value_to_path(
.await
{
Ok(vs) => {
let path = (**vs).clone().into_path_buf()?;
let path = vs.to_path()?.to_owned();
if path.is_absolute() {
Ok(Ok(path))
} else {
@ -82,7 +82,7 @@ pub async fn coerce_value_to_path(
mod pure_builtins {
use std::ffi::OsString;
use bstr::{BString, ByteSlice};
use bstr::{BString, ByteSlice, B};
use imbl::Vector;
use itertools::Itertools;
use os_str_bytes::OsStringBytes;
@ -171,26 +171,23 @@ mod pure_builtins {
#[builtin("baseNameOf")]
async fn builtin_base_name_of(co: GenCo, s: Value) -> Result<Value, ErrorKind> {
let span = generators::request_span(&co).await;
let mut s = match s {
val @ Value::Catchable(_) => return Ok(val),
_ => s
.coerce_to_string(
co,
CoercionKind {
strong: false,
import_paths: false,
},
span,
)
.await?
.to_contextful_str()?,
};
let s = s
.coerce_to_string(
co,
CoercionKind {
strong: false,
import_paths: false,
},
span,
)
.await?
.to_contextful_str()?;
let bs = s.as_mut_bstring();
let mut bs = (**s).to_owned();
if let Some(last_slash) = bs.rfind_char('/') {
*bs = bs[(last_slash + 1)..].into();
bs = bs[(last_slash + 1)..].into();
}
Ok(s.into())
Ok(NixString::new_inherit_context_from(&s, bs).into())
}
#[builtin("bitAnd")]
@ -344,7 +341,7 @@ mod pure_builtins {
.map(|last_slash| {
let x = &str[..last_slash];
if x.is_empty() {
b"/"
B("/")
} else {
x
}
@ -356,8 +353,7 @@ mod pure_builtins {
))))
} else {
Ok(Value::from(NixString::new_inherit_context_from(
&str,
result.into(),
&str, result,
)))
}
}
@ -1190,7 +1186,7 @@ mod pure_builtins {
// push the unmatched characters preceding the match
ret.push_back(Value::from(NixString::new_inherit_context_from(
&s,
(&text[pos..thematch.start()]).into(),
&text[pos..thematch.start()],
)));
// Push a list with one element for each capture
@ -1363,7 +1359,7 @@ mod pure_builtins {
Ok(Value::from(NixString::new_inherit_context_from(
&x,
(&x[beg..end]).into(),
&x[beg..end],
)))
}

View file

@ -146,7 +146,7 @@ impl NixContext {
// FIXME: when serializing, ignore the context?
#[derive(Clone, Debug, Serialize)]
pub struct NixString(BString, Option<NixContext>);
pub struct NixString(Box<BStr>, Option<NixContext>);
impl PartialEq for NixString {
fn eq(&self, other: &Self) -> bool {
@ -180,45 +180,66 @@ impl Ord for NixString {
}
}
impl From<Box<BStr>> for NixString {
fn from(value: Box<BStr>) -> Self {
Self(value, None)
}
}
impl From<BString> for NixString {
fn from(value: BString) -> Self {
Self(Vec::<u8>::from(value).into_boxed_slice().into(), None)
}
}
impl From<&BStr> for NixString {
fn from(value: &BStr) -> Self {
Self(value.to_owned(), None)
value.to_owned().into()
}
}
impl From<&[u8]> for NixString {
fn from(value: &[u8]) -> Self {
Self(value.into(), None)
Self::from(value.to_owned())
}
}
impl From<Vec<u8>> for NixString {
fn from(value: Vec<u8>) -> Self {
value.into_boxed_slice().into()
}
}
impl From<Box<[u8]>> for NixString {
fn from(value: Box<[u8]>) -> Self {
Self(value.into(), None)
}
}
impl From<&str> for NixString {
fn from(s: &str) -> Self {
Self(s.as_bytes().into(), None)
s.as_bytes().into()
}
}
impl From<String> for NixString {
fn from(s: String) -> Self {
Self(s.into(), None)
s.into_bytes().into()
}
}
impl From<(String, Option<NixContext>)> for NixString {
fn from((s, ctx): (String, Option<NixContext>)) -> Self {
NixString(s.into(), ctx)
impl<T> From<(T, Option<NixContext>)> for NixString
where
NixString: From<T>,
{
fn from((s, ctx): (T, Option<NixContext>)) -> Self {
NixString(NixString::from(s).0, ctx)
}
}
impl From<Box<str>> for NixString {
fn from(s: Box<str>) -> Self {
Self(s.into_boxed_bytes().into_vec().into(), None)
s.into_boxed_bytes().into()
}
}
@ -234,12 +255,18 @@ impl<'a> From<&'a NixString> for &'a BStr {
}
}
impl From<NixString> for BString {
impl From<NixString> for Box<BStr> {
fn from(s: NixString) -> Self {
s.0
}
}
impl From<NixString> for BString {
fn from(s: NixString) -> Self {
s.0.to_vec().into()
}
}
impl AsRef<[u8]> for NixString {
fn as_ref(&self) -> &[u8] {
&self.0
@ -292,7 +319,7 @@ impl<'de> Deserialize<'de> for NixString {
}
impl Deref for NixString {
type Target = BString;
type Target = BStr;
fn deref(&self) -> &Self::Target {
&self.0
@ -317,13 +344,19 @@ mod arbitrary {
}
impl NixString {
pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self {
Self(new_contents, other.1.clone())
pub fn new_inherit_context_from<T>(other: &NixString, new_contents: T) -> Self
where
NixString: From<T>,
{
Self(Self::from(new_contents).0, other.1.clone())
}
pub fn new_context_from(context: NixContext, contents: BString) -> Self {
pub fn new_context_from<T>(context: NixContext, contents: T) -> Self
where
NixString: From<T>,
{
Self(
contents,
Self::from(contents).0,
if context.is_empty() {
None
} else {
@ -332,12 +365,8 @@ impl NixString {
)
}
pub fn as_mut_bstring(&mut self) -> &mut BString {
&mut self.0
}
pub fn as_bstr(&self) -> &BStr {
BStr::new(self.as_bytes())
self
}
pub fn as_bytes(&self) -> &[u8] {
@ -345,7 +374,7 @@ impl NixString {
}
pub fn into_bstring(self) -> BString {
self.0
(*self.0).to_owned()
}
/// Return a displayable representation of the string as an
@ -378,7 +407,7 @@ impl NixString {
pub fn concat(&self, other: &Self) -> Self {
let mut s = self.to_vec();
s.extend(other.0.as_slice());
s.extend(&(***other));
let context = [&self.1, &other.1]
.into_iter()
@ -386,7 +415,7 @@ impl NixString {
.fold(NixContext::new(), |acc_ctx, new_ctx| {
acc_ctx.join(&mut new_ctx.clone())
});
Self::new_context_from(context, s.into())
Self::new_context_from(context, s)
}
pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> {
@ -514,6 +543,11 @@ mod tests {
use crate::properties::{eq_laws, hash_laws, ord_laws};
#[test]
fn size() {
assert_eq!(std::mem::size_of::<NixString>(), 64);
}
eq_laws!(NixString);
hash_laws!(NixString);
ord_laws!(NixString);

View file

@ -558,7 +558,7 @@ where
return frame.error(
self,
ErrorKind::AttributeNotFound {
name: (**key).clone().into_string_lossy()
name: key.to_str_lossy().into_owned()
},
);
}