fix(tvix): Represent strings as byte arrays

C++ Nix uses C-style zero-terminated char pointers to represent strings
internally. Up to this point, however, Tvix has used Rust `String` and
`str` for string values. Since those are required to be valid UTF-8, we
haven't been able to properly represent all the string values that Nix
supports.

To fix that, this change converts the internal representation of the
NixString struct from `Box<str>` to `BString`, from the `bstr` crate -
this is a wrapper around a `Vec<u8>` with extra functions for treating
that byte vector as a "morally string-like" value, which is basically
exactly what we need.

Since this changes a pretty fundamental assumption about a pretty core
type, there are a *lot* of changes in a lot of places to make this work,
but I've tried to keep the general philosophy and intent of most of the
code in most places intact. Most notably, nothing has been done to make
the derivation code in //tvix/glue work with non-UTF-8 strings
everywhere; instead, this change just converts to String/str when
passing values into it. There *might* be something to be done there,
but I don't know what the rules should be and I don't want to figure
them out in this change.

To deal with OS-native paths in a way that also works in WASM for
tvixbolt, this also adds a dependency on the "os_str_bytes" crate.

Fixes: b/189
Fixes: b/337
Change-Id: I5e6eb29c62f47dd91af954f5e12bfc3d186f5526
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10200
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: sterni <sternenseemann@systemli.org>
Autosubmit: aspen <root@gws.fyi>
Tested-by: BuildkiteCI
This commit is contained in:
Aspen Smith 2023-12-05 17:25:52 -05:00 committed by aspen
parent 6f9e25943f
commit 201173afac
24 changed files with 427 additions and 223 deletions

View file

@ -125,6 +125,7 @@ pub(crate) mod derivation_builtins {
use std::collections::BTreeMap;
use super::*;
use bstr::{ByteSlice, ByteVec};
use nix_compat::store_path::hash_placeholder;
use tvix_eval::generators::Gen;
use tvix_eval::{NixContext, NixContextElement, NixString};
@ -139,7 +140,7 @@ pub(crate) mod derivation_builtins {
input
.to_str()
.context("looking at output name in builtins.placeholder")?
.as_str(),
.to_str()?,
);
Ok(placeholder.into())
@ -167,10 +168,10 @@ pub(crate) mod derivation_builtins {
}
let name = name.to_str().context("determining derivation name")?;
if name.is_empty() {
return Err(ErrorKind::Abort("derivation has empty name".to_string()));
}
let name = name.to_str()?;
let mut drv = Derivation::default();
drv.outputs.insert("out".to_string(), Default::default());
@ -199,7 +200,11 @@ pub(crate) mod derivation_builtins {
/// Inserts a key and value into the drv.environment BTreeMap, and fails if the
/// key already existed.
fn insert_env(drv: &mut Derivation, k: &str, v: BString) -> Result<(), DerivationError> {
fn insert_env(
drv: &mut Derivation,
k: &str, /* TODO: non-utf8 env keys */
v: BString,
) -> Result<(), DerivationError> {
if drv.environment.insert(k.into(), v).is_some() {
return Err(DerivationError::DuplicateEnvVar(k.into()));
}
@ -228,6 +233,7 @@ pub(crate) mod derivation_builtins {
// Some set special fields in the Derivation struct, some change
// behaviour of other functionality.
for (arg_name, arg_value) in input.clone().into_iter_sorted() {
let arg_name = arg_name.to_str()?;
// force the current value.
let value = generators::request_force(&co, arg_value).await;
@ -236,7 +242,7 @@ pub(crate) mod derivation_builtins {
continue;
}
match arg_name.as_str() {
match arg_name {
// Command line arguments to the builder.
// These are only set in drv.arguments.
"args" => {
@ -245,7 +251,7 @@ pub(crate) mod derivation_builtins {
Err(cek) => return Ok(Value::Catchable(cek)),
Ok(s) => {
input_context.mimic(&s);
drv.arguments.push(s.as_str().to_string())
drv.arguments.push((**s).clone().into_string()?)
}
}
}
@ -274,18 +280,18 @@ pub(crate) mod derivation_builtins {
// Populate drv.outputs
if drv
.outputs
.insert(output_name.as_str().to_string(), Default::default())
.insert((**output_name).clone().into_string()?, Default::default())
.is_some()
{
Err(DerivationError::DuplicateOutput(
output_name.as_str().into(),
(**output_name).clone().into_string_lossy(),
))?
}
output_names.push(output_name.as_str().to_string());
output_names.push((**output_name).clone().into_string()?);
}
// Add drv.environment[outputs] unconditionally.
insert_env(&mut drv, arg_name.as_str(), output_names.join(" ").into())?;
insert_env(&mut drv, arg_name, output_names.join(" ").into())?;
// drv.environment[$output_name] is added after the loop,
// with whatever is in drv.outputs[$output_name].
}
@ -297,19 +303,21 @@ pub(crate) mod derivation_builtins {
Ok(val_str) => {
input_context.mimic(&val_str);
if arg_name.as_str() == "builder" {
drv.builder = val_str.as_str().to_owned();
if arg_name == "builder" {
drv.builder = (**val_str).clone().into_string()?;
} else {
drv.system = val_str.as_str().to_owned();
drv.system = (**val_str).clone().into_string()?;
}
// Either populate drv.environment or structured_attrs.
if let Some(ref mut structured_attrs) = structured_attrs {
// No need to check for dups, we only iterate over every attribute name once
structured_attrs
.insert(arg_name.as_str().into(), val_str.as_str().into());
structured_attrs.insert(
arg_name.to_owned(),
(**val_str).clone().into_string()?.into(),
);
} else {
insert_env(&mut drv, arg_name.as_str(), val_str.as_bytes().into())?;
insert_env(&mut drv, arg_name, val_str.as_bytes().into())?;
}
}
}
@ -339,14 +347,14 @@ pub(crate) mod derivation_builtins {
};
// No need to check for dups, we only iterate over every attribute name once
structured_attrs.insert(arg_name.as_str().to_string(), val_json);
structured_attrs.insert(arg_name.to_owned(), val_json);
} else {
match strong_importing_coerce_to_string(&co, value).await {
Err(cek) => return Ok(Value::Catchable(cek)),
Ok(val_str) => {
input_context.mimic(&val_str);
insert_env(&mut drv, arg_name.as_str(), val_str.as_bytes().into())?;
insert_env(&mut drv, arg_name, val_str.as_bytes().into())?;
}
}
}
@ -365,7 +373,7 @@ pub(crate) mod derivation_builtins {
if let Some(attr) = attrs.select(key) {
match strong_importing_coerce_to_string(co, attr.clone()).await {
Err(cek) => return Ok(Err(cek)),
Ok(str) => return Ok(Ok(Some(str.as_str().to_string()))),
Ok(str) => return Ok(Ok(Some((**str).clone().into_string()?))),
}
}
@ -438,11 +446,11 @@ pub(crate) mod derivation_builtins {
});
// Mutate the Derivation struct and set output paths
drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp)
drv.calculate_output_paths(name, &derivation_or_fod_hash_tmp)
.map_err(DerivationError::InvalidDerivation)?;
let drv_path = drv
.calculate_derivation_path(&name)
.calculate_derivation_path(name)
.map_err(DerivationError::InvalidDerivation)?;
// recompute the hash derivation modulo and add to known_paths
@ -508,21 +516,23 @@ pub(crate) mod derivation_builtins {
return Err(ErrorKind::UnexpectedContext);
}
let path = nix_compat::store_path::build_text_path(
name.as_str(),
content.as_str(),
content.iter_plain(),
)
.map_err(|_e| {
nix_compat::derivation::DerivationError::InvalidOutputName(name.as_str().to_string())
})
.map_err(DerivationError::InvalidDerivation)?
.to_absolute_path();
let path =
nix_compat::store_path::build_text_path(name.to_str()?, &content, content.iter_plain())
.map_err(|_e| {
nix_compat::derivation::DerivationError::InvalidOutputName(
(**name).clone().into_string_lossy(),
)
})
.map_err(DerivationError::InvalidDerivation)?
.to_absolute_path();
let context: NixContext = NixContextElement::Plain(path.clone()).into();
// TODO: actually persist the file in the store at that path ...
Ok(Value::String(NixString::new_context_from(context, &path)))
Ok(Value::String(NixString::new_context_from(
context,
path.into(),
)))
}
}

View file

@ -74,10 +74,7 @@ mod tests {
match value {
tvix_eval::Value::String(s) => {
assert_eq!(
"/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo",
s.as_str()
);
assert_eq!(s, "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo",);
}
_ => panic!("unexpected value type: {:?}", value),
}
@ -162,7 +159,7 @@ mod tests {
match value {
tvix_eval::Value::String(s) => {
assert_eq!(expected_path, s.as_str());
assert_eq!(s, expected_path);
}
_ => panic!("unexpected value type: {:?}", value),
}
@ -285,7 +282,7 @@ mod tests {
match value {
tvix_eval::Value::String(s) => {
assert_eq!(expected_drvpath, s.as_str());
assert_eq!(s, expected_drvpath);
}
_ => panic!("unexpected value type: {:?}", value),
@ -314,7 +311,7 @@ mod tests {
match value {
tvix_eval::Value::String(s) => {
assert_eq!(expected_path, s.as_str());
assert_eq!(s, expected_path);
}
_ => panic!("unexpected value type: {:?}", value),
}

View file

@ -297,6 +297,7 @@ impl EvalIO for TvixStoreIO {
mod tests {
use std::{path::Path, rc::Rc, sync::Arc};
use bstr::ByteVec;
use tempfile::TempDir;
use tvix_build::buildservice::DummyBuildService;
use tvix_castore::{
@ -355,7 +356,7 @@ mod tests {
let value = result.value.expect("must be some");
match value {
tvix_eval::Value::String(s) => return Some(s.as_str().to_owned()),
tvix_eval::Value::String(s) => Some((**s).clone().into_string_lossy()),
_ => panic!("unexpected value type: {:?}", value),
}
}
@ -421,7 +422,7 @@ mod tests {
match value {
tvix_eval::Value::String(s) => {
assert_eq!("/deep/thought", s.as_str());
assert_eq!(s, "/deep/thought");
}
_ => panic!("unexpected value type: {:?}", value),
}