C++ nix uses C-style zero-terminated char pointers to represent strings internally - however, up to this point, tvix has used Rust `String` and `str` for string values. Since those are required to be valid utf-8, we haven't been able to properly represent all the string values that Nix supports. To fix that, this change converts the internal representation of the NixString struct from `Box<str>` to `BString`, from the `bstr` crate - this is a wrapper around a `Vec<u8>` with extra functions for treating that byte vector as a "morally string-like" value, which is basically exactly what we need. Since this changes a pretty fundamental assumption about a pretty core type, there are a *lot* of changes in a lot of places to make this work, but I've tried to keep the general philosophy and intent of most of the code in most places intact. Most notably, there's nothing that's been done to make the derivation stuff in //tvix/glue work with non-utf8 strings everywhere, instead opting to just convert to String/str when passing things into that - there *might* be something to be done there, but I don't know what the rules should be and I don't want to figure them out in this change. To deal with OS-native paths in a way that also works in WASM for tvixbolt, this also adds a dependency on the "os_str_bytes" crate. Fixes: b/189 Fixes: b/337 Change-Id: I5e6eb29c62f47dd91af954f5e12bfc3d186f5526 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10200 Reviewed-by: tazjin <tazjin@tvl.su> Reviewed-by: flokli <flokli@flokli.de> Reviewed-by: sterni <sternenseemann@systemli.org> Autosubmit: aspen <root@gws.fyi> Tested-by: BuildkiteCI
163 lines
6.1 KiB
Rust
163 lines
6.1 KiB
Rust
use std::cmp::Ordering;
|
|
use std::iter::{once, Chain, Once};
|
|
use std::ops::RangeInclusive;
|
|
|
|
use bstr::{BStr, ByteSlice, B};
|
|
|
|
/// Version strings can be broken up into parts.
/// One part represents either a run of digits or a run of other characters.
/// '.' and '-' represent dividers between parts and are not included in any part.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum VersionPart<'a> {
    /// A run of non-digit, non-divider characters, borrowed from the version string.
    Word(&'a BStr),
    /// A run of ASCII digits, borrowed (unparsed) from the version string.
    Number(&'a BStr),
}
|
|
|
|
impl PartialOrd for VersionPart<'_> {
|
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
|
Some(self.cmp(other))
|
|
}
|
|
}
|
|
|
|
impl Ord for VersionPart<'_> {
|
|
fn cmp(&self, other: &Self) -> Ordering {
|
|
match (self, other) {
|
|
(VersionPart::Number(s1), VersionPart::Number(s2)) => {
|
|
// Note: C++ Nix uses `int`, but probably doesn't make a difference
|
|
// We trust that the splitting was done correctly and parsing will work
|
|
let n1: u64 = s1.to_str_lossy().parse().unwrap();
|
|
let n2: u64 = s2.to_str_lossy().parse().unwrap();
|
|
n1.cmp(&n2)
|
|
}
|
|
|
|
// `pre` looses unless the other part is also a `pre`
|
|
(VersionPart::Word(x), VersionPart::Word(y)) if *x == B("pre") && *y == B("pre") => {
|
|
Ordering::Equal
|
|
}
|
|
(VersionPart::Word(x), _) if *x == B("pre") => Ordering::Less,
|
|
(_, VersionPart::Word(y)) if *y == B("pre") => Ordering::Greater,
|
|
|
|
// Number wins against Word
|
|
(VersionPart::Number(_), VersionPart::Word(_)) => Ordering::Greater,
|
|
(VersionPart::Word(_), VersionPart::Number(_)) => Ordering::Less,
|
|
|
|
(VersionPart::Word(w1), VersionPart::Word(w2)) => w1.cmp(w2),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Type used to hold information about a VersionPart during creation
enum InternalPart {
    /// A run of digits is in progress; `range` holds the inclusive indices of
    /// that run within the version string.
    Number { range: RangeInclusive<usize> },
    /// A run of non-digit characters is in progress; `range` holds the
    /// inclusive indices of that run within the version string.
    Word { range: RangeInclusive<usize> },
    /// No part is in progress (initial state, or a divider was just consumed).
    Break,
}
|
|
|
|
/// An iterator which yields the parts of a version string.
///
/// This can then be directly used to compare two versions
pub struct VersionPartsIter<'a> {
    /// The part currently being accumulated and not yet yielded.
    cached_part: InternalPart,
    /// Character cursor over `version`.
    iter: bstr::CharIndices<'a>,
    /// The full version string; yielded parts are slices of it.
    version: &'a BStr,
}
|
|
|
|
impl<'a> VersionPartsIter<'a> {
|
|
pub fn new(version: &'a BStr) -> Self {
|
|
Self {
|
|
cached_part: InternalPart::Break,
|
|
iter: version.char_indices(),
|
|
version,
|
|
}
|
|
}
|
|
|
|
/// Create an iterator that yields all version parts followed by an additional
|
|
/// `VersionPart::Word("")` part (i.e. you can think of this as
|
|
/// `builtins.splitVersion version ++ [ "" ]`). This is necessary, because
|
|
/// Nix's `compareVersions` is not entirely lexicographical: If we have two
|
|
/// equal versions, but one is longer, the longer one is only considered
|
|
/// greater if the first additional part of the longer version is not `pre`,
|
|
/// e.g. `2.3 > 2.3pre`. It is otherwise lexicographical, so peculiar behavior
|
|
/// like `2.3 < 2.3.0pre` ensues. Luckily for us, this means that we can
|
|
/// lexicographically compare two version strings, _if_ we append an extra
|
|
/// component to both versions.
|
|
pub fn new_for_cmp(version: &'a BStr) -> Chain<Self, Once<VersionPart>> {
|
|
Self::new(version).chain(once(VersionPart::Word("".into())))
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for VersionPartsIter<'a> {
|
|
type Item = VersionPart<'a>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
let char = self.iter.next();
|
|
|
|
if char.is_none() {
|
|
let cached_part = std::mem::replace(&mut self.cached_part, InternalPart::Break);
|
|
match cached_part {
|
|
InternalPart::Break => return None,
|
|
InternalPart::Number { range } => {
|
|
return Some(VersionPart::Number(&self.version[range]))
|
|
}
|
|
InternalPart::Word { range } => {
|
|
return Some(VersionPart::Word(&self.version[range]))
|
|
}
|
|
}
|
|
}
|
|
|
|
let (start, end, char) = char.unwrap();
|
|
match char {
|
|
// Divider encountered
|
|
'.' | '-' => {
|
|
let cached_part = std::mem::replace(&mut self.cached_part, InternalPart::Break);
|
|
match cached_part {
|
|
InternalPart::Number { range } => {
|
|
Some(VersionPart::Number(&self.version[range]))
|
|
}
|
|
InternalPart::Word { range } => Some(VersionPart::Word(&self.version[range])),
|
|
InternalPart::Break => self.next(),
|
|
}
|
|
}
|
|
|
|
// digit encountered
|
|
_ if char.is_ascii_digit() => {
|
|
let cached_part = std::mem::replace(
|
|
&mut self.cached_part,
|
|
InternalPart::Number {
|
|
range: start..=(end - 1),
|
|
},
|
|
);
|
|
match cached_part {
|
|
InternalPart::Number { range } => {
|
|
self.cached_part = InternalPart::Number {
|
|
range: *range.start()..=*range.end() + 1,
|
|
};
|
|
self.next()
|
|
}
|
|
InternalPart::Word { range } => Some(VersionPart::Word(&self.version[range])),
|
|
InternalPart::Break => self.next(),
|
|
}
|
|
}
|
|
|
|
// char encountered
|
|
_ => {
|
|
let mut cached_part = InternalPart::Word {
|
|
range: start..=(end - 1),
|
|
};
|
|
std::mem::swap(&mut cached_part, &mut self.cached_part);
|
|
match cached_part {
|
|
InternalPart::Word { range } => {
|
|
self.cached_part = InternalPart::Word {
|
|
range: *range.start()..=*range.end() + char.len_utf8(),
|
|
};
|
|
self.next()
|
|
}
|
|
InternalPart::Number { range } => {
|
|
Some(VersionPart::Number(&self.version[range]))
|
|
}
|
|
InternalPart::Break => self.next(),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|