feat(tvix/nix-compat): NAR reader
Change-Id: I50d51baf62c0419eaf17f0dc262f728aaff9794d Reviewed-on: https://cl.tvl.fyi/c/depot/+/9688 Reviewed-by: raitobezarius <tvl@lahfa.xyz> Tested-by: BuildkiteCI Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
parent
08b98b7503
commit
6638f4d4ea
8 changed files with 530 additions and 4 deletions
132
tvix/nix-compat/src/nar/wire/mod.rs
Normal file
132
tvix/nix-compat/src/nar/wire/mod.rs
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
//! NAR wire format, without I/O details, since those differ between
|
||||
//! the synchronous and asynchronous implementations.
|
||||
//!
|
||||
//! The wire format is an S-expression format, encoded onto the wire
|
||||
//! using simple encoding rules.
|
||||
//!
|
||||
//! # Encoding
|
||||
//!
|
||||
//! Lengths are represented as 64-bit unsigned integers in little-endian
|
||||
//! format. Byte strings, including file contents and syntactic strings
|
||||
//! part of the grammar, are prefixed by their 64-bit length, and padded
|
||||
//! to 8-byte (64-bit) alignment with zero bytes. The zero-length string
|
||||
//! is therefore encoded as eight zero bytes representing its length.
|
||||
//!
|
||||
//! # Grammar
|
||||
//!
|
||||
//! The NAR grammar is as follows:
|
||||
//! ```plain
|
||||
//! archive ::= "nix-archive-1" node
|
||||
//!
|
||||
//! node ::= "(" "type" "symlink" "target" string ")"
|
||||
//! ||= "(" "type" "regular" ("executable" "")? "contents" string ")"
|
||||
//! ||= "(" "type" "directory" entry* ")"
|
||||
//!
|
||||
//! entry ::= "entry" "(" "name" string "node" node ")"
|
||||
//! ```
|
||||
//!
|
||||
//! We rewrite it to pull together the purely syntactic elements into
|
||||
//! unified tokens, producing an equivalent grammar that can be parsed
|
||||
//! and serialized more elegantly:
|
||||
//! ```plain
|
||||
//! archive ::= TOK_NAR node
|
||||
//! node ::= TOK_SYM string TOK_PAR
|
||||
//! ||= (TOK_REG | TOK_EXE) string TOK_PAR
|
||||
//! ||= TOK_DIR entry* TOK_PAR
|
||||
//!
|
||||
//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR
|
||||
//!
|
||||
//! TOK_NAR ::= "nix-archive-1" "(" "type"
|
||||
//! TOK_SYM ::= "symlink" "target"
|
||||
//! TOK_REG ::= "regular" "contents"
|
||||
//! TOK_EXE ::= "regular" "executable" "" "contents"
|
||||
//! TOK_DIR ::= "directory"
|
||||
//! TOK_ENT ::= "entry" "(" "name"
|
||||
//! TOK_NOD ::= "node" "(" "type"
|
||||
//! TOK_PAR ::= ")"
|
||||
//! ```
|
||||
//!
|
||||
//! # Restrictions
|
||||
//!
|
||||
//! NOTE: These restrictions are not (and cannot be) enforced by this module,
|
||||
//! but must be enforced by its consumers, [super::reader] and [super::writer].
|
||||
//!
|
||||
//! Directory entry names cannot have the reserved names `.` and `..`, nor contain
|
||||
//! forward slashes. They must appear in strictly ascending lexicographic order
|
||||
//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length.
|
||||
//!
|
||||
//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length.
|
||||
//!
|
||||
//! Neither is permitted to be empty, or contain null bytes.
|
||||
|
||||
// These values are the standard Linux length limits
|
||||
/// Maximum length of a directory entry name, in bytes
|
||||
pub const MAX_NAME_LEN: usize = 255;
|
||||
/// Maximum length of a symlink target, in bytes
|
||||
pub const MAX_TARGET_LEN: usize = 4095;
|
||||
|
||||
/// Test helper: encode every string in `xs` as a NAR wire string and
/// concatenate the results.
///
/// Each string is written as its byte length (a little-endian `u64`),
/// followed by the bytes themselves and then as many zero bytes as are
/// needed to reach the next multiple of eight.
#[cfg(test)]
fn token(xs: &[&str]) -> Vec<u8> {
    let mut wire = Vec::new();

    for s in xs {
        let bytes = s.as_bytes();
        wire.extend_from_slice(&(bytes.len() as u64).to_le_bytes());
        wire.extend_from_slice(bytes);

        // A length that is already a multiple of eight needs no padding.
        let padding = (8 - bytes.len() % 8) % 8;
        wire.resize(wire.len() + padding, 0);
    }

    wire
}
|
||||
|
||||
/// Wire encoding of `"nix-archive-1" "(" "type"`: the archive magic plus the opening of the root node.
pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
|
||||
/// Wire encoding of `"symlink" "target"`; the grammar follows it with the target string.
pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0";
|
||||
/// Wire encoding of `"regular" "contents"`; the grammar follows it with the file contents string.
pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents";
|
||||
/// Wire encoding of `"regular" "executable" "" "contents"`; the grammar follows it with the file contents string.
pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents";
|
||||
/// Wire encoding of `"directory"`; the grammar follows it with zero or more entries and a closing [TOK_PAR].
pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0";
|
||||
/// Wire encoding of `"entry" "(" "name"`; the grammar follows it with the entry name string.
pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0";
|
||||
/// Wire encoding of `"node" "(" "type"`; it sits between an entry's name and the entry's node.
pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
|
||||
/// Wire encoding of `")"`, which closes a node or a directory entry.
pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
|
||||
|
||||
/// Checks every hand-written token constant against the encoding that
/// [token] produces for the strings the token is made of.
#[test]
fn tokens() {
    fn check(wire: &[u8], parts: &[&str]) {
        assert_eq!(wire, token(parts));
    }

    check(&TOK_NAR, &["nix-archive-1", "(", "type"]);
    check(&TOK_SYM, &["symlink", "target"]);
    check(&TOK_REG, &["regular", "contents"]);
    check(&TOK_EXE, &["regular", "executable", "", "contents"]);
    check(&TOK_DIR, &["directory"]);
    check(&TOK_ENT, &["entry", "(", "name"]);
    check(&TOK_NOD, &["node", "(", "type"]);
    check(&TOK_PAR, &[")"]);
}
|
||||
|
||||
pub use tag::Tag;
|
||||
mod tag;
|
||||
|
||||
tag::make! {
    /// Node type token. It comes right after [TOK_NAR] or [TOK_NOD] and
    /// right before the next variable-length element.
    // Offset 16 is the first byte at which all four tokens differ.
    pub enum Node[16] {
        /// Symbolic link ([TOK_SYM])
        Sym = TOK_SYM,
        /// Regular file without the executable marker ([TOK_REG])
        Reg = TOK_REG,
        /// Regular file with the executable marker ([TOK_EXE])
        Exe = TOK_EXE,
        /// Directory ([TOK_DIR])
        Dir = TOK_DIR,
    }

    /// What comes next inside a directory: another entry, or the end of
    /// the directory.
    pub enum Entry[0] {
        /// End of directory
        None = TOK_PAR,
        /// Directory entry, which must be followed by [Node]
        Some = TOK_ENT,
    }
}
|
||||
165
tvix/nix-compat/src/nar/wire/tag.rs
Normal file
165
tvix/nix-compat/src/nar/wire/tag.rs
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
/// A [Tag] type models a fixed set of byte strings (its wire representations)
/// that can be told apart by looking at a single byte: at one common offset,
/// every member of the set carries a different value. That byte acts as a
/// trivial perfect hash, and each value of the implementing type stands for
/// the member of the set identified by it.
pub trait Tag: Sized {
    /// Byte offset of the discriminant within every wire representation
    const OFF: usize;
    /// Length in bytes of the shortest wire representation
    const MIN: usize;

    /// Look up the variant identified by the discriminant byte `x`
    fn from_u8(x: u8) -> Option<Self>;

    /// The complete wire representation of this variant
    fn as_bytes(&self) -> &'static [u8];

    /// Minimal suitably sized buffer for reading the wire representation
    ///
    /// HACK: This associated type exists only to work around limitations
    /// of const generics.
    type Buf: AsMut<[u8]> + Send;

    /// Create a fresh instance of [Self::Buf]
    fn make_buf() -> Self::Buf;
}
|
||||
|
||||
/// Generate one or more enums implementing [Tag].
///
/// Every variant is declared as `Variant = TOKEN`, and its discriminant is
/// the byte found in `TOKEN` at the enum's offset. Distinctness of the
/// discriminant values is thereby enforced at compile time.
macro_rules! make {
    (
        $(
            $(#[doc = $enum_doc:expr])*
            $vis:vis enum $Name:ident[$offset:expr] {
                $(
                    $(#[doc = $variant_doc:expr])*
                    $Variant:ident = $TOKEN:ident,
                )+
            }
        )*
    ) => {
        $(
            $(#[doc = $enum_doc])*
            #[derive(Debug, PartialEq, Eq)]
            #[repr(u8)]
            $vis enum $Name {
                $(
                    $(#[doc = $variant_doc])*
                    $Variant = $TOKEN[$Name::OFF]
                ),+
            }

            impl Tag for $Name {
                /// Byte offset of the discriminant
                const OFF: usize = $offset;
                /// Length of the shortest token
                const MIN: usize = tag::min_of(&[$($TOKEN.len()),+]);

                /// Smallest buffer suitable for reading the wire representation
                type Buf = [u8; tag::buf_of(&[$($TOKEN.len()),+])];

                /// Create a zero-filled [Self::Buf]
                #[inline(always)]
                fn make_buf() -> Self::Buf {
                    [0u8; tag::buf_of(&[$($TOKEN.len()),+])]
                }

                /// Map a discriminant byte to its variant, if there is one
                #[inline(always)]
                fn from_u8(x: u8) -> Option<Self> {
                    // One named constant per variant, holding that
                    // variant's discriminant for use as a match pattern.
                    #[allow(non_upper_case_globals)]
                    mod __discriminant {
                        $(
                            pub const $Variant: u8 = super::$Name::$Variant as u8;
                        )+
                    }

                    match x {
                        $(__discriminant::$Variant => Some(Self::$Variant),)+
                        _ => None
                    }
                }

                /// The token this variant was declared with
                #[inline(always)]
                fn as_bytes(&self) -> &'static [u8] {
                    match self {
                        $(Self::$Variant => &$TOKEN,)+
                    }
                }
            }
        )*
    };
}
|
||||
|
||||
pub(crate) use make;
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::tag::{self, Tag};

    /// Offset of the only byte at which the two example tokens differ
    const OFFSET: usize = 1;

    const TOK_A: [u8; 3] = [0xed, 0xef, 0x1c];
    const TOK_B: [u8; 3] = [0xed, 0xf0, 0x1c];

    make! {
        enum Token[OFFSET] {
            A = TOK_A,
            B = TOK_B,
        }
    }

    /// A discriminant byte resolves to the variant whose token carries it
    /// at [OFFSET]; any other byte resolves to nothing.
    #[test]
    fn example() {
        // 0xed is the first byte of both tokens, not a discriminant.
        assert_eq!(Token::from_u8(0xed), None);

        for (discriminant, wire) in [(0xef, &TOK_A), (0xf0, &TOK_B)] {
            let tag = Token::from_u8(discriminant).unwrap();
            assert_eq!(tag.as_bytes(), &wire[..]);
        }
    }
}
|
||||
|
||||
// The following functions are written somewhat unusually,
|
||||
// since they're const functions that cannot use iterators.
|
||||
|
||||
/// Largest element of `xs`, or `usize::MIN` if `xs` is empty
const fn max_of(xs: &[usize]) -> usize {
    let mut largest = usize::MIN;
    let mut i = 0;
    while i < xs.len() {
        if xs[i] > largest {
            largest = xs[i];
        }
        i += 1;
    }
    largest
}
|
||||
|
||||
/// Smallest element of `xs`, or `usize::MAX` if `xs` is empty
pub const fn min_of(xs: &[usize]) -> usize {
    let mut smallest = usize::MAX;
    let mut i = 0;
    while i < xs.len() {
        if xs[i] < smallest {
            smallest = xs[i];
        }
        i += 1;
    }
    smallest
}
|
||||
|
||||
/// Minimum buffer size to contain either of `0..Tag::MIN` and `Tag::MIN..`
|
||||
/// at a particular time, for all possible tag wire representations, given
|
||||
/// the sizes of all wire representations.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```plain
|
||||
/// OFF = 16
|
||||
/// MIN = 24
|
||||
/// MAX = 64
|
||||
///
|
||||
/// BUF = max(MIN, MAX-MIN)
|
||||
/// = max(24, 64-24)
|
||||
/// = max(24, 40)
|
||||
/// = 40
|
||||
/// ```
|
||||
pub const fn buf_of(xs: &[usize]) -> usize {
|
||||
max_of(&[min_of(xs), max_of(xs) - min_of(xs)])
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue