chore(nix-compat): bump to nom 8.x

See 72dd5818b7/CHANGELOG.md
for the nom changelog.

Most notably, parsers now have to be invoked through an explicit `.parse()` call:

`combinator(arg)(input)` -> `combinator(arg).parse(input)`

The `tuple` combinator is no longer needed either, as `Parser` is now
implemented on tuples directly.

This also refactors the string and bytes field parsers to make them
more concise.

Change-Id: I9e8a3cedd07d6705be391898eb6a486fb8164069
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13193
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
Reviewed-by: Brian Olsen <me@griff.name>
This commit is contained in:
Florian Klink 2025-03-02 22:06:11 +07:00 committed by flokli
parent 2daa483249
commit a512f16424
15 changed files with 83 additions and 233 deletions

View file

@ -4,12 +4,12 @@
//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
use bstr::BString;
use nom::branch::alt;
use nom::bytes::complete::{escaped_transform, is_not, tag};
use nom::bytes::complete::{escaped_transform, is_not};
use nom::character::complete::char as nomchar;
use nom::combinator::{map, value};
use nom::combinator::{map_res, opt, value};
use nom::multi::separated_list0;
use nom::sequence::delimited;
use nom::IResult;
use nom::{IResult, Parser};
/// Parse a bstr and undo any escaping (which is why this needs to allocate).
// FUTUREWORK: have a version for fields that are known to not need escaping
@ -32,48 +32,37 @@ fn parse_escaped_bytes(i: &[u8]) -> IResult<&[u8], BString> {
/// Parse a field in double quotes, undo any escaping, and return the unquoted
/// and decoded bytes as a [BString].
pub(crate) fn parse_bytes_field(i: &[u8]) -> IResult<&[u8], BString> {
// inside double quotes…
delimited(
nomchar('\"'),
// There is
alt((
// …either is a bstr after unescaping
parse_escaped_bytes,
// …or an empty string.
map(tag(b""), |_| BString::default()),
)),
opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
nomchar('\"'),
)(i)
)
.parse(i)
}
/// Parse a field in double quotes, undo any escaping, and return the unquoted
/// and decoded [String], if it's valid UTF-8.
/// Parsing fails if the bytes are not valid UTF-8.
pub(crate) fn parse_string_field(i: &[u8]) -> IResult<&[u8], String> {
// inside double quotes…
delimited(
nomchar('\"'),
// There is
alt((
// either is a String after unescaping
nom::combinator::map_opt(parse_escaped_bytes, |escaped_bytes| {
String::from_utf8(escaped_bytes.into()).ok()
}),
// or an empty string.
map(tag(b""), |_| "".to_string()),
)),
map_res(
opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
|bstr| String::from_utf8(bstr.to_vec()),
),
nomchar('\"'),
)(i)
)
.parse(i)
}
/// Parse a list of string fields (enclosed in brackets)
pub(crate) fn parse_string_list(i: &[u8]) -> IResult<&[u8], Vec<String>> {
// inside brackets
delimited(
nomchar('['),
separated_list0(nomchar(','), parse_string_field),
nomchar(']'),
)(i)
)
.parse(i)
}
#[cfg(test)]

View file

@ -7,7 +7,8 @@ use nom::bytes::complete::tag;
use nom::character::complete::char as nomchar;
use nom::combinator::{all_consuming, map_res};
use nom::multi::{separated_list0, separated_list1};
use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
use nom::sequence::{delimited, preceded, separated_pair, terminated};
use nom::Parser;
use std::collections::{btree_map, BTreeMap, BTreeSet};
use thiserror;
@ -27,7 +28,7 @@ pub enum Error<I> {
}
pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> {
match all_consuming(parse_derivation)(i) {
match all_consuming(parse_derivation).parse(i) {
Ok((rest, derivation)) => {
// this shouldn't happen, as all_consuming shouldn't return.
debug_assert!(rest.is_empty());
@ -68,13 +69,14 @@ fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
nomchar('('),
map_res(
|i| {
tuple((
(
terminated(aterm::parse_string_field, nomchar(',')),
terminated(aterm::parse_string_field, nomchar(',')),
terminated(aterm::parse_string_field, nomchar(',')),
aterm::parse_bytes_field,
))(i)
.map_err(into_nomerror)
)
.parse(i)
.map_err(into_nomerror)
},
|(output_name, output_path, algo_and_mode, encoded_digest)| {
// convert these 4 fields into an [Output].
@ -114,7 +116,8 @@ fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
},
),
nomchar(')'),
)(i)
)
.parse(i)
}
/// Parse multiple outputs in ATerm. This is a list of things accepted by
@ -127,7 +130,8 @@ fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, Output>> {
nomchar('['),
separated_list1(tag(","), parse_output),
nomchar(']'),
)(i);
)
.parse(i);
match res {
Ok((rst, outputs_lst)) => {
@ -228,7 +232,7 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
nomchar('('),
// tuple requires all errors to be of the same type, so we need to be a
// bit verbose here wrapping generic IResult into [NomATermResult].
tuple((
(
// parse outputs
terminated(parse_outputs, nomchar(',')),
// // parse input derivations
@ -236,14 +240,26 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
// // parse input sources
terminated(parse_input_sources, nomchar(',')),
// // parse system
|i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror),
|i| {
terminated(aterm::parse_string_field, nomchar(','))
.parse(i)
.map_err(into_nomerror)
},
// // parse builder
|i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror),
|i| {
terminated(aterm::parse_string_field, nomchar(','))
.parse(i)
.map_err(into_nomerror)
},
// // parse arguments
|i| terminated(aterm::parse_string_list, nomchar(','))(i).map_err(into_nomerror),
|i| {
terminated(aterm::parse_string_list, nomchar(','))
.parse(i)
.map_err(into_nomerror)
},
// parse environment
parse_kv(aterm::parse_bytes_field),
)),
),
nomchar(')'),
)
.map(
@ -267,7 +283,8 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
}
},
),
)(i)
)
.parse(i)
}
/// Parse a list of key/value pairs into a BTreeMap.
@ -298,7 +315,7 @@ where
),
nomchar(')'),
),
)(ii).map_err(into_nomerror);
).parse(ii).map_err(into_nomerror);
match res {
Ok((rest, pairs)) => {
@ -322,7 +339,7 @@ where
}
},
nomchar(']'),
)(i)
).parse(i)
}
#[cfg(test)]