chore(nix-compat): bump to nom 8.x

See 72dd5818b7/CHANGELOG.md for the nom changelog. Most notably, combinators are no longer called directly, so a `.parse()` call has to be added: `combinator(arg)(input)` -> `combinator(arg).parse(input)`. The `tuple` combinator is also no longer needed, as parsing is implemented on tuples directly. This also refactors the string / byte field parsing parts to make them more concise. Change-Id: I9e8a3cedd07d6705be391898eb6a486fb8164069 Reviewed-on: https://cl.tvl.fyi/c/depot/+/13193 Tested-by: BuildkiteCI Reviewed-by: edef <edef@edef.eu> Reviewed-by: Brian Olsen <me@griff.name>
2025-03-02 22:06:11 +07:00 · 2025-03-02 22:06:11 +07:00 · a512f16424
commit a512f16424
parent 2daa483249
15 changed files with 83 additions and 233 deletions
--- a/tvix/nix-compat/src/aterm/parser.rs
+++ b/tvix/nix-compat/src/aterm/parser.rs
@ -4,12 +4,12 @@
 //! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
 use bstr::BString;
 use nom::branch::alt;
-use nom::bytes::complete::{escaped_transform, is_not, tag};
+use nom::bytes::complete::{escaped_transform, is_not};
 use nom::character::complete::char as nomchar;
-use nom::combinator::{map, value};
+use nom::combinator::{map_res, opt, value};
 use nom::multi::separated_list0;
 use nom::sequence::delimited;
-use nom::IResult;
+use nom::{IResult, Parser};

 /// Parse a bstr and undo any escaping (which is why this needs to allocate).
 // FUTUREWORK: have a version for fields that are known to not need escaping
@ -32,48 +32,37 @@ fn parse_escaped_bytes(i: &[u8]) -> IResult<&[u8], BString> {
 /// Parse a field in double quotes, undo any escaping, and return the unquoted
 /// and decoded `Vec<u8>`.
 pub(crate) fn parse_bytes_field(i: &[u8]) -> IResult<&[u8], BString> {
-    // inside double quotes…
    delimited(
        nomchar('\"'),
-        // There is
-        alt((
-            // …either is a bstr after unescaping
-            parse_escaped_bytes,
-            // …or an empty string.
-            map(tag(b""), |_| BString::default()),
-        )),
+        opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
        nomchar('\"'),
-    )(i)
+    )
+    .parse(i)
 }

 /// Parse a field in double quotes, undo any escaping, and return the unquoted
 /// and decoded [String], if it's valid UTF-8.
 /// Or fail parsing if the bytes are no valid UTF-8.
 pub(crate) fn parse_string_field(i: &[u8]) -> IResult<&[u8], String> {
-    // inside double quotes…
    delimited(
        nomchar('\"'),
-        // There is
-        alt((
-            // either is a String after unescaping
-            nom::combinator::map_opt(parse_escaped_bytes, |escaped_bytes| {
-                String::from_utf8(escaped_bytes.into()).ok()
-            }),
-            // or an empty string.
-            map(tag(b""), |_| "".to_string()),
-        )),
+        map_res(
+            opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
+            |bstr| String::from_utf8(bstr.to_vec()),
+        ),
        nomchar('\"'),
-    )(i)
+    )
+    .parse(i)
 }

 /// Parse a list of string fields (enclosed in brackets)
 pub(crate) fn parse_string_list(i: &[u8]) -> IResult<&[u8], Vec<String>> {
-    // inside brackets
    delimited(
        nomchar('['),
        separated_list0(nomchar(','), parse_string_field),
        nomchar(']'),
-    )(i)
+    )
+    .parse(i)
 }

 #[cfg(test)]
--- a/tvix/nix-compat/src/derivation/parser.rs
+++ b/tvix/nix-compat/src/derivation/parser.rs
@ -7,7 +7,8 @@ use nom::bytes::complete::tag;
 use nom::character::complete::char as nomchar;
 use nom::combinator::{all_consuming, map_res};
 use nom::multi::{separated_list0, separated_list1};
-use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
+use nom::sequence::{delimited, preceded, separated_pair, terminated};
+use nom::Parser;
 use std::collections::{btree_map, BTreeMap, BTreeSet};
 use thiserror;

@ -27,7 +28,7 @@ pub enum Error<I> {
 }

 pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> {
-    match all_consuming(parse_derivation)(i) {
+    match all_consuming(parse_derivation).parse(i) {
        Ok((rest, derivation)) => {
            // this shouldn't happen, as all_consuming shouldn't return.
            debug_assert!(rest.is_empty());
@ -68,13 +69,14 @@ fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
        nomchar('('),
        map_res(
            |i| {
-                tuple((
+                (
                    terminated(aterm::parse_string_field, nomchar(',')),
                    terminated(aterm::parse_string_field, nomchar(',')),
                    terminated(aterm::parse_string_field, nomchar(',')),
                    aterm::parse_bytes_field,
-                ))(i)
-                .map_err(into_nomerror)
+                )
+                    .parse(i)
+                    .map_err(into_nomerror)
            },
            |(output_name, output_path, algo_and_mode, encoded_digest)| {
                // convert these 4 fields into an [Output].
@ -114,7 +116,8 @@ fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
            },
        ),
        nomchar(')'),
-    )(i)
+    )
+    .parse(i)
 }

 /// Parse multiple outputs in ATerm. This is a list of things accepted by
@ -127,7 +130,8 @@ fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, Output>> {
        nomchar('['),
        separated_list1(tag(","), parse_output),
        nomchar(']'),
-    )(i);
+    )
+    .parse(i);

    match res {
        Ok((rst, outputs_lst)) => {
@ -228,7 +232,7 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
            nomchar('('),
            // tuple requires all errors to be of the same type, so we need to be a
            // bit verbose here wrapping generic IResult into [NomATermResult].
-            tuple((
+            (
                // parse outputs
                terminated(parse_outputs, nomchar(',')),
                // // parse input derivations
@ -236,14 +240,26 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
                // // parse input sources
                terminated(parse_input_sources, nomchar(',')),
                // // parse system
-                |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror),
+                |i| {
+                    terminated(aterm::parse_string_field, nomchar(','))
+                        .parse(i)
+                        .map_err(into_nomerror)
+                },
                // // parse builder
-                |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror),
+                |i| {
+                    terminated(aterm::parse_string_field, nomchar(','))
+                        .parse(i)
+                        .map_err(into_nomerror)
+                },
                // // parse arguments
-                |i| terminated(aterm::parse_string_list, nomchar(','))(i).map_err(into_nomerror),
+                |i| {
+                    terminated(aterm::parse_string_list, nomchar(','))
+                        .parse(i)
+                        .map_err(into_nomerror)
+                },
                // parse environment
                parse_kv(aterm::parse_bytes_field),
-            )),
+            ),
            nomchar(')'),
        )
        .map(
@ -267,7 +283,8 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
                }
            },
        ),
-    )(i)
+    )
+    .parse(i)
 }

 /// Parse a list of key/value pairs into a BTreeMap.
@ -298,7 +315,7 @@ where
                    ),
                    nomchar(')'),
                ),
-            )(ii).map_err(into_nomerror);
+            ).parse(ii).map_err(into_nomerror);

            match res {
                Ok((rest, pairs)) => {
@ -322,7 +339,7 @@ where
            }
        },
        nomchar(']'),
-    )(i)
+    ).parse(i)
 }

 #[cfg(test)]