style: format entire depot with nixpkgs-fmt
This CL can be used to compare the style of nixpkgs-fmt against other formatters (nixpkgs, alejandra).

Change-Id: I87c6abff6bcb546b02ead15ad0405f81e01b6d9e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/4397
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Reviewed-by: lukegb <lukegb@tvl.fyi>
Reviewed-by: wpcarro <wpcarro@gmail.com>
Reviewed-by: Profpatsch <mail@profpatsch.de>
Reviewed-by: kanepyork <rikingcoding@gmail.com>
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: cynthia <cynthia@tvl.fyi>
Reviewed-by: edef <edef@edef.eu>
Reviewed-by: eta <tvl@eta.st>
Reviewed-by: grfn <grfn@gws.fyi>
This commit is contained in:
parent
2d10d60fac
commit
aa122cbae7
310 changed files with 7278 additions and 5490 deletions
|
|
@ -25,7 +25,7 @@ let
|
|||
Type: integer -> integer
|
||||
*/
|
||||
byteCount = i: flow.cond [
|
||||
[ (int.bitAnd i 128 == 0) 1 ]
|
||||
[ (int.bitAnd i 128 == 0) 1 ]
|
||||
[ (int.bitAnd i 224 == 192) 2 ]
|
||||
[ (int.bitAnd i 240 == 224) 3 ]
|
||||
[ (int.bitAnd i 248 == 240) 4 ]
|
||||
|
|
@ -45,30 +45,30 @@ let
|
|||
first:
|
||||
# byte position as an index starting with 0
|
||||
pos:
|
||||
let
|
||||
defaultRange = int.inRange 128 191;
|
||||
let
|
||||
defaultRange = int.inRange 128 191;
|
||||
|
||||
secondBytePredicate = flow.switch first [
|
||||
[ (int.inRange 194 223) defaultRange ] # C2..DF
|
||||
[ 224 (int.inRange 160 191) ] # E0
|
||||
[ (int.inRange 225 236) defaultRange ] # E1..EC
|
||||
[ 237 (int.inRange 128 159) ] # ED
|
||||
[ (int.inRange 238 239) defaultRange ] # EE..EF
|
||||
[ 240 (int.inRange 144 191) ] # F0
|
||||
[ (int.inRange 241 243) defaultRange ] # F1..F3
|
||||
[ 244 (int.inRange 128 143) ] # F4
|
||||
[ (fun.const true) null ]
|
||||
];
|
||||
secondBytePredicate = flow.switch first [
|
||||
[ (int.inRange 194 223) defaultRange ] # C2..DF
|
||||
[ 224 (int.inRange 160 191) ] # E0
|
||||
[ (int.inRange 225 236) defaultRange ] # E1..EC
|
||||
[ 237 (int.inRange 128 159) ] # ED
|
||||
[ (int.inRange 238 239) defaultRange ] # EE..EF
|
||||
[ 240 (int.inRange 144 191) ] # F0
|
||||
[ (int.inRange 241 243) defaultRange ] # F1..F3
|
||||
[ 244 (int.inRange 128 143) ] # F4
|
||||
[ (fun.const true) null ]
|
||||
];
|
||||
|
||||
firstBytePredicate = byte: assert first == byte;
|
||||
first < 128 || secondBytePredicate != null;
|
||||
in
|
||||
# Either ASCII or in one of the byte ranges of Table 3-6.
|
||||
if pos == 0 then firstBytePredicate
|
||||
# return predicate according to Table 3-6.
|
||||
else if pos == 1 then assert secondBytePredicate != null; secondBytePredicate
|
||||
# 3rd and 4th byte have only one validity rule
|
||||
else defaultRange;
|
||||
firstBytePredicate = byte: assert first == byte;
|
||||
first < 128 || secondBytePredicate != null;
|
||||
in
|
||||
# Either ASCII or in one of the byte ranges of Table 3-6.
|
||||
if pos == 0 then firstBytePredicate
|
||||
# return predicate according to Table 3-6.
|
||||
else if pos == 1 then assert secondBytePredicate != null; secondBytePredicate
|
||||
# 3rd and 4th byte have only one validity rule
|
||||
else defaultRange;
|
||||
|
||||
/* Iteration step for decoding a UTF-8 byte sequence.
|
||||
It decodes incrementally, i.e. it has to be fed
|
||||
|
|
@ -128,23 +128,24 @@ let
|
|||
# the current value by the amount of bytes left.
|
||||
offset = (count - (pos + 1)) * 6;
|
||||
in
|
||||
code + (int.bitShiftL (int.bitAnd mask value) offset);
|
||||
code + (int.bitShiftL (int.bitAnd mask value) offset);
|
||||
illFormedMsg =
|
||||
"Ill-formed byte ${int.toHex value} at position ${toString pos} in ${toString count} byte UTF-8 sequence";
|
||||
in
|
||||
if !(wellFormedByte first pos value) then builtins.throw illFormedMsg
|
||||
else if pos + 1 == count
|
||||
then (builtins.removeAttrs args [ # allow extra state being passed through
|
||||
"count"
|
||||
"code"
|
||||
"pos"
|
||||
"first"
|
||||
]) // { result = newCode; }
|
||||
else (builtins.removeAttrs args [ "result" ]) // {
|
||||
inherit count first;
|
||||
code = newCode;
|
||||
pos = pos + 1;
|
||||
};
|
||||
if !(wellFormedByte first pos value) then builtins.throw illFormedMsg
|
||||
else if pos + 1 == count
|
||||
then (builtins.removeAttrs args [
|
||||
# allow extra state being passed through
|
||||
"count"
|
||||
"code"
|
||||
"pos"
|
||||
"first"
|
||||
]) // { result = newCode; }
|
||||
else (builtins.removeAttrs args [ "result" ]) // {
|
||||
inherit count first;
|
||||
code = newCode;
|
||||
pos = pos + 1;
|
||||
};
|
||||
|
||||
/* Decode a UTF-8 string into a list of codepoints.
|
||||
|
||||
|
|
@ -161,7 +162,7 @@ let
|
|||
{
|
||||
key = "start";
|
||||
stringIndex = -1;
|
||||
state = {};
|
||||
state = { };
|
||||
codepoint = null;
|
||||
}
|
||||
];
|
||||
|
|
@ -170,7 +171,8 @@ let
|
|||
# updated values for current iteration step
|
||||
newIndex = stringIndex + 1;
|
||||
newState = step state (builtins.substring newIndex 1 s);
|
||||
in lib.optional (newIndex < stringLength) {
|
||||
in
|
||||
lib.optional (newIndex < stringLength) {
|
||||
# unique keys to make genericClosure happy
|
||||
key = toString newIndex;
|
||||
# carryover state for the next step
|
||||
|
|
@ -183,35 +185,39 @@ let
|
|||
in
|
||||
# extract all steps that yield a code point into a list
|
||||
builtins.map (v: v.codepoint) (
|
||||
builtins.filter (
|
||||
{ codepoint, stringIndex, state, ... }:
|
||||
builtins.filter
|
||||
(
|
||||
{ codepoint, stringIndex, state, ... }:
|
||||
|
||||
let
|
||||
# error message in case we are missing bytes at the end of input
|
||||
earlyEndMsg =
|
||||
if state ? count && state ? pos
|
||||
then "Missing ${toString (with state; count - pos)} bytes at end of input"
|
||||
else "Unexpected end of input";
|
||||
in
|
||||
let
|
||||
# error message in case we are missing bytes at the end of input
|
||||
earlyEndMsg =
|
||||
if state ? count && state ? pos
|
||||
then "Missing ${toString (with state; count - pos)} bytes at end of input"
|
||||
else "Unexpected end of input";
|
||||
in
|
||||
|
||||
# filter out all iteration steps without a codepoint value
|
||||
codepoint != null
|
||||
# filter out all iteration steps without a codepoint value
|
||||
codepoint != null
|
||||
# if we are at the iteration step of a non-empty input string, throw
|
||||
# an error if no codepoint was returned, as it indicates an incomplete
|
||||
# UTF-8 sequence.
|
||||
|| (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg)
|
||||
|
||||
) iterResult
|
||||
)
|
||||
iterResult
|
||||
);
|
||||
|
||||
/* Pretty prints a Unicode codepoint in the U+<HEX> notation.
|
||||
|
||||
Type: integer -> string
|
||||
*/
|
||||
formatCodepoint = cp: "U+" + string.fit {
|
||||
width = 4;
|
||||
char = "0";
|
||||
} (int.toHex cp);
|
||||
formatCodepoint = cp: "U+" + string.fit
|
||||
{
|
||||
width = 4;
|
||||
char = "0";
|
||||
}
|
||||
(int.toHex cp);
|
||||
|
||||
encodeCodepoint = cp:
|
||||
let
|
||||
|
|
@ -219,11 +225,11 @@ let
|
|||
# Note that this doesn't check if the Unicode codepoint is allowed,
|
||||
# but rather allows all theoretically UTF-8-encodeable ones.
|
||||
count = flow.switch cp [
|
||||
[ (int.inRange 0 127) 1 ] # 00000000 0xxxxxxx
|
||||
[ (int.inRange 128 2047) 2 ] # 00000yyy yyxxxxxx
|
||||
[ (int.inRange 2048 65535) 3 ] # zzzzyyyy yyxxxxxx
|
||||
[ (int.inRange 0 127) 1 ] # 00000000 0xxxxxxx
|
||||
[ (int.inRange 128 2047) 2 ] # 00000yyy yyxxxxxx
|
||||
[ (int.inRange 2048 65535) 3 ] # zzzzyyyy yyxxxxxx
|
||||
[ (int.inRange 65536 1114111) 4 ] # 000uuuuu zzzzyyyy yyxxxxxx,
|
||||
# capped at U+10FFFF
|
||||
# capped at U+10FFFF
|
||||
|
||||
[ (fun.const true) (builtins.throw invalidCodepointMsg) ]
|
||||
];
|
||||
|
|
@ -234,32 +240,34 @@ let
|
|||
# according to Table 3-6. from The Unicode Standard, Version 13.0,
|
||||
# section 3.9. u is split into uh and ul since they are used in
|
||||
# different bytes in the end.
|
||||
components = lib.mapAttrs (_: { mask, offset }:
|
||||
int.bitAnd (int.bitShiftR cp offset) mask
|
||||
) {
|
||||
x = {
|
||||
mask = if count > 1 then 63 else 127;
|
||||
offset = 0;
|
||||
components = lib.mapAttrs
|
||||
(_: { mask, offset }:
|
||||
int.bitAnd (int.bitShiftR cp offset) mask
|
||||
)
|
||||
{
|
||||
x = {
|
||||
mask = if count > 1 then 63 else 127;
|
||||
offset = 0;
|
||||
};
|
||||
y = {
|
||||
mask = if count > 2 then 63 else 31;
|
||||
offset = 6;
|
||||
};
|
||||
z = {
|
||||
mask = 15;
|
||||
offset = 12;
|
||||
};
|
||||
# u which belongs into the second byte
|
||||
ul = {
|
||||
mask = 3;
|
||||
offset = 16;
|
||||
};
|
||||
# u which belongs into the first byte
|
||||
uh = {
|
||||
mask = 7;
|
||||
offset = 18;
|
||||
};
|
||||
};
|
||||
y = {
|
||||
mask = if count > 2 then 63 else 31;
|
||||
offset = 6;
|
||||
};
|
||||
z = {
|
||||
mask = 15;
|
||||
offset = 12;
|
||||
};
|
||||
# u which belongs into the second byte
|
||||
ul = {
|
||||
mask = 3;
|
||||
offset = 16;
|
||||
};
|
||||
# u which belongs into the first byte
|
||||
uh = {
|
||||
mask = 7;
|
||||
offset = 18;
|
||||
};
|
||||
};
|
||||
inherit (components) x y z ul uh;
|
||||
|
||||
# Finally construct the byte sequence for the given codepoint. This is
|
||||
|
|
@ -286,15 +294,18 @@ let
|
|||
|
||||
unableToEncodeMessage = "Can't encode ${formatCodepoint cp} as UTF-8";
|
||||
|
||||
in string.fromBytes (
|
||||
builtins.genList (i:
|
||||
let
|
||||
byte = builtins.elemAt bytes i;
|
||||
in
|
||||
in
|
||||
string.fromBytes (
|
||||
builtins.genList
|
||||
(i:
|
||||
let
|
||||
byte = builtins.elemAt bytes i;
|
||||
in
|
||||
if wellFormedByte firstByte i byte
|
||||
then byte
|
||||
else builtins.throw unableToEncodeMessage
|
||||
) count
|
||||
)
|
||||
count
|
||||
);
|
||||
|
||||
/* Encode a list of Unicode codepoints into a UTF-8 string.
|
||||
|
|
@ -303,7 +314,8 @@ let
|
|||
*/
|
||||
encode = lib.concatMapStrings encodeCodepoint;
|
||||
|
||||
in {
|
||||
in
|
||||
{
|
||||
inherit
|
||||
encode
|
||||
decode
|
||||
|
|
|
|||
|
|
@ -25,9 +25,10 @@ let
|
|||
char
|
||||
;
|
||||
|
||||
rustDecoder = rustSimple {
|
||||
name = "utf8-decode";
|
||||
} ''
|
||||
rustDecoder = rustSimple
|
||||
{
|
||||
name = "utf8-decode";
|
||||
} ''
|
||||
use std::io::{self, Read};
|
||||
fn main() -> std::io::Result<()> {
|
||||
let mut buffer = String::new();
|
||||
|
|
@ -47,10 +48,11 @@ let
|
|||
|
||||
rustDecode = s:
|
||||
let
|
||||
expr = runCommandLocal "${s}-decoded" {} ''
|
||||
expr = runCommandLocal "${s}-decoded" { } ''
|
||||
printf '%s' ${lib.escapeShellArg s} | ${rustDecoder} > $out
|
||||
'';
|
||||
in import expr;
|
||||
in
|
||||
import expr;
|
||||
|
||||
hexDecode = l:
|
||||
utf8.decode (string.fromBytes (builtins.map int.fromHex l));
|
||||
|
|
@ -65,23 +67,27 @@ let
|
|||
(assertEq "well-formed: F4 80 83 92" (hexDecode [ "F4" "80" "83" "92" ]) [ 1048786 ])
|
||||
(assertThrows "Codepoint out of range: 0xFFFFFF" (hexEncode [ "FFFFFF" ]))
|
||||
(assertThrows "Codepoint out of range: -0x02" (hexEncode [ "-02" ]))
|
||||
] ++ builtins.genList (i:
|
||||
let
|
||||
cp = i + int.fromHex "D800";
|
||||
in
|
||||
] ++ builtins.genList
|
||||
(i:
|
||||
let
|
||||
cp = i + int.fromHex "D800";
|
||||
in
|
||||
assertThrows "Can't encode UTF-16 reserved characters: ${utf8.formatCodepoint cp}"
|
||||
(utf8.encode [ cp ])
|
||||
) (int.fromHex "07FF"));
|
||||
)
|
||||
(int.fromHex "07FF"));
|
||||
|
||||
testAscii = it "checks decoding of ascii strings"
|
||||
(builtins.map (s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\""
|
||||
(string.toBytes s) (utf8.decode s)) [
|
||||
"foo bar"
|
||||
"hello\nworld"
|
||||
"carriage\r\nreturn"
|
||||
"1238398494829304 []<><>({})[]!!)"
|
||||
(string.take 127 char.allChars)
|
||||
]);
|
||||
(builtins.map
|
||||
(s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\""
|
||||
(string.toBytes s)
|
||||
(utf8.decode s)) [
|
||||
"foo bar"
|
||||
"hello\nworld"
|
||||
"carriage\r\nreturn"
|
||||
"1238398494829304 []<><>({})[]!!)"
|
||||
(string.take 127 char.allChars)
|
||||
]);
|
||||
|
||||
randomUnicode = [
|
||||
"" # empty string should yield empty list
|
||||
|
|
@ -126,16 +132,17 @@ let
|
|||
testDecodingEncoding = it "checks that decoding and then encoding forms an identity"
|
||||
(builtins.map
|
||||
(s: assertEq "Decoding and then encoding “${s}” yields itself"
|
||||
(utf8.encode (utf8.decode s)) s)
|
||||
(utf8.encode (utf8.decode s))
|
||||
s)
|
||||
(lib.flatten [
|
||||
glassSentences
|
||||
randomUnicode
|
||||
]));
|
||||
|
||||
in
|
||||
runTestsuite "nix.utf8" [
|
||||
testFailures
|
||||
testAscii
|
||||
testDecoding
|
||||
testDecodingEncoding
|
||||
]
|
||||
runTestsuite "nix.utf8" [
|
||||
testFailures
|
||||
testAscii
|
||||
testDecoding
|
||||
testDecodingEncoding
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue