style: format entire depot with nixpkgs-fmt

This CL can be used to compare the style of nixpkgs-fmt against other
formatters (nixfmt, alejandra).

Change-Id: I87c6abff6bcb546b02ead15ad0405f81e01b6d9e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/4397
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Reviewed-by: lukegb <lukegb@tvl.fyi>
Reviewed-by: wpcarro <wpcarro@gmail.com>
Reviewed-by: Profpatsch <mail@profpatsch.de>
Reviewed-by: kanepyork <rikingcoding@gmail.com>
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: cynthia <cynthia@tvl.fyi>
Reviewed-by: edef <edef@edef.eu>
Reviewed-by: eta <tvl@eta.st>
Reviewed-by: grfn <grfn@gws.fyi>
This commit is contained in:
Vincent Ambo 2022-01-30 19:06:58 +03:00 committed by tazjin
parent 2d10d60fac
commit aa122cbae7
310 changed files with 7278 additions and 5490 deletions

View file

@ -25,7 +25,7 @@ let
Type: integer -> integer
*/
byteCount = i: flow.cond [
[ (int.bitAnd i 128 == 0) 1 ]
[ (int.bitAnd i 128 == 0) 1 ]
[ (int.bitAnd i 224 == 192) 2 ]
[ (int.bitAnd i 240 == 224) 3 ]
[ (int.bitAnd i 248 == 240) 4 ]
@ -45,30 +45,30 @@ let
first:
# byte position as an index starting with 0
pos:
let
defaultRange = int.inRange 128 191;
let
defaultRange = int.inRange 128 191;
secondBytePredicate = flow.switch first [
[ (int.inRange 194 223) defaultRange ] # C2..DF
[ 224 (int.inRange 160 191) ] # E0
[ (int.inRange 225 236) defaultRange ] # E1..EC
[ 237 (int.inRange 128 159) ] # ED
[ (int.inRange 238 239) defaultRange ] # EE..EF
[ 240 (int.inRange 144 191) ] # F0
[ (int.inRange 241 243) defaultRange ] # F1..F3
[ 244 (int.inRange 128 143) ] # F4
[ (fun.const true) null ]
];
secondBytePredicate = flow.switch first [
[ (int.inRange 194 223) defaultRange ] # C2..DF
[ 224 (int.inRange 160 191) ] # E0
[ (int.inRange 225 236) defaultRange ] # E1..EC
[ 237 (int.inRange 128 159) ] # ED
[ (int.inRange 238 239) defaultRange ] # EE..EF
[ 240 (int.inRange 144 191) ] # F0
[ (int.inRange 241 243) defaultRange ] # F1..F3
[ 244 (int.inRange 128 143) ] # F4
[ (fun.const true) null ]
];
firstBytePredicate = byte: assert first == byte;
first < 128 || secondBytePredicate != null;
in
# Either ASCII or in one of the byte ranges of Table 3-6.
if pos == 0 then firstBytePredicate
# return predicate according to Table 3-6.
else if pos == 1 then assert secondBytePredicate != null; secondBytePredicate
# 3rd and 4th byte have only one validity rule
else defaultRange;
firstBytePredicate = byte: assert first == byte;
first < 128 || secondBytePredicate != null;
in
# Either ASCII or in one of the byte ranges of Table 3-6.
if pos == 0 then firstBytePredicate
# return predicate according to Table 3-6.
else if pos == 1 then assert secondBytePredicate != null; secondBytePredicate
# 3rd and 4th byte have only one validity rule
else defaultRange;
/* Iteration step for decoding a UTF-8 byte sequence.
It decodes incrementally, i.e. it has to be fed
@ -128,23 +128,24 @@ let
# the current value by the amount of bytes left.
offset = (count - (pos + 1)) * 6;
in
code + (int.bitShiftL (int.bitAnd mask value) offset);
code + (int.bitShiftL (int.bitAnd mask value) offset);
illFormedMsg =
"Ill-formed byte ${int.toHex value} at position ${toString pos} in ${toString count} byte UTF-8 sequence";
in
if !(wellFormedByte first pos value) then builtins.throw illFormedMsg
else if pos + 1 == count
then (builtins.removeAttrs args [ # allow extra state being passed through
"count"
"code"
"pos"
"first"
]) // { result = newCode; }
else (builtins.removeAttrs args [ "result" ]) // {
inherit count first;
code = newCode;
pos = pos + 1;
};
if !(wellFormedByte first pos value) then builtins.throw illFormedMsg
else if pos + 1 == count
then (builtins.removeAttrs args [
# allow extra state being passed through
"count"
"code"
"pos"
"first"
]) // { result = newCode; }
else (builtins.removeAttrs args [ "result" ]) // {
inherit count first;
code = newCode;
pos = pos + 1;
};
/* Decode a UTF-8 string into a list of codepoints.
@ -161,7 +162,7 @@ let
{
key = "start";
stringIndex = -1;
state = {};
state = { };
codepoint = null;
}
];
@ -170,7 +171,8 @@ let
# updated values for current iteration step
newIndex = stringIndex + 1;
newState = step state (builtins.substring newIndex 1 s);
in lib.optional (newIndex < stringLength) {
in
lib.optional (newIndex < stringLength) {
# unique keys to make genericClosure happy
key = toString newIndex;
# carryover state for the next step
@ -183,35 +185,39 @@ let
in
# extract all steps that yield a code point into a list
builtins.map (v: v.codepoint) (
builtins.filter (
{ codepoint, stringIndex, state, ... }:
builtins.filter
(
{ codepoint, stringIndex, state, ... }:
let
# error message in case we are missing bytes at the end of input
earlyEndMsg =
if state ? count && state ? pos
then "Missing ${toString (with state; count - pos)} bytes at end of input"
else "Unexpected end of input";
in
let
# error message in case we are missing bytes at the end of input
earlyEndMsg =
if state ? count && state ? pos
then "Missing ${toString (with state; count - pos)} bytes at end of input"
else "Unexpected end of input";
in
# filter out all iteration steps without a codepoint value
codepoint != null
# filter out all iteration steps without a codepoint value
codepoint != null
# if we are at the iteration step of a non-empty input string, throw
# an error if no codepoint was returned, as it indicates an incomplete
# UTF-8 sequence.
|| (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg)
) iterResult
)
iterResult
);
/* Pretty prints a Unicode codepoint in the U+<HEX> notation.
Type: integer -> string
*/
formatCodepoint = cp: "U+" + string.fit {
width = 4;
char = "0";
} (int.toHex cp);
formatCodepoint = cp: "U+" + string.fit
{
width = 4;
char = "0";
}
(int.toHex cp);
encodeCodepoint = cp:
let
@ -219,11 +225,11 @@ let
# Note that this doesn't check if the Unicode codepoint is allowed,
# but rather allows all theoretically UTF-8-encodeable ones.
count = flow.switch cp [
[ (int.inRange 0 127) 1 ] # 00000000 0xxxxxxx
[ (int.inRange 128 2047) 2 ] # 00000yyy yyxxxxxx
[ (int.inRange 2048 65535) 3 ] # zzzzyyyy yyxxxxxx
[ (int.inRange 0 127) 1 ] # 00000000 0xxxxxxx
[ (int.inRange 128 2047) 2 ] # 00000yyy yyxxxxxx
[ (int.inRange 2048 65535) 3 ] # zzzzyyyy yyxxxxxx
[ (int.inRange 65536 1114111) 4 ] # 000uuuuu zzzzyyyy yyxxxxxx,
# capped at U+10FFFF
# capped at U+10FFFF
[ (fun.const true) (builtins.throw invalidCodepointMsg) ]
];
@ -234,32 +240,34 @@ let
# according to Table 3-6. from The Unicode Standard, Version 13.0,
# section 3.9. u is split into uh and ul since they are used in
# different bytes in the end.
components = lib.mapAttrs (_: { mask, offset }:
int.bitAnd (int.bitShiftR cp offset) mask
) {
x = {
mask = if count > 1 then 63 else 127;
offset = 0;
components = lib.mapAttrs
(_: { mask, offset }:
int.bitAnd (int.bitShiftR cp offset) mask
)
{
x = {
mask = if count > 1 then 63 else 127;
offset = 0;
};
y = {
mask = if count > 2 then 63 else 31;
offset = 6;
};
z = {
mask = 15;
offset = 12;
};
# part of u which belongs in the second byte
ul = {
mask = 3;
offset = 16;
};
# part of u which belongs in the first byte
uh = {
mask = 7;
offset = 18;
};
};
y = {
mask = if count > 2 then 63 else 31;
offset = 6;
};
z = {
mask = 15;
offset = 12;
};
# part of u which belongs in the second byte
ul = {
mask = 3;
offset = 16;
};
# part of u which belongs in the first byte
uh = {
mask = 7;
offset = 18;
};
};
inherit (components) x y z ul uh;
# Finally construct the byte sequence for the given codepoint. This is
@ -286,15 +294,18 @@ let
unableToEncodeMessage = "Can't encode ${formatCodepoint cp} as UTF-8";
in string.fromBytes (
builtins.genList (i:
let
byte = builtins.elemAt bytes i;
in
in
string.fromBytes (
builtins.genList
(i:
let
byte = builtins.elemAt bytes i;
in
if wellFormedByte firstByte i byte
then byte
else builtins.throw unableToEncodeMessage
) count
)
count
);
/* Encode a list of Unicode codepoints into a UTF-8 string.
@ -303,7 +314,8 @@ let
*/
encode = lib.concatMapStrings encodeCodepoint;
in {
in
{
inherit
encode
decode

View file

@ -25,9 +25,10 @@ let
char
;
rustDecoder = rustSimple {
name = "utf8-decode";
} ''
rustDecoder = rustSimple
{
name = "utf8-decode";
} ''
use std::io::{self, Read};
fn main() -> std::io::Result<()> {
let mut buffer = String::new();
@ -47,10 +48,11 @@ let
rustDecode = s:
let
expr = runCommandLocal "${s}-decoded" {} ''
expr = runCommandLocal "${s}-decoded" { } ''
printf '%s' ${lib.escapeShellArg s} | ${rustDecoder} > $out
'';
in import expr;
in
import expr;
hexDecode = l:
utf8.decode (string.fromBytes (builtins.map int.fromHex l));
@ -65,23 +67,27 @@ let
(assertEq "well-formed: F4 80 83 92" (hexDecode [ "F4" "80" "83" "92" ]) [ 1048786 ])
(assertThrows "Codepoint out of range: 0xFFFFFF" (hexEncode [ "FFFFFF" ]))
(assertThrows "Codepoint out of range: -0x02" (hexEncode [ "-02" ]))
] ++ builtins.genList (i:
let
cp = i + int.fromHex "D800";
in
] ++ builtins.genList
(i:
let
cp = i + int.fromHex "D800";
in
assertThrows "Can't encode UTF-16 reserved characters: ${utf8.formatCodepoint cp}"
(utf8.encode [ cp ])
) (int.fromHex "07FF"));
)
(int.fromHex "07FF"));
testAscii = it "checks decoding of ascii strings"
(builtins.map (s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\""
(string.toBytes s) (utf8.decode s)) [
"foo bar"
"hello\nworld"
"carriage\r\nreturn"
"1238398494829304 []<><>({})[]!!)"
(string.take 127 char.allChars)
]);
(builtins.map
(s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\""
(string.toBytes s)
(utf8.decode s)) [
"foo bar"
"hello\nworld"
"carriage\r\nreturn"
"1238398494829304 []<><>({})[]!!)"
(string.take 127 char.allChars)
]);
randomUnicode = [
"" # empty string should yield empty list
@ -126,16 +132,17 @@ let
testDecodingEncoding = it "checks that decoding and then encoding forms an identity"
(builtins.map
(s: assertEq "Decoding and then encoding ${s} yields itself"
(utf8.encode (utf8.decode s)) s)
(utf8.encode (utf8.decode s))
s)
(lib.flatten [
glassSentences
randomUnicode
]));
in
runTestsuite "nix.utf8" [
testFailures
testAscii
testDecoding
testDecodingEncoding
]
runTestsuite "nix.utf8" [
testFailures
testAscii
testDecoding
testDecodingEncoding
]