style: format entire depot with nixpkgs-fmt

This CL can be used to compare the style of nixpkgs-fmt against other
formatters (nixfmt, alejandra).

Change-Id: I87c6abff6bcb546b02ead15ad0405f81e01b6d9e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/4397
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Reviewed-by: lukegb <lukegb@tvl.fyi>
Reviewed-by: wpcarro <wpcarro@gmail.com>
Reviewed-by: Profpatsch <mail@profpatsch.de>
Reviewed-by: kanepyork <rikingcoding@gmail.com>
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: cynthia <cynthia@tvl.fyi>
Reviewed-by: edef <edef@edef.eu>
Reviewed-by: eta <tvl@eta.st>
Reviewed-by: grfn <grfn@gws.fyi>
This commit is contained in:
Vincent Ambo 2022-01-30 19:06:58 +03:00 committed by tazjin
parent 2d10d60fac
commit aa122cbae7
310 changed files with 7278 additions and 5490 deletions

View file

@ -25,7 +25,7 @@ let
Type: integer -> integer
*/
byteCount = i: flow.cond [
[ (int.bitAnd i 128 == 0) 1 ]
[ (int.bitAnd i 128 == 0) 1 ]
[ (int.bitAnd i 224 == 192) 2 ]
[ (int.bitAnd i 240 == 224) 3 ]
[ (int.bitAnd i 248 == 240) 4 ]
@ -45,30 +45,30 @@ let
first:
# byte position as an index starting with 0
pos:
let
defaultRange = int.inRange 128 191;
let
defaultRange = int.inRange 128 191;
secondBytePredicate = flow.switch first [
[ (int.inRange 194 223) defaultRange ] # C2..DF
[ 224 (int.inRange 160 191) ] # E0
[ (int.inRange 225 236) defaultRange ] # E1..EC
[ 237 (int.inRange 128 159) ] # ED
[ (int.inRange 238 239) defaultRange ] # EE..EF
[ 240 (int.inRange 144 191) ] # F0
[ (int.inRange 241 243) defaultRange ] # F1..F3
[ 244 (int.inRange 128 143) ] # F4
[ (fun.const true) null ]
];
secondBytePredicate = flow.switch first [
[ (int.inRange 194 223) defaultRange ] # C2..DF
[ 224 (int.inRange 160 191) ] # E0
[ (int.inRange 225 236) defaultRange ] # E1..EC
[ 237 (int.inRange 128 159) ] # ED
[ (int.inRange 238 239) defaultRange ] # EE..EF
[ 240 (int.inRange 144 191) ] # F0
[ (int.inRange 241 243) defaultRange ] # F1..F3
[ 244 (int.inRange 128 143) ] # F4
[ (fun.const true) null ]
];
firstBytePredicate = byte: assert first == byte;
first < 128 || secondBytePredicate != null;
in
# Either ASCII or in one of the byte ranges of Table 3-6.
if pos == 0 then firstBytePredicate
# return predicate according to Table 3-6.
else if pos == 1 then assert secondBytePredicate != null; secondBytePredicate
# 3rd and 4th byte have only one validity rule
else defaultRange;
firstBytePredicate = byte: assert first == byte;
first < 128 || secondBytePredicate != null;
in
# Either ASCII or in one of the byte ranges of Table 3-6.
if pos == 0 then firstBytePredicate
# return predicate according to Table 3-6.
else if pos == 1 then assert secondBytePredicate != null; secondBytePredicate
# 3rd and 4th byte have only one validity rule
else defaultRange;
/* Iteration step for decoding a UTF-8 byte sequence.
It decodes incrementally, i.e. it has to be fed
@ -128,23 +128,24 @@ let
# the current value by the amount of bytes left.
offset = (count - (pos + 1)) * 6;
in
code + (int.bitShiftL (int.bitAnd mask value) offset);
code + (int.bitShiftL (int.bitAnd mask value) offset);
illFormedMsg =
"Ill-formed byte ${int.toHex value} at position ${toString pos} in ${toString count} byte UTF-8 sequence";
in
if !(wellFormedByte first pos value) then builtins.throw illFormedMsg
else if pos + 1 == count
then (builtins.removeAttrs args [ # allow extra state being passed through
"count"
"code"
"pos"
"first"
]) // { result = newCode; }
else (builtins.removeAttrs args [ "result" ]) // {
inherit count first;
code = newCode;
pos = pos + 1;
};
if !(wellFormedByte first pos value) then builtins.throw illFormedMsg
else if pos + 1 == count
then (builtins.removeAttrs args [
# allow extra state being passed through
"count"
"code"
"pos"
"first"
]) // { result = newCode; }
else (builtins.removeAttrs args [ "result" ]) // {
inherit count first;
code = newCode;
pos = pos + 1;
};
/* Decode a UTF-8 string into a list of codepoints.
@ -161,7 +162,7 @@ let
{
key = "start";
stringIndex = -1;
state = {};
state = { };
codepoint = null;
}
];
@ -170,7 +171,8 @@ let
# updated values for current iteration step
newIndex = stringIndex + 1;
newState = step state (builtins.substring newIndex 1 s);
in lib.optional (newIndex < stringLength) {
in
lib.optional (newIndex < stringLength) {
# unique keys to make genericClosure happy
key = toString newIndex;
# carryover state for the next step
@ -183,35 +185,39 @@ let
in
# extract all steps that yield a code point into a list
builtins.map (v: v.codepoint) (
builtins.filter (
{ codepoint, stringIndex, state, ... }:
builtins.filter
(
{ codepoint, stringIndex, state, ... }:
let
# error message in case we are missing bytes at the end of input
earlyEndMsg =
if state ? count && state ? pos
then "Missing ${toString (with state; count - pos)} bytes at end of input"
else "Unexpected end of input";
in
let
# error message in case we are missing bytes at the end of input
earlyEndMsg =
if state ? count && state ? pos
then "Missing ${toString (with state; count - pos)} bytes at end of input"
else "Unexpected end of input";
in
# filter out all iteration steps without a codepoint value
codepoint != null
# filter out all iteration steps without a codepoint value
codepoint != null
# if we are at the last iteration step of a non-empty input string, throw
# an error if no codepoint was returned, as it indicates an incomplete
# UTF-8 sequence.
|| (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg)
) iterResult
)
iterResult
);
/* Pretty prints a Unicode codepoint in the U+<HEX> notation.
Type: integer -> string
*/
formatCodepoint = cp: "U+" + string.fit {
width = 4;
char = "0";
} (int.toHex cp);
formatCodepoint = cp: "U+" + string.fit
{
width = 4;
char = "0";
}
(int.toHex cp);
encodeCodepoint = cp:
let
@ -219,11 +225,11 @@ let
# Note that this doesn't check if the Unicode codepoint is allowed,
# but rather allows all theoretically UTF-8-encodeable ones.
count = flow.switch cp [
[ (int.inRange 0 127) 1 ] # 00000000 0xxxxxxx
[ (int.inRange 128 2047) 2 ] # 00000yyy yyxxxxxx
[ (int.inRange 2048 65535) 3 ] # zzzzyyyy yyxxxxxx
[ (int.inRange 0 127) 1 ] # 00000000 0xxxxxxx
[ (int.inRange 128 2047) 2 ] # 00000yyy yyxxxxxx
[ (int.inRange 2048 65535) 3 ] # zzzzyyyy yyxxxxxx
[ (int.inRange 65536 1114111) 4 ] # 000uuuuu zzzzyyyy yyxxxxxx,
# capped at U+10FFFF
# capped at U+10FFFF
[ (fun.const true) (builtins.throw invalidCodepointMsg) ]
];
@ -234,32 +240,34 @@ let
# according to Table 3-6. from The Unicode Standard, Version 13.0,
# section 3.9. u is split into uh and ul since they are used in
# different bytes in the end.
components = lib.mapAttrs (_: { mask, offset }:
int.bitAnd (int.bitShiftR cp offset) mask
) {
x = {
mask = if count > 1 then 63 else 127;
offset = 0;
components = lib.mapAttrs
(_: { mask, offset }:
int.bitAnd (int.bitShiftR cp offset) mask
)
{
x = {
mask = if count > 1 then 63 else 127;
offset = 0;
};
y = {
mask = if count > 2 then 63 else 31;
offset = 6;
};
z = {
mask = 15;
offset = 12;
};
# u which belongs into the second byte
ul = {
mask = 3;
offset = 16;
};
# u which belongs into the first byte
uh = {
mask = 7;
offset = 18;
};
};
y = {
mask = if count > 2 then 63 else 31;
offset = 6;
};
z = {
mask = 15;
offset = 12;
};
# u which belongs into the second byte
ul = {
mask = 3;
offset = 16;
};
# u which belongs into the first byte
uh = {
mask = 7;
offset = 18;
};
};
inherit (components) x y z ul uh;
# Finally construct the byte sequence for the given codepoint. This is
@ -286,15 +294,18 @@ let
unableToEncodeMessage = "Can't encode ${formatCodepoint cp} as UTF-8";
in string.fromBytes (
builtins.genList (i:
let
byte = builtins.elemAt bytes i;
in
in
string.fromBytes (
builtins.genList
(i:
let
byte = builtins.elemAt bytes i;
in
if wellFormedByte firstByte i byte
then byte
else builtins.throw unableToEncodeMessage
) count
)
count
);
/* Encode a list of Unicode codepoints into a UTF-8 string.
@ -303,7 +314,8 @@ let
*/
encode = lib.concatMapStrings encodeCodepoint;
in {
in
{
inherit
encode
decode