package duplication present in (e.g.) a profile. It shows the
number of instances of each package in a closure, along with the
size in bytes of each instance, the average instance size, and the
"waste" (the sum of the sizes of all instances minus the average
size, i.e. the space used beyond a single average-sized instance).
$ ./show-duplication.pl /nix/var/nix/profiles/default
gcc 11
3.3.6 19293318
3.4.4 21425257
...
average 14942970, waste 149429707
coreutils 6
...
average package duplication 1.87628865979381, total size 3486330471, total waste 1335324237, 38.3017114443825% wasted
This utility is useful for measuring the cost in terms of disk space
of the Nix approach.
73 lines
1.9 KiB
Prolog
Executable file
73 lines
1.9 KiB
Prolog
Executable file
#! /usr/bin/perl -w
|
|
|
|
# Exactly one argument is required: the store path (e.g. a profile)
# whose closure is to be analysed.  Usage errors go to stderr.
if (scalar @ARGV != 1) {
    print STDERR "syntax: show-duplication.pl PATH\n";
    exit 1;
}

my $root = $ARGV[0];

# A package name is a run of alphanumerics, `+' and `_', and may
# contain dashes as long as the dash is not followed by a digit.  The
# version is whatever follows the first dash that *is* followed by a
# digit.
my $nameRE = qr/(?:(?:[A-Za-z0-9+_]|(?:-[^0-9]))+)/;
my $versionRE = qr/(?:[A-Za-z0-9.\-]+)/;

# Maps a package name to a list of {version, path} records, one per
# store path in the closure that carries that name.
my %pkgInstances;

# List-form pipe open: no shell is involved, and a failure to start
# the command is reported here instead of the parent process
# accidentally exec'ing nix-store itself when the fork fails.
open(my $paths, "-|", "nix-store", "-qR", $root)
    or die "cannot run nix-store: $!\n";

while (my $path = <$paths>) {
    chomp $path;

    # Strip the directory and the hash prefix: ".../<hash>-<name-version>".
    my ($nameVersion) = $path =~ /^.*\/[0-9a-z]*-(.*)$/;
    unless (defined $nameVersion) {
        warn "skipping unrecognised store path `$path'\n";
        next;
    }

    my ($name, $version);
    if ($nameVersion =~ /^($nameRE)(?:-($versionRE))?$/) {
        ($name, $version) = ($1, $2);
    } else {
        # The string cannot be split into name and version (e.g. the
        # version part contains a character $versionRE rejects); treat
        # the whole string as an unversioned package name.
        $name = $nameVersion;
    }
    $version = "(unnumbered)" unless defined $version;

    push @{$pkgInstances{$name}}, {version => $version, path => $path};
}

# close() on a pipe returns false if nix-store exited with an error.
close $paths or exit 1;
|
|
|
|
|
|
# Return the total size in bytes of $path, as reported by lstat: the
# size of the entry itself plus, for a real directory, the sizes of
# everything beneath it (recursively).  Symbolic links are counted as
# links and never followed.  Dies if $path cannot be lstat'ed or a
# directory cannot be opened.
sub pathSize {
    my $path = shift;

    my @st = lstat($path)
        or die "cannot lstat `$path': $!\n";

    my $size = $st[7];

    # `_' reuses the lstat result from above, so a symlink that points
    # to a directory is NOT treated as a directory.  A plain `-d $path'
    # would follow the link, counting data outside the tree and
    # possibly recursing forever on a symlink cycle.
    if (-d _) {
        opendir(my $dh, $path)
            or die "cannot open directory `$path': $!\n";
        # Read all entries up front so the handle can be closed before
        # recursing; this keeps at most one directory handle open.
        my @entries = readdir $dh;
        closedir $dh;

        foreach my $name (@entries) {
            next if $name eq "." || $name eq "..";
            $size += pathSize("$path/$name");
        }
    }

    return $size;
}
|
|
|
|
|
|
# Running totals over the whole closure.  Each variable gets its own
# `my'; in `my $a = 0, $b = 0;' only the first one is declared.
my $totalPaths = 0;
my $totalSize = 0;
my $totalWaste = 0;

# Report packages with the most instances first; ties are broken by
# name so the output order is deterministic.
my @pkgNames = sort {
    scalar @{$pkgInstances{$b}} <=> scalar @{$pkgInstances{$a}}
        or $a cmp $b
} keys %pkgInstances;

foreach my $name (@pkgNames) {
    my @instances = @{$pkgInstances{$name}};
    my $count = scalar @instances;

    print "$name $count\n";

    my $allSize = 0;
    foreach my $x (sort {$a->{version} cmp $b->{version}} @instances) {
        $totalPaths++;
        my $size = pathSize($x->{path});
        $allSize += $size;
        print " $x->{version} $size\n";
    }

    # "Waste" is everything beyond one average-sized instance.
    my $avgSize = int($allSize / $count);
    my $waste = $allSize - $avgSize;
    $totalSize += $allSize;
    $totalWaste += $waste;
    print " average $avgSize, waste $waste\n";
}

# Guard the ratios against an empty closure or an all-zero total size
# so the summary line is always printed instead of dying on a
# division by zero.
my $numPkgs = scalar @pkgNames;
my $avgDupl = $numPkgs ? $totalPaths / $numPkgs : 0;
my $wasteFactor = $totalSize ? ($totalWaste / $totalSize) * 100 : 0;
print "average package duplication $avgDupl, total size $totalSize, total waste $totalWaste, $wasteFactor% wasted\n";
|