nix-store query options `--referer' and `--referer-closure' have been changed to `--referrer' and `--referrer-closure' (but the old ones are still accepted for compatibility).
		
			
				
	
	
		
			388 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			388 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
| >#! @perl@ -w -I@libexecdir@/nix
 | |
| 
 | |
| use strict;
 | |
| use POSIX qw(tmpnam);
 | |
| use readmanifest;
 | |
| 
 | |
| die unless scalar @ARGV == 5;
 | |
| 
 | |
| my $hashAlgo = "sha256";
 | |
| 
 | |
| my $cacheDir = $ARGV[0];
 | |
| my $patchesDir = $ARGV[1];
 | |
| my $patchesURL = $ARGV[2];
 | |
| my $srcDir = $ARGV[3];
 | |
| my $dstDir = $ARGV[4];
 | |
| 
 | |
| my $tmpdir;
 | |
| do { $tmpdir = tmpnam(); }
 | |
| until mkdir $tmpdir, 0777;
 | |
| 
 | |
| print "TEMP = $tmpdir\n";
 | |
| 
 | |
| #END { rmdir $tmpdir; }
 | |
| 
 | |
| my %srcNarFiles;
 | |
| my %srcPatches;
 | |
| my %srcSuccessors;
 | |
| 
 | |
| my %dstNarFiles;
 | |
| my %dstPatches;
 | |
| my %dstSuccessors;
 | |
| 
 | |
| readManifest "$srcDir/MANIFEST",
 | |
|     \%srcNarFiles, \%srcPatches, \%srcSuccessors;
 | |
| 
 | |
| readManifest "$dstDir/MANIFEST",
 | |
|     \%dstNarFiles, \%dstPatches, \%dstSuccessors;
 | |
| 
 | |
| 
 | |
| sub findOutputPaths {
 | |
|     my $narFiles = shift;
 | |
|     my $successors = shift;
 | |
| 
 | |
|     my %outPaths;
 | |
|     
 | |
|     foreach my $p (keys %{$narFiles}) {
 | |
| 
 | |
|         # Ignore store expressions.
 | |
|         next if ($p =~ /\.store$/);
 | |
|         next if ($p =~ /\.drv$/);
 | |
|         
 | |
|         # Ignore builders (too much ambiguity -- they're all called
 | |
|         # `builder.sh').
 | |
|         next if ($p =~ /\.sh$/);
 | |
|         next if ($p =~ /\.patch$/);
 | |
|         
 | |
|         # Don't bother including tar files etc.
 | |
|         next if ($p =~ /\.tar\.(gz|bz2)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/);
 | |
| 
 | |
|         $outPaths{$p} = 1;
 | |
|     }
 | |
| 
 | |
|     return %outPaths;
 | |
| }
 | |
| 
 | |
| print "finding src output paths...\n";
 | |
| my %srcOutPaths = findOutputPaths \%srcNarFiles, \%srcSuccessors;
 | |
| 
 | |
| print "finding dst output paths...\n";
 | |
| my %dstOutPaths = findOutputPaths \%dstNarFiles, \%dstSuccessors;
 | |
| 
 | |
| 
 | |
| sub getNameVersion {
 | |
|     my $p = shift;
 | |
|     $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/;
 | |
|     my $name = $1;
 | |
|     my $version = $2;
 | |
|     $name =~ s/^-//;
 | |
|     $version =~ s/^-//;
 | |
|     return ($name, $version);
 | |
| }
 | |
| 
 | |
| 
 | |
| # A quick hack to get a measure of the `distance' between two
 | |
| # versions: it's just the position of the first character that differs
 | |
| # (or 999 if they are the same).
 | |
| sub versionDiff {
 | |
|     my $s = shift;
 | |
|     my $t = shift;
 | |
|     my $i;
 | |
|     return 999 if $s eq $t;
 | |
|     for ($i = 0; $i < length $s; $i++) {
 | |
|         return $i if $i >= length $t or
 | |
|             substr($s, $i, 1) ne substr($t, $i, 1);
 | |
|     }
 | |
|     return $i;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub getNarBz2 {
 | |
|     my $narFiles = shift;
 | |
|     my $storePath = shift;
 | |
|     
 | |
|     my $narFileList = $$narFiles{$storePath};
 | |
|     die "missing store expression $storePath" unless defined $narFileList;
 | |
| 
 | |
|     my $narFile = @{$narFileList}[0];
 | |
|     die unless defined $narFile;
 | |
| 
 | |
|     $narFile->{url} =~ /\/([^\/]+)$/;
 | |
|     die unless defined $1;
 | |
|     return "$cacheDir/$1";
 | |
| }
 | |
| 
 | |
| 
 | |
| sub containsPatch {
 | |
|     my $patches = shift;
 | |
|     my $storePath = shift;
 | |
|     my $basePath = shift;
 | |
|     my $patchList = $$patches{$storePath};
 | |
|     return 0 if !defined $patchList;
 | |
|     my $found = 0;
 | |
|     foreach my $patch (@{$patchList}) {
 | |
|         # !!! baseHash might differ
 | |
|         return 1 if $patch->{basePath} eq $basePath;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| # Compute the "weighted" number of uses of a path in the build graph.
 | |
| sub computeUses {
 | |
|     my $narFiles = shift;
 | |
|     my $path = shift;
 | |
| 
 | |
|     # Find the deriver of $path.
 | |
|     return 1 unless defined $$narFiles{$path};
 | |
|     my $deriver = @{$$narFiles{$path}}[0]->{deriver};
 | |
|     return 1 unless defined $deriver && $deriver ne "";
 | |
| 
 | |
| #    print "  DERIVER $deriver\n";
 | |
| 
 | |
|     # Optimisation: build the referrers graph from the references
 | |
|     # graph.
 | |
|     my %referrers;
 | |
|     foreach my $q (keys %{$narFiles}) {
 | |
|         my @refs = split " ", @{$$narFiles{$q}}[0]->{references};
 | |
|         foreach my $r (@refs) {
 | |
|             $referrers{$r} = [] unless defined $referrers{$r};
 | |
|             push @{$referrers{$r}}, $q;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # Determine the shortest path from $deriver to all other reachable
 | |
|     # paths in the `referrers' graph.
 | |
| 
 | |
|     my %dist;
 | |
|     $dist{$deriver} = 0;
 | |
| 
 | |
|     my @queue = ($deriver);
 | |
|     my $pos = 0;
 | |
|     
 | |
|     while ($pos < scalar @queue) {
 | |
|         my $p = $queue[$pos];
 | |
|         $pos++;
 | |
| 
 | |
|         foreach my $q (@{$referrers{$p}}) {
 | |
|             if (!defined $dist{$q}) {
 | |
|                 $dist{$q} = $dist{$p} + 1;
 | |
| #                print "    $q $dist{$q}\n";
 | |
|                 push @queue, $q;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my $wuse = 1.0;
 | |
|     foreach my $user (keys %dist) {
 | |
|         next if $user eq $deriver;
 | |
| #        print "    $user $dist{$user}\n";
 | |
|         $wuse += 1.0 / 2.0**$dist{$user};
 | |
|     }
 | |
| 
 | |
| #    print "    XXX $path $wuse\n";
 | |
|     
 | |
|     return $wuse;
 | |
| }
 | |
| 
 | |
| 
 | |
| # For each output path in the destination, see if we need to / can
 | |
| # create a patch.
 | |
| 
 | |
| print "creating patches...\n";
 | |
| 
 | |
| foreach my $p (keys %dstOutPaths) {
 | |
| 
 | |
|     # If exactly the same path already exists in the source, skip it.
 | |
|     next if defined $srcOutPaths{$p};
 | |
|     
 | |
|     print "  $p\n";
 | |
| 
 | |
|     # If not, then we should find the paths in the source that are
 | |
|     # `most' likely to be present on a system that wants to install
 | |
|     # this path.
 | |
| 
 | |
|     (my $name, my $version) = getNameVersion $p;
 | |
| 
 | |
|     my @closest = ();
 | |
|     my $closestVersion;
 | |
|     my $minDist = -1; # actually, larger means closer
 | |
| 
 | |
|     # Find all source paths with the same name.
 | |
| 
 | |
|     foreach my $q (keys %srcOutPaths) {
 | |
|         (my $name2, my $version2) = getNameVersion $q;
 | |
|         if ($name eq $name2) {
 | |
| 
 | |
|             # If the sizes differ too much, then skip.  This
 | |
|             # disambiguates between, e.g., a real component and a
 | |
|             # wrapper component (cf. Firefox in Nixpkgs).
 | |
|             my $srcSize = @{$srcNarFiles{$q}}[0]->{size};
 | |
|             my $dstSize = @{$dstNarFiles{$p}}[0]->{size};
 | |
|             my $ratio = $srcSize / $dstSize;
 | |
|             $ratio = 1 / $ratio if $ratio < 1;
 | |
| #            print "  SIZE $srcSize $dstSize $ratio $q\n";
 | |
| 
 | |
|             if ($ratio >= 3) {
 | |
|                 print "  SKIPPING $q due to size ratio $ratio ($srcSize $dstSize)\n";
 | |
|                 next;
 | |
|             }
 | |
| 
 | |
|             # If the numbers of weighted uses differ too much, then
 | |
|             # skip.  This disambiguates between, e.g., the bootstrap
 | |
|             # GCC and the final GCC in Nixpkgs.
 | |
|             my $srcUses = computeUses \%srcNarFiles, $q;
 | |
|             my $dstUses = computeUses \%dstNarFiles, $p;
 | |
|             $ratio = $srcUses / $dstUses;
 | |
|             $ratio = 1 / $ratio if $ratio < 1;
 | |
|             print "  USE $srcUses $dstUses $ratio $q\n";
 | |
|             
 | |
| #            if ($ratio >= 2) {
 | |
| #                print "  SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n";
 | |
| #                next;
 | |
| #            }
 | |
| 
 | |
|             # If there are multiple matching names, include the ones
 | |
|             # with the closest version numbers.
 | |
|             my $dist = versionDiff $version, $version2;
 | |
|             if ($dist > $minDist) {
 | |
|                 $minDist = $dist;
 | |
|                 @closest = ($q);
 | |
|                 $closestVersion = $version2;
 | |
|             } elsif ($dist == $minDist) {
 | |
|                 push @closest, $q;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (scalar(@closest) == 0) {
 | |
|         print "  NO BASE: $p\n";
 | |
|         next;
 | |
|     }
 | |
| 
 | |
|     foreach my $closest (@closest) {
 | |
| 
 | |
|         # Generate a patch between $closest and $p.
 | |
|         print "  $p <- $closest\n";
 | |
| 
 | |
|         # If the patch already exists, skip it.
 | |
|         if (containsPatch(\%srcPatches, $p, $closest) ||
 | |
|             containsPatch(\%dstPatches, $p, $closest))
 | |
|         {
 | |
|             print "    skipping, already exists\n";
 | |
|             next;
 | |
|         }
 | |
| 
 | |
| #        next;
 | |
|         
 | |
|         my $srcNarBz2 = getNarBz2 \%srcNarFiles, $closest;
 | |
|         my $dstNarBz2 = getNarBz2 \%dstNarFiles, $p;
 | |
| 
 | |
|         my $maxNarSize = 150 * 1024 * 1024;
 | |
| 
 | |
|         system("@bunzip2@ < $srcNarBz2 > $tmpdir/A") == 0
 | |
|             or die "cannot unpack $srcNarBz2";
 | |
| 
 | |
|         if ((stat "$tmpdir/A")[7] >= $maxNarSize) {
 | |
|             print "    skipping, source is too large\n";
 | |
|             next;
 | |
|         }
 | |
|         
 | |
|         system("@bunzip2@ < $dstNarBz2 > $tmpdir/B") == 0
 | |
|             or die "cannot unpack $dstNarBz2";
 | |
| 
 | |
|         if ((stat "$tmpdir/B")[7] >= $maxNarSize) {
 | |
|             print "    skipping, destination is too large\n";
 | |
|             next;
 | |
|         }
 | |
|         
 | |
|         system("@libexecdir@/bsdiff $tmpdir/A $tmpdir/B $tmpdir/DIFF") == 0
 | |
|             or die "cannot compute binary diff";
 | |
| 
 | |
|         my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpdir/A` or die;
 | |
|         chomp $baseHash;
 | |
| 
 | |
|         my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpdir/B` or die;
 | |
|         chomp $narHash;
 | |
| 
 | |
|         my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpdir/DIFF` or die;
 | |
|         chomp $narDiffHash;
 | |
| 
 | |
|         my $narDiffSize = (stat "$tmpdir/DIFF")[7];
 | |
|         my $dstNarBz2Size = (stat $dstNarBz2)[7];
 | |
| 
 | |
|         if ($narDiffSize >= $dstNarBz2Size) {
 | |
|             print "    rejecting; patch bigger than full archive\n";
 | |
|             next;
 | |
|         }
 | |
|     
 | |
|         my $finalName =
 | |
|             "$narDiffHash.nar-bsdiff";
 | |
| 
 | |
|         print "    size $narDiffSize; full size $dstNarBz2Size\n";
 | |
|         
 | |
|         if (-e "$patchesDir/$finalName") {
 | |
|             print "    not copying, already exists\n";
 | |
|         }
 | |
| 
 | |
|         else {
 | |
| 
 | |
|             system("cp '$tmpdir/DIFF' '$patchesDir/$finalName.tmp'") == 0
 | |
|                 or die "cannot copy diff";
 | |
|             
 | |
|             rename("$patchesDir/$finalName.tmp", "$patchesDir/$finalName")
 | |
|                 or die "cannot rename $patchesDir/$finalName.tmp";
 | |
|             
 | |
|         }
 | |
|         
 | |
|         # Add the patch to the manifest.
 | |
|         addPatch \%dstPatches, $p,
 | |
|             { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
 | |
|             , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash"
 | |
|             , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
 | |
|             }, 0;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| # Add in any potentially useful patches in the source (namely, those
 | |
| # patches that produce either paths in the destination or paths that
 | |
| # can be used as the base for other useful patches).
 | |
| 
 | |
| my $changed;
 | |
| do {
 | |
|     # !!! we repeat this to reach the transitive closure; inefficient
 | |
|     $changed = 0;
 | |
| 
 | |
|     foreach my $p (keys %srcPatches) {
 | |
|         my $patchList = $srcPatches{$p};
 | |
| 
 | |
|         my $include = 0;
 | |
| 
 | |
|         # Is path $p included in the destination?  If so, include
 | |
|         # patches that produce it.
 | |
|         $include = 1 if (defined $dstNarFiles{$p});
 | |
| 
 | |
|         # Is path $p a path that serves as a base for paths in the
 | |
|         # destination?  If so, include patches that produce it.
 | |
|         foreach my $q (keys %dstPatches) {
 | |
|             foreach my $patch (@{$dstPatches{$q}}) {
 | |
|                 # !!! check baseHash
 | |
|                 $include = 1 if ($p eq $patch->{basePath});
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if ($include) {
 | |
|             foreach my $patch (@{$patchList}) {
 | |
|                 $changed = 1 if addPatch \%dstPatches, $p, $patch;
 | |
|             }
 | |
|         }            
 | |
|         
 | |
|     }
 | |
|     
 | |
| } while $changed;
 | |
| 
 | |
| 
 | |
| # Rewrite the manifest of the destination (with the new patches).
 | |
| writeManifest "$dstDir/MANIFEST",
 | |
|     \%dstNarFiles, \%dstPatches, \%dstSuccessors;
 |