* Move the patch generator into a module.
This commit is contained in:
		
							parent
							
								
									9737a7eba0
								
							
						
					
					
						commit
						d3bba0c2d8
					
				
					 4 changed files with 345 additions and 437 deletions
				
			
		
							
								
								
									
										335
									
								
								scripts/GeneratePatches.pm.in
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										335
									
								
								scripts/GeneratePatches.pm.in
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,335 @@ | ||||||
|  | #! @perl@ -w -I@libexecdir@/nix | ||||||
|  | 
 | ||||||
|  | use strict; | ||||||
|  | use File::Temp qw(tempdir); | ||||||
|  | use readmanifest; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Some patch generations options. | ||||||
|  | 
 | ||||||
|  | # Max size of NAR archives to generate patches for. | ||||||
|  | my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"}; | ||||||
|  | $maxNarSize = 160 * 1024 * 1024 if !defined $maxNarSize; | ||||||
|  | 
 | ||||||
|  | # If patch is bigger than this fraction of full archive, reject. | ||||||
|  | my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"}; | ||||||
|  | $maxPatchFraction = 0.60 if !defined $maxPatchFraction; | ||||||
|  | 
 | ||||||
|  | my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"}; | ||||||
|  | $timeLimit = 180 if !defined $timeLimit; | ||||||
|  | 
 | ||||||
|  | my $hashAlgo = "sha256"; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | sub findOutputPaths { | ||||||
|  |     my $narFiles = shift; | ||||||
|  | 
 | ||||||
|  |     my %outPaths; | ||||||
|  |      | ||||||
|  |     foreach my $p (keys %{$narFiles}) { | ||||||
|  | 
 | ||||||
|  |         # Ignore derivations. | ||||||
|  |         next if ($p =~ /\.drv$/); | ||||||
|  |          | ||||||
|  |         # Ignore builders (too much ambiguity -- they're all called | ||||||
|  |         # `builder.sh'). | ||||||
|  |         next if ($p =~ /\.sh$/); | ||||||
|  |         next if ($p =~ /\.patch$/); | ||||||
|  |          | ||||||
|  |         # Don't bother including tar files etc. | ||||||
|  |         next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/); | ||||||
|  | 
 | ||||||
|  |         $outPaths{$p} = 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return %outPaths; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | sub getNameVersion { | ||||||
|  |     my $p = shift; | ||||||
|  |     $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/; | ||||||
|  |     my $name = $1; | ||||||
|  |     my $version = $2; | ||||||
|  |     return undef unless defined $name && defined $version; | ||||||
|  |     $name =~ s/^-//; | ||||||
|  |     $version =~ s/^-//; | ||||||
|  |     return ($name, $version); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # A quick hack to get a measure of the `distance' between two | ||||||
|  | # versions: it's just the position of the first character that differs | ||||||
|  | # (or 999 if they are the same). | ||||||
|  | sub versionDiff { | ||||||
|  |     my $s = shift; | ||||||
|  |     my $t = shift; | ||||||
|  |     my $i; | ||||||
|  |     return 999 if $s eq $t; | ||||||
|  |     for ($i = 0; $i < length $s; $i++) { | ||||||
|  |         return $i if $i >= length $t or | ||||||
|  |             substr($s, $i, 1) ne substr($t, $i, 1); | ||||||
|  |     } | ||||||
|  |     return $i; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | sub getNarBz2 { | ||||||
|  |     my $narPath = shift; | ||||||
|  |     my $narFiles = shift; | ||||||
|  |     my $storePath = shift; | ||||||
|  |      | ||||||
|  |     my $narFileList = $$narFiles{$storePath}; | ||||||
|  |     die "missing path $storePath" unless defined $narFileList; | ||||||
|  | 
 | ||||||
|  |     my $narFile = @{$narFileList}[0]; | ||||||
|  |     die unless defined $narFile; | ||||||
|  | 
 | ||||||
|  |     $narFile->{url} =~ /\/([^\/]+)$/; | ||||||
|  |     die unless defined $1; | ||||||
|  |     return "$narPath/$1"; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | sub containsPatch { | ||||||
|  |     my $patches = shift; | ||||||
|  |     my $storePath = shift; | ||||||
|  |     my $basePath = shift; | ||||||
|  |     my $patchList = $$patches{$storePath}; | ||||||
|  |     return 0 if !defined $patchList; | ||||||
|  |     my $found = 0; | ||||||
|  |     foreach my $patch (@{$patchList}) { | ||||||
|  |         # !!! baseHash might differ | ||||||
|  |         return 1 if $patch->{basePath} eq $basePath; | ||||||
|  |     } | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | sub generatePatches { | ||||||
|  |     my ($srcNarFiles, $dstNarFiles, $srcPatches, $dstPatches, $narPath, $patchesPath, $patchesURL, $tmpDir) = @_; | ||||||
|  | 
 | ||||||
|  |     my %srcOutPaths = findOutputPaths $srcNarFiles; | ||||||
|  |     my %dstOutPaths = findOutputPaths $dstNarFiles; | ||||||
|  | 
 | ||||||
|  |     # For each output path in the destination, see if we need to / can | ||||||
|  |     # create a patch. | ||||||
|  | 
 | ||||||
|  |     print STDERR "creating patches...\n"; | ||||||
|  | 
 | ||||||
|  |     foreach my $p (keys %dstOutPaths) { | ||||||
|  | 
 | ||||||
|  |         # If exactly the same path already exists in the source, skip it. | ||||||
|  |         next if defined $srcOutPaths{$p}; | ||||||
|  |      | ||||||
|  |         print "  $p\n"; | ||||||
|  | 
 | ||||||
|  |         # If not, then we should find the paths in the source that are | ||||||
|  |         # `most' likely to be present on a system that wants to | ||||||
|  |         # install this path. | ||||||
|  | 
 | ||||||
|  |         (my $name, my $version) = getNameVersion $p; | ||||||
|  |         next unless defined $name && defined $version; | ||||||
|  | 
 | ||||||
|  |         my @closest = (); | ||||||
|  |         my $closestVersion; | ||||||
|  |         my $minDist = -1; # actually, larger means closer | ||||||
|  | 
 | ||||||
|  |         # Find all source paths with the same name. | ||||||
|  | 
 | ||||||
|  |         foreach my $q (keys %srcOutPaths) { | ||||||
|  |             (my $name2, my $version2) = getNameVersion $q; | ||||||
|  |             next unless defined $name2 && defined $version2; | ||||||
|  | 
 | ||||||
|  |             if ($name eq $name2) { | ||||||
|  | 
 | ||||||
|  |                 my $srcSystem = @{$$dstNarFiles{$p}}[0]->{system}; | ||||||
|  |                 my $dstSystem = @{$$srcNarFiles{$q}}[0]->{system}; | ||||||
|  |                 if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) { | ||||||
|  |                     print "    SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n"; | ||||||
|  |                     next; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 # If the sizes differ too much, then skip.  This | ||||||
|  |                 # disambiguates between, e.g., a real component and a | ||||||
|  |                 # wrapper component (cf. Firefox in Nixpkgs). | ||||||
|  |                 my $srcSize = @{$$srcNarFiles{$q}}[0]->{size}; | ||||||
|  |                 my $dstSize = @{$$dstNarFiles{$p}}[0]->{size}; | ||||||
|  |                 my $ratio = $srcSize / $dstSize; | ||||||
|  |                 $ratio = 1 / $ratio if $ratio < 1; | ||||||
|  |                 # print "  SIZE $srcSize $dstSize $ratio $q\n"; | ||||||
|  | 
 | ||||||
|  |                 if ($ratio >= 3) { | ||||||
|  |                     print "    SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n"; | ||||||
|  |                     next; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 # If there are multiple matching names, include the | ||||||
|  |                 # ones with the closest version numbers. | ||||||
|  |                 my $dist = versionDiff $version, $version2; | ||||||
|  |                 if ($dist > $minDist) { | ||||||
|  |                     $minDist = $dist; | ||||||
|  |                     @closest = ($q); | ||||||
|  |                     $closestVersion = $version2; | ||||||
|  |                 } elsif ($dist == $minDist) { | ||||||
|  |                     push @closest, $q; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (scalar(@closest) == 0) { | ||||||
|  |             print "    NO BASE: $p\n"; | ||||||
|  |             next; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         foreach my $closest (@closest) { | ||||||
|  | 
 | ||||||
|  |             # Generate a patch between $closest and $p. | ||||||
|  |             print STDERR "  $p <- $closest\n"; | ||||||
|  | 
 | ||||||
|  |             # If the patch already exists, skip it. | ||||||
|  |             if (containsPatch($srcPatches, $p, $closest) || | ||||||
|  |                 containsPatch($dstPatches, $p, $closest)) | ||||||
|  |             { | ||||||
|  |                 print "    skipping, already exists\n"; | ||||||
|  |                 next; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             my $srcNarBz2 = getNarBz2 $narPath, $srcNarFiles, $closest; | ||||||
|  |             my $dstNarBz2 = getNarBz2 $narPath, $dstNarFiles, $p; | ||||||
|  | 
 | ||||||
|  |             if (! -f $srcNarBz2) { | ||||||
|  |                 warn "patch source archive $srcNarBz2 is missing\n"; | ||||||
|  |                 next; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0 | ||||||
|  |                 or die "cannot unpack $srcNarBz2"; | ||||||
|  | 
 | ||||||
|  |             if ((stat "$tmpDir/A")[7] >= $maxNarSize) { | ||||||
|  |                 print "    skipping, source is too large\n"; | ||||||
|  |                 next; | ||||||
|  |             } | ||||||
|  |          | ||||||
|  |             system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0 | ||||||
|  |                 or die "cannot unpack $dstNarBz2"; | ||||||
|  | 
 | ||||||
|  |             if ((stat "$tmpDir/B")[7] >= $maxNarSize) { | ||||||
|  |                 print "    skipping, destination is too large\n"; | ||||||
|  |                 next; | ||||||
|  |             } | ||||||
|  |          | ||||||
|  |             my $time1 = time(); | ||||||
|  |             my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF"); | ||||||
|  |             my $time2 = time(); | ||||||
|  |             if ($res) { | ||||||
|  |                 warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n"; | ||||||
|  |                 next; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die; | ||||||
|  |             chomp $baseHash; | ||||||
|  | 
 | ||||||
|  |             my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die; | ||||||
|  |             chomp $narHash; | ||||||
|  | 
 | ||||||
|  |             my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die; | ||||||
|  |             chomp $narDiffHash; | ||||||
|  | 
 | ||||||
|  |             my $narDiffSize = (stat "$tmpDir/DIFF")[7]; | ||||||
|  |             my $dstNarBz2Size = (stat $dstNarBz2)[7]; | ||||||
|  | 
 | ||||||
|  |             print "    size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n"; | ||||||
|  |          | ||||||
|  |             if ($narDiffSize >= $dstNarBz2Size) { | ||||||
|  |                 print "    rejecting; patch bigger than full archive\n"; | ||||||
|  |                 next; | ||||||
|  |             } | ||||||
|  |      | ||||||
|  |             if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) { | ||||||
|  |                 print "    rejecting; patch too large relative to full archive\n"; | ||||||
|  |                 next; | ||||||
|  |             } | ||||||
|  |      | ||||||
|  |             my $finalName = "$narDiffHash.nar-bsdiff"; | ||||||
|  | 
 | ||||||
|  |             if (-e "$patchesPath/$finalName") { | ||||||
|  |                 print "    not copying, already exists\n"; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             else { | ||||||
|  |                 system("cp '$tmpDir/DIFF' '$patchesPath/$finalName.tmp'") == 0 | ||||||
|  |                     or die "cannot copy diff"; | ||||||
|  |                 rename("$patchesPath/$finalName.tmp", "$patchesPath/$finalName") | ||||||
|  |                     or die "cannot rename $patchesPath/$finalName.tmp"; | ||||||
|  |             } | ||||||
|  |          | ||||||
|  |             # Add the patch to the manifest. | ||||||
|  |             addPatch $dstPatches, $p, | ||||||
|  |                 { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash" | ||||||
|  |                 , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash" | ||||||
|  |                 , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff" | ||||||
|  |                 }; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Propagate useful patches from $srcPatches to $dstPatches.  A patch | ||||||
|  | # is useful if it produces either paths in the $dstNarFiles or paths | ||||||
|  | # that can be used as the base for other useful patches. | ||||||
|  | sub propagatePatches { | ||||||
|  |     my ($srcPatches, $dstNarFiles, $dstPatches) = @_; | ||||||
|  | 
 | ||||||
|  |     print STDERR "propagating patches...\n"; | ||||||
|  | 
 | ||||||
|  |     my $changed; | ||||||
|  |     do { | ||||||
|  |         # !!! we repeat this to reach the transitive closure; inefficient | ||||||
|  |         $changed = 0; | ||||||
|  | 
 | ||||||
|  |         print STDERR "loop\n"; | ||||||
|  | 
 | ||||||
|  |         my %dstBasePaths; | ||||||
|  |         foreach my $q (keys %{$dstPatches}) { | ||||||
|  |             foreach my $patch (@{$$dstPatches{$q}}) { | ||||||
|  |                 $dstBasePaths{$patch->{basePath}} = 1; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         foreach my $p (keys %{$srcPatches}) { | ||||||
|  |             my $patchList = $$srcPatches{$p}; | ||||||
|  | 
 | ||||||
|  |             my $include = 0; | ||||||
|  | 
 | ||||||
|  |             # Is path $p included in the destination?  If so, include | ||||||
|  |             # patches that produce it. | ||||||
|  |             $include = 1 if defined $$dstNarFiles{$p}; | ||||||
|  | 
 | ||||||
|  |             # Is path $p a path that serves as a base for paths in the | ||||||
|  |             # destination?  If so, include patches that produce it. | ||||||
|  |             # !!! check baseHash | ||||||
|  |             $include = 1 if defined $dstBasePaths{$p}; | ||||||
|  | 
 | ||||||
|  |             if ($include) { | ||||||
|  |                 foreach my $patch (@{$patchList}) { | ||||||
|  |                     $changed = 1 if addPatch $dstPatches, $p, $patch; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |          | ||||||
|  |         } | ||||||
|  |      | ||||||
|  |     } while $changed; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Add all new patches in $srcPatches to $dstPatches. | ||||||
|  | sub copyPatches { | ||||||
|  |     my ($srcPatches, $dstPatches) = @_; | ||||||
|  |     foreach my $p (keys %{$srcPatches}) { | ||||||
|  |         addPatch $dstPatches, $p, $_ foreach @{$$srcPatches{$p}}; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | return 1; | ||||||
|  | @ -3,7 +3,7 @@ bin_SCRIPTS = nix-collect-garbage \ | ||||||
|   nix-install-package nix-channel nix-build \ |   nix-install-package nix-channel nix-build \ | ||||||
|   nix-copy-closure  |   nix-copy-closure  | ||||||
| 
 | 
 | ||||||
| noinst_SCRIPTS = nix-profile.sh generate-patches.pl \ | noinst_SCRIPTS = nix-profile.sh GeneratePatches.pm \ | ||||||
|   find-runtime-roots.pl build-remote.pl nix-reduce-build \ |   find-runtime-roots.pl build-remote.pl nix-reduce-build \ | ||||||
|   copy-from-other-stores.pl nix-http-export.cgi |   copy-from-other-stores.pl nix-http-export.cgi | ||||||
| 
 | 
 | ||||||
|  | @ -17,7 +17,7 @@ install-exec-local: readmanifest.pm download-using-manifests.pl copy-from-other- | ||||||
| 	$(INSTALL_DATA) readconfig.pm $(DESTDIR)$(libexecdir)/nix  | 	$(INSTALL_DATA) readconfig.pm $(DESTDIR)$(libexecdir)/nix  | ||||||
| 	$(INSTALL_DATA) ssh.pm $(DESTDIR)$(libexecdir)/nix  | 	$(INSTALL_DATA) ssh.pm $(DESTDIR)$(libexecdir)/nix  | ||||||
| 	$(INSTALL_PROGRAM) find-runtime-roots.pl $(DESTDIR)$(libexecdir)/nix  | 	$(INSTALL_PROGRAM) find-runtime-roots.pl $(DESTDIR)$(libexecdir)/nix  | ||||||
| 	$(INSTALL_PROGRAM) generate-patches.pl $(DESTDIR)$(libexecdir)/nix  | 	$(INSTALL_PROGRAM) GeneratePatches.pm $(DESTDIR)$(libexecdir)/nix  | ||||||
| 	$(INSTALL_PROGRAM) build-remote.pl $(DESTDIR)$(libexecdir)/nix  | 	$(INSTALL_PROGRAM) build-remote.pl $(DESTDIR)$(libexecdir)/nix  | ||||||
| 	$(INSTALL) -d $(DESTDIR)$(libexecdir)/nix/substituters | 	$(INSTALL) -d $(DESTDIR)$(libexecdir)/nix/substituters | ||||||
| 	$(INSTALL_PROGRAM) download-using-manifests.pl $(DESTDIR)$(libexecdir)/nix/substituters | 	$(INSTALL_PROGRAM) download-using-manifests.pl $(DESTDIR)$(libexecdir)/nix/substituters | ||||||
|  | @ -36,7 +36,7 @@ EXTRA_DIST = nix-collect-garbage.in \ | ||||||
|   nix-build.in \ |   nix-build.in \ | ||||||
|   download-using-manifests.pl.in \ |   download-using-manifests.pl.in \ | ||||||
|   copy-from-other-stores.pl.in \ |   copy-from-other-stores.pl.in \ | ||||||
|   generate-patches.pl.in \ |   GeneratePatches.pm.in \ | ||||||
|   nix-copy-closure.in \ |   nix-copy-closure.in \ | ||||||
|   find-runtime-roots.pl.in \ |   find-runtime-roots.pl.in \ | ||||||
|   build-remote.pl.in \ |   build-remote.pl.in \ | ||||||
|  |  | ||||||
|  | @ -1,429 +0,0 @@ | ||||||
| #! @perl@ -w -I@libexecdir@/nix |  | ||||||
| 
 |  | ||||||
| use strict; |  | ||||||
| use File::Temp qw(tempdir); |  | ||||||
| use readmanifest; |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # Some patch generations options. |  | ||||||
| 
 |  | ||||||
| # Max size of NAR archives to generate patches for. |  | ||||||
| my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"}; |  | ||||||
| $maxNarSize = 100 * 1024 * 1024 if !defined $maxNarSize; |  | ||||||
| 
 |  | ||||||
| # If patch is bigger than this fraction of full archive, reject. |  | ||||||
| my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"}; |  | ||||||
| $maxPatchFraction = 0.60 if !defined $maxPatchFraction; |  | ||||||
| 
 |  | ||||||
| my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"}; |  | ||||||
| $timeLimit = 180 if !defined $timeLimit; |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| die unless scalar @ARGV == 5; |  | ||||||
| 
 |  | ||||||
| my $hashAlgo = "sha256"; |  | ||||||
| 
 |  | ||||||
| my $narDir = $ARGV[0]; |  | ||||||
| my $patchesDir = $ARGV[1]; |  | ||||||
| my $patchesURL = $ARGV[2]; |  | ||||||
| my $srcManifest = $ARGV[3]; |  | ||||||
| my $dstManifest = $ARGV[4]; |  | ||||||
| 
 |  | ||||||
| my $tmpDir = tempdir("nix-generate-patches.XXXXXX", CLEANUP => 1, TMPDIR => 1) |  | ||||||
|     or die "cannot create a temporary directory"; |  | ||||||
| 
 |  | ||||||
| #END { rmdir $tmpDir; } |  | ||||||
| 
 |  | ||||||
| my %srcNarFiles; |  | ||||||
| my %srcLocalPaths; |  | ||||||
| my %srcPatches; |  | ||||||
| 
 |  | ||||||
| my %dstNarFiles; |  | ||||||
| my %dstLocalPaths; |  | ||||||
| my %dstPatches; |  | ||||||
| 
 |  | ||||||
| readManifest "$srcManifest", |  | ||||||
|     \%srcNarFiles, \%srcLocalPaths, \%srcPatches; |  | ||||||
| 
 |  | ||||||
| readManifest "$dstManifest", |  | ||||||
|     \%dstNarFiles, \%dstLocalPaths, \%dstPatches; |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| sub findOutputPaths { |  | ||||||
|     my $narFiles = shift; |  | ||||||
| 
 |  | ||||||
|     my %outPaths; |  | ||||||
|      |  | ||||||
|     foreach my $p (keys %{$narFiles}) { |  | ||||||
| 
 |  | ||||||
|         # Ignore derivations. |  | ||||||
|         next if ($p =~ /\.drv$/); |  | ||||||
|          |  | ||||||
|         # Ignore builders (too much ambiguity -- they're all called |  | ||||||
|         # `builder.sh'). |  | ||||||
|         next if ($p =~ /\.sh$/); |  | ||||||
|         next if ($p =~ /\.patch$/); |  | ||||||
|          |  | ||||||
|         # Don't bother including tar files etc. |  | ||||||
|         next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/); |  | ||||||
| 
 |  | ||||||
|         $outPaths{$p} = 1; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return %outPaths; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| print "finding src output paths...\n"; |  | ||||||
| my %srcOutPaths = findOutputPaths \%srcNarFiles; |  | ||||||
| 
 |  | ||||||
| print "finding dst output paths...\n"; |  | ||||||
| my %dstOutPaths = findOutputPaths \%dstNarFiles; |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| sub getNameVersion { |  | ||||||
|     my $p = shift; |  | ||||||
|     $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/; |  | ||||||
|     my $name = $1; |  | ||||||
|     my $version = $2; |  | ||||||
|     return undef unless defined $name && defined $version; |  | ||||||
|     $name =~ s/^-//; |  | ||||||
|     $version =~ s/^-//; |  | ||||||
|     return ($name, $version); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # A quick hack to get a measure of the `distance' between two |  | ||||||
| # versions: it's just the position of the first character that differs |  | ||||||
| # (or 999 if they are the same). |  | ||||||
| sub versionDiff { |  | ||||||
|     my $s = shift; |  | ||||||
|     my $t = shift; |  | ||||||
|     my $i; |  | ||||||
|     return 999 if $s eq $t; |  | ||||||
|     for ($i = 0; $i < length $s; $i++) { |  | ||||||
|         return $i if $i >= length $t or |  | ||||||
|             substr($s, $i, 1) ne substr($t, $i, 1); |  | ||||||
|     } |  | ||||||
|     return $i; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| sub getNarBz2 { |  | ||||||
|     my $narFiles = shift; |  | ||||||
|     my $storePath = shift; |  | ||||||
|      |  | ||||||
|     my $narFileList = $$narFiles{$storePath}; |  | ||||||
|     die "missing path $storePath" unless defined $narFileList; |  | ||||||
| 
 |  | ||||||
|     my $narFile = @{$narFileList}[0]; |  | ||||||
|     die unless defined $narFile; |  | ||||||
| 
 |  | ||||||
|     $narFile->{url} =~ /\/([^\/]+)$/; |  | ||||||
|     die unless defined $1; |  | ||||||
|     return "$narDir/$1"; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| sub containsPatch { |  | ||||||
|     my $patches = shift; |  | ||||||
|     my $storePath = shift; |  | ||||||
|     my $basePath = shift; |  | ||||||
|     my $patchList = $$patches{$storePath}; |  | ||||||
|     return 0 if !defined $patchList; |  | ||||||
|     my $found = 0; |  | ||||||
|     foreach my $patch (@{$patchList}) { |  | ||||||
|         # !!! baseHash might differ |  | ||||||
|         return 1 if $patch->{basePath} eq $basePath; |  | ||||||
|     } |  | ||||||
|     return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # Compute the "weighted" number of uses of a path in the build graph. |  | ||||||
| sub computeUses { |  | ||||||
|     my $narFiles = shift; |  | ||||||
|     my $path = shift; |  | ||||||
| 
 |  | ||||||
|     # Find the deriver of $path. |  | ||||||
|     return 1 unless defined $$narFiles{$path}; |  | ||||||
|     my $deriver = @{$$narFiles{$path}}[0]->{deriver}; |  | ||||||
|     return 1 unless defined $deriver && $deriver ne ""; |  | ||||||
| 
 |  | ||||||
| #    print "  DERIVER $deriver\n"; |  | ||||||
| 
 |  | ||||||
|     # Optimisation: build the referrers graph from the references |  | ||||||
|     # graph. |  | ||||||
|     my %referrers; |  | ||||||
|     foreach my $q (keys %{$narFiles}) { |  | ||||||
|         my @refs = split " ", @{$$narFiles{$q}}[0]->{references}; |  | ||||||
|         foreach my $r (@refs) { |  | ||||||
|             $referrers{$r} = [] unless defined $referrers{$r}; |  | ||||||
|             push @{$referrers{$r}}, $q; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     # Determine the shortest path from $deriver to all other reachable |  | ||||||
|     # paths in the `referrers' graph. |  | ||||||
| 
 |  | ||||||
|     my %dist; |  | ||||||
|     $dist{$deriver} = 0; |  | ||||||
| 
 |  | ||||||
|     my @queue = ($deriver); |  | ||||||
|     my $pos = 0; |  | ||||||
|      |  | ||||||
|     while ($pos < scalar @queue) { |  | ||||||
|         my $p = $queue[$pos]; |  | ||||||
|         $pos++; |  | ||||||
| 
 |  | ||||||
|         foreach my $q (@{$referrers{$p}}) { |  | ||||||
|             if (!defined $dist{$q}) { |  | ||||||
|                 $dist{$q} = $dist{$p} + 1; |  | ||||||
| #                print "    $q $dist{$q}\n"; |  | ||||||
|                 push @queue, $q; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     my $wuse = 1.0; |  | ||||||
|     foreach my $user (keys %dist) { |  | ||||||
|         next if $user eq $deriver; |  | ||||||
| #        print "    $user $dist{$user}\n"; |  | ||||||
|         $wuse += 1.0 / 2.0**$dist{$user}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| #    print "    XXX $path $wuse\n"; |  | ||||||
|      |  | ||||||
|     return $wuse; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # For each output path in the destination, see if we need to / can |  | ||||||
| # create a patch. |  | ||||||
| 
 |  | ||||||
| print "creating patches...\n"; |  | ||||||
| 
 |  | ||||||
| foreach my $p (keys %dstOutPaths) { |  | ||||||
| 
 |  | ||||||
|     # If exactly the same path already exists in the source, skip it. |  | ||||||
|     next if defined $srcOutPaths{$p}; |  | ||||||
|      |  | ||||||
|     print "  $p\n"; |  | ||||||
| 
 |  | ||||||
|     # If not, then we should find the paths in the source that are |  | ||||||
|     # `most' likely to be present on a system that wants to install |  | ||||||
|     # this path. |  | ||||||
| 
 |  | ||||||
|     (my $name, my $version) = getNameVersion $p; |  | ||||||
|     next unless defined $name && defined $version; |  | ||||||
| 
 |  | ||||||
|     my @closest = (); |  | ||||||
|     my $closestVersion; |  | ||||||
|     my $minDist = -1; # actually, larger means closer |  | ||||||
| 
 |  | ||||||
|     # Find all source paths with the same name. |  | ||||||
| 
 |  | ||||||
|     foreach my $q (keys %srcOutPaths) { |  | ||||||
|         (my $name2, my $version2) = getNameVersion $q; |  | ||||||
|         next unless defined $name2 && defined $version2; |  | ||||||
| 
 |  | ||||||
|         if ($name eq $name2) { |  | ||||||
| 
 |  | ||||||
|             my $srcSystem = @{$dstNarFiles{$p}}[0]->{system}; |  | ||||||
|             my $dstSystem = @{$srcNarFiles{$q}}[0]->{system}; |  | ||||||
|             if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) { |  | ||||||
|                 print "  SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n"; |  | ||||||
|                 next; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             # If the sizes differ too much, then skip.  This |  | ||||||
|             # disambiguates between, e.g., a real component and a |  | ||||||
|             # wrapper component (cf. Firefox in Nixpkgs). |  | ||||||
|             my $srcSize = @{$srcNarFiles{$q}}[0]->{size}; |  | ||||||
|             my $dstSize = @{$dstNarFiles{$p}}[0]->{size}; |  | ||||||
|             my $ratio = $srcSize / $dstSize; |  | ||||||
|             $ratio = 1 / $ratio if $ratio < 1; |  | ||||||
| #            print "  SIZE $srcSize $dstSize $ratio $q\n"; |  | ||||||
| 
 |  | ||||||
|             if ($ratio >= 3) { |  | ||||||
|                 print "  SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n"; |  | ||||||
|                 next; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             # If the numbers of weighted uses differ too much, then |  | ||||||
|             # skip.  This disambiguates between, e.g., the bootstrap |  | ||||||
|             # GCC and the final GCC in Nixpkgs. |  | ||||||
| #            my $srcUses = computeUses \%srcNarFiles, $q; |  | ||||||
| #            my $dstUses = computeUses \%dstNarFiles, $p; |  | ||||||
| #            $ratio = $srcUses / $dstUses; |  | ||||||
| #            $ratio = 1 / $ratio if $ratio < 1; |  | ||||||
| #            print "  USE $srcUses $dstUses $ratio $q\n"; |  | ||||||
|              |  | ||||||
| #            if ($ratio >= 2) { |  | ||||||
| #                print "  SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n"; |  | ||||||
| #                next; |  | ||||||
| #            } |  | ||||||
| 
 |  | ||||||
|             # If there are multiple matching names, include the ones |  | ||||||
|             # with the closest version numbers. |  | ||||||
|             my $dist = versionDiff $version, $version2; |  | ||||||
|             if ($dist > $minDist) { |  | ||||||
|                 $minDist = $dist; |  | ||||||
|                 @closest = ($q); |  | ||||||
|                 $closestVersion = $version2; |  | ||||||
|             } elsif ($dist == $minDist) { |  | ||||||
|                 push @closest, $q; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (scalar(@closest) == 0) { |  | ||||||
|         print "  NO BASE: $p\n"; |  | ||||||
|         next; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     foreach my $closest (@closest) { |  | ||||||
| 
 |  | ||||||
|         # Generate a patch between $closest and $p. |  | ||||||
|         print "  $p <- $closest\n"; |  | ||||||
| 
 |  | ||||||
|         # If the patch already exists, skip it. |  | ||||||
|         if (containsPatch(\%srcPatches, $p, $closest) || |  | ||||||
|             containsPatch(\%dstPatches, $p, $closest)) |  | ||||||
|         { |  | ||||||
|             print "    skipping, already exists\n"; |  | ||||||
|             next; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
| #        next; |  | ||||||
|          |  | ||||||
|         my $srcNarBz2 = getNarBz2 \%srcNarFiles, $closest; |  | ||||||
|         my $dstNarBz2 = getNarBz2 \%dstNarFiles, $p; |  | ||||||
| 
 |  | ||||||
|         if (! -f $srcNarBz2) { |  | ||||||
|             warn "patch source archive $srcNarBz2 is missing\n"; |  | ||||||
|             next; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0 |  | ||||||
|             or die "cannot unpack $srcNarBz2"; |  | ||||||
| 
 |  | ||||||
|         if ((stat "$tmpDir/A")[7] >= $maxNarSize) { |  | ||||||
|             print "    skipping, source is too large\n"; |  | ||||||
|             next; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0 |  | ||||||
|             or die "cannot unpack $dstNarBz2"; |  | ||||||
| 
 |  | ||||||
|         if ((stat "$tmpDir/B")[7] >= $maxNarSize) { |  | ||||||
|             print "    skipping, destination is too large\n"; |  | ||||||
|             next; |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         my $time1 = time(); |  | ||||||
|         my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF"); |  | ||||||
|         my $time2 = time(); |  | ||||||
|         if ($res) { |  | ||||||
|             warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n"; |  | ||||||
|             next; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die; |  | ||||||
|         chomp $baseHash; |  | ||||||
| 
 |  | ||||||
|         my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die; |  | ||||||
|         chomp $narHash; |  | ||||||
| 
 |  | ||||||
|         my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die; |  | ||||||
|         chomp $narDiffHash; |  | ||||||
| 
 |  | ||||||
|         my $narDiffSize = (stat "$tmpDir/DIFF")[7]; |  | ||||||
|         my $dstNarBz2Size = (stat $dstNarBz2)[7]; |  | ||||||
| 
 |  | ||||||
|         print "    size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n"; |  | ||||||
|          |  | ||||||
|         if ($narDiffSize >= $dstNarBz2Size) { |  | ||||||
|             print "    rejecting; patch bigger than full archive\n"; |  | ||||||
|             next; |  | ||||||
|         } |  | ||||||
|      |  | ||||||
|         if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) { |  | ||||||
|             print "    rejecting; patch too large relative to full archive\n"; |  | ||||||
|             next; |  | ||||||
|         } |  | ||||||
|      |  | ||||||
|         my $finalName = |  | ||||||
|             "$narDiffHash.nar-bsdiff"; |  | ||||||
| 
 |  | ||||||
|         if (-e "$patchesDir/$finalName") { |  | ||||||
|             print "    not copying, already exists\n"; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         else { |  | ||||||
| 
 |  | ||||||
|             system("cp '$tmpDir/DIFF' '$patchesDir/$finalName.tmp'") == 0 |  | ||||||
|                 or die "cannot copy diff"; |  | ||||||
|              |  | ||||||
|             rename("$patchesDir/$finalName.tmp", "$patchesDir/$finalName") |  | ||||||
|                 or die "cannot rename $patchesDir/$finalName.tmp"; |  | ||||||
|              |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|         # Add the patch to the manifest. |  | ||||||
|         addPatch \%dstPatches, $p, |  | ||||||
|             { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash" |  | ||||||
|             , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash" |  | ||||||
|             , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff" |  | ||||||
|             }, 0; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # Add in any potentially useful patches in the source (namely, those |  | ||||||
| # patches that produce either paths in the destination or paths that |  | ||||||
| # can be used as the base for other useful patches). |  | ||||||
| 
 |  | ||||||
| print "propagating patches...\n"; |  | ||||||
| 
 |  | ||||||
| my $changed; |  | ||||||
| do { |  | ||||||
|     # !!! we repeat this to reach the transitive closure; inefficient |  | ||||||
|     $changed = 0; |  | ||||||
| 
 |  | ||||||
|     print "loop\n"; |  | ||||||
| 
 |  | ||||||
|     my %dstBasePaths; |  | ||||||
|     foreach my $q (keys %dstPatches) { |  | ||||||
| 	foreach my $patch (@{$dstPatches{$q}}) { |  | ||||||
| 	    $dstBasePaths{$patch->{basePath}} = 1; |  | ||||||
| 	} |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     foreach my $p (keys %srcPatches) { |  | ||||||
|         my $patchList = $srcPatches{$p}; |  | ||||||
| 
 |  | ||||||
|         my $include = 0; |  | ||||||
| 
 |  | ||||||
|         # Is path $p included in the destination?  If so, include |  | ||||||
|         # patches that produce it. |  | ||||||
|         $include = 1 if defined $dstNarFiles{$p}; |  | ||||||
| 
 |  | ||||||
|         # Is path $p a path that serves as a base for paths in the |  | ||||||
|         # destination?  If so, include patches that produce it. |  | ||||||
| 	# !!! check baseHash |  | ||||||
|         $include = 1 if defined $dstBasePaths{$p}; |  | ||||||
| 
 |  | ||||||
|         if ($include) { |  | ||||||
|             foreach my $patch (@{$patchList}) { |  | ||||||
|                 $changed = 1 if addPatch \%dstPatches, $p, $patch; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|          |  | ||||||
|     } |  | ||||||
|      |  | ||||||
| } while $changed; |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # Rewrite the manifest of the destination (with the new patches). |  | ||||||
| writeManifest "${dstManifest}", |  | ||||||
|     \%dstNarFiles, \%dstPatches; |  | ||||||
|  | @ -146,7 +146,7 @@ sub readManifest { | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| sub writeManifest { | sub writeManifest { | ||||||
|     my ($manifest, $narFiles, $patches) = @_; |     my ($manifest, $narFiles, $patches, $noCompress) = @_; | ||||||
| 
 | 
 | ||||||
|     open MANIFEST, ">$manifest.tmp"; # !!! check exclusive |     open MANIFEST, ">$manifest.tmp"; # !!! check exclusive | ||||||
| 
 | 
 | ||||||
|  | @ -198,11 +198,13 @@ sub writeManifest { | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|     # Create a bzipped manifest. |     # Create a bzipped manifest. | ||||||
|     system("@bzip2@ < $manifest > $manifest.bz2.tmp") == 0 |     unless (defined $noCompress) { | ||||||
|         or die "cannot compress manifest"; | 	system("@bzip2@ < $manifest > $manifest.bz2.tmp") == 0 | ||||||
|  | 	    or die "cannot compress manifest"; | ||||||
| 
 | 
 | ||||||
|     rename("$manifest.bz2.tmp", "$manifest.bz2") | 	rename("$manifest.bz2.tmp", "$manifest.bz2") | ||||||
|         or die "cannot rename $manifest.bz2.tmp: $!"; | 	    or die "cannot rename $manifest.bz2.tmp: $!"; | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue