286 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			286 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
| #! @perl@ -w @perlFlags@
 | ||
| 
 | ||
| use utf8;
 | ||
| use Fcntl qw(:DEFAULT :flock);
 | ||
| use English '-no_match_vars';
 | ||
| use IO::Handle;
 | ||
| use Nix::Config;
 | ||
| use Nix::SSH;
 | ||
| use Nix::CopyClosure;
 | ||
| use Nix::Store;
 | ||
| use Encode;
 | ||
| no warnings('once');
 | ||
| 
 | ||
| STDERR->autoflush(1);
 | ||
| binmode STDERR, ":encoding(utf8)";
 | ||
| 
 | ||
| my $debug = defined $ENV{NIX_DEBUG_HOOK};
 | ||
| 
 | ||
| setVerbosity(0); # make exportPath() less verbose
 | ||
| 
 | ||
| 
 | ||
| # General operation:
 | ||
| #
 | ||
| # Try to find a free machine of type $neededSystem.  We do this as
 | ||
| # follows:
 | ||
| # - We acquire an exclusive lock on $currentLoad/main-lock.
 | ||
| # - For each machine $machine of type $neededSystem and for each $slot
 | ||
| #   less than the maximum load for that machine, we try to get an
 | ||
| #   exclusive lock on $currentLoad/$machine-$slot (without blocking).
 | ||
| #   If we get such a lock, we send "accept" to the caller.  Otherwise,
 | ||
| #   we send "postpone" and exit.
 | ||
| # - We release the exclusive lock on $currentLoad/main-lock.
 | ||
| # - We perform the build on $neededSystem.
 | ||
| # - We release the exclusive lock on $currentLoad/$machine-$slot.
 | ||
| #
 | ||
| # The nice thing about this scheme is that if we die prematurely, the
 | ||
| # locks are released automatically.
 | ||
| 
 | ||
| 
 | ||
| # Make sure that we don't get any SSH passphrase or host key popups -
 | ||
| # if there is any problem it should fail, not do something
 | ||
| # interactive.
 | ||
| $ENV{"DISPLAY"} = "";
 | ||
| $ENV{"SSH_ASKPASS"} = "";
 | ||
| 
 | ||
| 
 | ||
| sub sendReply {
 | ||
|     my $reply = shift;
 | ||
|     print STDERR "# $reply\n";
 | ||
| }
 | ||
| 
 | ||
| sub all { $_ || return 0 for @_; 1 }
 | ||
| 
 | ||
| 
 | ||
| # Initialisation.
 | ||
| my $loadIncreased = 0;
 | ||
| 
 | ||
| my ($localSystem, $maxSilentTime, $printBuildTrace, $buildTimeout) = @ARGV;
 | ||
| 
 | ||
| my $currentLoad = $ENV{"NIX_CURRENT_LOAD"} // "/run/nix/current-load";
 | ||
| my $conf = $ENV{"NIX_REMOTE_SYSTEMS"} // "@sysconfdir@/nix/machines";
 | ||
| 
 | ||
| 
 | ||
| sub openSlotLock {
 | ||
|     my ($machine, $slot) = @_;
 | ||
|     my $slotLockFn = "$currentLoad/" . (join '+', @{$machine->{systemTypes}}) . "-" . $machine->{hostName} . "-$slot";
 | ||
|     my $slotLock = new IO::Handle;
 | ||
|     sysopen $slotLock, "$slotLockFn", O_RDWR|O_CREAT, 0600 or die;
 | ||
|     return $slotLock;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| # Read the list of machines.
 | ||
| my @machines;
 | ||
| if (defined $conf && -e $conf) {
 | ||
|     open CONF, "<$conf" or die;
 | ||
|     while (<CONF>) {
 | ||
|         chomp;
 | ||
|         s/\#.*$//g;
 | ||
|         next if /^\s*$/;
 | ||
|         my @tokens = split /\s/, $_;
 | ||
|         my @supportedFeatures = split(/,/, $tokens[5] || "");
 | ||
|         my @mandatoryFeatures = split(/,/, $tokens[6] || "");
 | ||
|         push @machines,
 | ||
|             { hostName => $tokens[0]
 | ||
|             , systemTypes => [ split(/,/, $tokens[1]) ]
 | ||
|             , sshKey => $tokens[2]
 | ||
|             , maxJobs => int($tokens[3])
 | ||
|             , speedFactor => 1.0 * (defined $tokens[4] ? int($tokens[4]) : 1)
 | ||
|             , supportedFeatures => [ @supportedFeatures, @mandatoryFeatures ]
 | ||
|             , mandatoryFeatures => [ @mandatoryFeatures ]
 | ||
|             , enabled => 1
 | ||
|             };
 | ||
|     }
 | ||
|     close CONF;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| # Wait for the calling process to ask us whether we can build some derivation.
 | ||
| my ($drvPath, $hostName, $slotLock);
 | ||
| my ($from, $to);
 | ||
| 
 | ||
| REQ: while (1) {
 | ||
|     $_ = <STDIN> || exit 0;
 | ||
|     (my $amWilling, my $neededSystem, $drvPath, my $requiredFeatures) = split;
 | ||
|     my @requiredFeatures = split /,/, $requiredFeatures;
 | ||
| 
 | ||
|     my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
 | ||
| 
 | ||
|     if (!defined $currentLoad) {
 | ||
|         sendReply "decline";
 | ||
|         next;
 | ||
|     }
 | ||
| 
 | ||
|     # Acquire the exclusive lock on $currentLoad/main-lock.
 | ||
|     mkdir $currentLoad, 0777 or die unless -d $currentLoad;
 | ||
|     my $mainLock = "$currentLoad/main-lock";
 | ||
|     sysopen MAINLOCK, "$mainLock", O_RDWR|O_CREAT, 0600 or die;
 | ||
|     flock(MAINLOCK, LOCK_EX) or die;
 | ||
| 
 | ||
| 
 | ||
|     while (1) {
 | ||
|         # Find all machine that can execute this build, i.e., that
 | ||
|         # support builds for the given platform and features, and are
 | ||
|         # not at their job limit.
 | ||
|         my $rightType = 0;
 | ||
|         my @available = ();
 | ||
|         LOOP: foreach my $cur (@machines) {
 | ||
|             if ($cur->{enabled}
 | ||
|                 && (grep { $neededSystem eq $_ } @{$cur->{systemTypes}})
 | ||
|                 && all(map { my $f = $_; 0 != grep { $f eq $_ } @{$cur->{supportedFeatures}} } (@requiredFeatures, @mandatoryFeatures))
 | ||
|                 && all(map { my $f = $_; 0 != grep { $f eq $_ } @requiredFeatures } @{$cur->{mandatoryFeatures}})
 | ||
|                 )
 | ||
|             {
 | ||
|                 $rightType = 1;
 | ||
| 
 | ||
|                 # We have a machine of the right type.  Determine the load on
 | ||
|                 # the machine.
 | ||
|                 my $slot = 0;
 | ||
|                 my $load = 0;
 | ||
|                 my $free;
 | ||
|                 while ($slot < $cur->{maxJobs}) {
 | ||
|                     my $slotLock = openSlotLock($cur, $slot);
 | ||
|                     if (flock($slotLock, LOCK_EX | LOCK_NB)) {
 | ||
|                         $free = $slot unless defined $free;
 | ||
|                         flock($slotLock, LOCK_UN) or die;
 | ||
|                     } else {
 | ||
|                         $load++;
 | ||
|                     }
 | ||
|                     close $slotLock;
 | ||
|                     $slot++;
 | ||
|                 }
 | ||
| 
 | ||
|                 push @available, { machine => $cur, load => $load, free => $free }
 | ||
|                 if $load < $cur->{maxJobs};
 | ||
|             }
 | ||
|         }
 | ||
| 
 | ||
|         if ($debug) {
 | ||
|             print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
 | ||
|                 foreach @available;
 | ||
|         }
 | ||
| 
 | ||
| 
 | ||
|         # Didn't find any available machine?  Then decline or postpone.
 | ||
|         if (scalar @available == 0) {
 | ||
|             # Postpone if we have a machine of the right type, except
 | ||
|             # if the local system can and wants to do the build.
 | ||
|             if ($rightType && !$canBuildLocally) {
 | ||
|                 sendReply "postpone";
 | ||
|             } else {
 | ||
|                 sendReply "decline";
 | ||
|             }
 | ||
|             close MAINLOCK;
 | ||
|             next REQ;
 | ||
|         }
 | ||
| 
 | ||
| 
 | ||
|         # Prioritise the available machines as follows:
 | ||
|         # - First by load divided by speed factor, rounded to the nearest
 | ||
|         #   integer.  This causes fast machines to be preferred over slow
 | ||
|         #   machines with similar loads.
 | ||
|         # - Then by speed factor.
 | ||
|         # - Finally by load.
 | ||
|         sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
 | ||
|         @available = sort
 | ||
|             { lf($a) <=> lf($b)
 | ||
|                   || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
 | ||
|                   || $a->{load} <=> $b->{load}
 | ||
|             } @available;
 | ||
| 
 | ||
| 
 | ||
|         # Select the best available machine and lock a free slot.
 | ||
|         my $selected = $available[0];
 | ||
|         my $machine = $selected->{machine};
 | ||
| 
 | ||
|         $slotLock = openSlotLock($machine, $selected->{free});
 | ||
|         flock($slotLock, LOCK_EX | LOCK_NB) or die;
 | ||
|         utime undef, undef, $slotLock;
 | ||
| 
 | ||
|         close MAINLOCK;
 | ||
| 
 | ||
| 
 | ||
|         # Connect to the selected machine.
 | ||
|         my @sshOpts = ("-i", $machine->{sshKey});
 | ||
|         $hostName = $machine->{hostName};
 | ||
|         eval {
 | ||
|             ($from, $to) = connectToRemoteNix($hostName, \@sshOpts, "2>&4");
 | ||
|             # FIXME: check if builds are inhibited.
 | ||
|         };
 | ||
|         last REQ unless $@;
 | ||
|         print STDERR "$@";
 | ||
|         warn "unable to open SSH connection to ‘$hostName’, trying other available machines...\n";
 | ||
|         $from = undef;
 | ||
|         $to = undef;
 | ||
|         $machine->{enabled} = 0;
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| # Tell Nix we've accepted the build.
 | ||
| sendReply "accept";
 | ||
| my @inputs = split /\s/, readline(STDIN);
 | ||
| my @outputs = split /\s/, readline(STDIN);
 | ||
| 
 | ||
| 
 | ||
| print STDERR "@ build-remote $drvPath $hostName\n" if $printBuildTrace;
 | ||
| 
 | ||
| 
 | ||
| my $maybeSign = "";
 | ||
| $maybeSign = "--sign" if -e "$Nix::Config::confDir/signing-key.sec";
 | ||
| 
 | ||
| 
 | ||
| # Copy the derivation and its dependencies to the build machine.  This
 | ||
| # is guarded by an exclusive lock per machine to prevent multiple
 | ||
| # build-remote instances from copying to a machine simultaneously.
 | ||
| # That's undesirable because we may end up with N instances uploading
 | ||
| # the same missing path simultaneously, causing the effective network
 | ||
| # bandwidth and target disk speed to be divided by N.
 | ||
| my $uploadLock = "$currentLoad/$hostName.upload-lock";
 | ||
| sysopen UPLOADLOCK, "$uploadLock", O_RDWR|O_CREAT, 0600 or die;
 | ||
| eval {
 | ||
|     local $SIG{ALRM} = sub { die "alarm\n" };
 | ||
|     # Don't wait forever, so that a process that gets stuck while
 | ||
|     # holding the lock doesn't block everybody else indefinitely.
 | ||
|     # It's safe to continue after a timeout, just (potentially)
 | ||
|     # inefficient.
 | ||
|     alarm 15 * 60;
 | ||
|     flock(UPLOADLOCK, LOCK_EX);
 | ||
|     alarm 0;
 | ||
| };
 | ||
| if ($@) {
 | ||
|     die unless $@ eq "alarm\n";
 | ||
|     print STDERR "somebody is hogging $uploadLock, continuing...\n";
 | ||
|     unlink $uploadLock;
 | ||
| }
 | ||
| Nix::CopyClosure::copyToOpen($from, $to, $hostName, [ $drvPath, @inputs ], 0, 0, $maybeSign ne "");
 | ||
| close UPLOADLOCK;
 | ||
| 
 | ||
| 
 | ||
| # Perform the build.
 | ||
| print STDERR "building ‘$drvPath’ on ‘$hostName’\n";
 | ||
| print STDERR "@ build-remote-start $drvPath $hostName\n" if $printBuildTrace;
 | ||
| writeInt(6, $to) or die; # == cmdBuildPaths
 | ||
| writeStrings([$drvPath], $to);
 | ||
| writeInt($maxSilentTime, $to);
 | ||
| writeInt($buildTimeout, $to);
 | ||
| my $res = readInt($from);
 | ||
| print STDERR "@ build-remote-done $drvPath $hostName\n" if $printBuildTrace;
 | ||
| if ($res != 0) {
 | ||
|     my $msg = decode("utf-8", readString($from));
 | ||
|     print STDERR "error: $msg on ‘$hostName’\n";
 | ||
|     exit $res;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| # Copy the output from the build machine.
 | ||
| my @outputs2 = grep { !isValidPath($_) } @outputs;
 | ||
| if (scalar @outputs2 > 0) {
 | ||
|     writeInt(5, $to); # == cmdExportPaths
 | ||
|     writeInt(0, $to); # don't sign
 | ||
|     writeStrings(\@outputs2, $to);
 | ||
|     $ENV{'NIX_HELD_LOCKS'} = "@outputs2"; # FIXME: ugly
 | ||
|     importPaths(fileno($from));
 | ||
| }
 |