270 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			270 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
#! @perl@ -w @perlFlags@
 | 
						|
 | 
						|
use Fcntl ':flock';
 | 
						|
use English '-no_match_vars';
 | 
						|
use IO::Handle;
 | 
						|
use Nix::Config;
 | 
						|
use Nix::SSH qw/sshOpts openSSHConnection/;
 | 
						|
use Nix::CopyClosure;
 | 
						|
no warnings('once');
 | 
						|
 | 
						|
 | 
						|
# General operation:
 | 
						|
#
 | 
						|
# Try to find a free machine of type $neededSystem.  We do this as
 | 
						|
# follows:
 | 
						|
# - We acquire an exclusive lock on $currentLoad/main-lock.
 | 
						|
# - For each machine $machine of type $neededSystem and for each $slot
 | 
						|
#   less than the maximum load for that machine, we try to get an
 | 
						|
#   exclusive lock on $currentLoad/$machine-$slot (without blocking).
 | 
						|
#   If we get such a lock, we send "accept" to the caller.  Otherwise,
 | 
						|
#   we send "postpone" and exit. 
 | 
						|
# - We release the exclusive lock on $currentLoad/main-lock.
 | 
						|
# - We perform the build on $neededSystem.
 | 
						|
# - We release the exclusive lock on $currentLoad/$machine-$slot.
 | 
						|
#
 | 
						|
# The nice thing about this scheme is that if we die prematurely, the
 | 
						|
# locks are released automatically.
 | 
						|
 | 
						|
 | 
						|
# Make sure that we don't get any SSH passphrase or host key popups -
 | 
						|
# if there is any problem it should fail, not do something
 | 
						|
# interactive.
 | 
						|
# Blank out both variables so that ssh has no way to ask for a passphrase
# or host key confirmation interactively.
$ENV{"DISPLAY"} = "";
$ENV{"SSH_ASKPASS"} = "";
 | 
						|
 | 
						|
 | 
						|
# Send a one-word reply (this script uses "accept", "decline" and
# "postpone") to the calling process.  The reply is written to stderr as
# a single line prefixed with "# ".
sub sendReply {
    my ($reply) = @_;
    print STDERR "# $reply\n";
}
 | 
						|
 | 
						|
# Return 1 if every argument is true (vacuously so for an empty list),
# and 0 as soon as a false argument is encountered.
sub all {
    foreach my $value (@_) {
        return 0 unless $value;
    }
    return 1;
}
 | 
						|
 | 
						|
 | 
						|
# Initialisation.
my $loadIncreased = 0;

# Positional arguments: the local system type, the maximum silent time
# passed on to the remote `nix-store -r', and a flag that enables the
# "@ build-remote" trace line.
my ($localSystem, $maxSilentTime, $printBuildTrace) = @ARGV;
# Default missing arguments so that later string comparisons and
# interpolations do not emit "uninitialized value" warnings under -w.
$localSystem = "" unless defined $localSystem;
$maxSilentTime = 0 unless defined $maxSilentTime;

# Directory holding the lock files, and path of the machines file; either
# may be undefined.
my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
 | 
						|
 | 
						|
 | 
						|
# Open the lock file for job slot $slot of build machine $machine in
# append mode (creating it if it does not exist) and return the handle.
# The file lives in $currentLoad and is named
# "<systemTypes joined by '+'>-<hostName>-<slot>".  The handle is returned
# unlocked; callers flock() it themselves.  Dies if the file cannot be
# opened.
sub openSlotLock {
    my ($machine, $slot) = @_;
    my $systemTypes = join '+', @{$machine->{systemTypes}};
    my $slotLockFn = "$currentLoad/" . $systemTypes . "-" . $machine->{hostName} . "-$slot";
    my $slotLock = IO::Handle->new;
    # Three-argument open: the file name is never interpreted as a mode.
    open($slotLock, '>>', $slotLockFn)
        or die "cannot open slot lock `$slotLockFn': $!\n";
    return $slotLock;
}
 | 
						|
 | 
						|
 | 
						|
# Parse one line of the machines file.  Everything from a `#' to the end
# of the line is a comment.  The remaining whitespace-separated fields are,
# in order: host name, comma-separated system types, SSH key file, maximum
# number of jobs, speed factor (defaults to 1) and comma-separated
# features (defaults to none).  Returns a hash reference describing the
# machine, or nothing for blank and comment-only lines.
sub parseMachineLine {
    my ($line) = @_;
    chomp $line;
    $line =~ s/\#.*$//;
    return if $line =~ /^\s*$/;

    # split ' ' skips leading whitespace and treats runs of blanks/tabs as
    # a single separator, so column-aligned files do not yield empty fields.
    my @tokens = split ' ', $line;

    # A speed factor of zero (or less) would cause a division by zero when
    # machines are ranked by load; fall back to the default of 1.
    my $speedFactor = defined $tokens[4] ? int($tokens[4]) : 1;
    $speedFactor = 1 if $speedFactor <= 0;

    my %machine =
        ( hostName => $tokens[0]
        , systemTypes => [ split(/,/, defined $tokens[1] ? $tokens[1] : "") ]
        , sshKeys => $tokens[2]
        , maxJobs => defined $tokens[3] ? int($tokens[3]) : 0
        , speedFactor => 1.0 * $speedFactor
        , features => [ split(/,/, $tokens[5] || "") ]
        , enabled => 1
        );
    return \%machine;
}

# Read the list of machines.
my @machines;
if (defined $conf && -e $conf) {
    open(my $confFh, '<', $conf) or die "cannot open `$conf': $!\n";
    while (my $line = <$confFh>) {
        my $machine = parseMachineLine($line);
        push @machines, $machine if defined $machine;
    }
    close $confFh;
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
# Wait for the calling process to ask us whether we can build some derivation.
#
# Each request is one line on stdin with whitespace-separated fields:
#   amWilling neededSystem drvPath requiredFeatures
# where requiredFeatures is a comma-separated list that may be absent.
# For every request we reply "decline" or "postpone" and wait for the next
# one, or we leave the loop with $drvPath and $hostName set, an exclusive
# lock held on a slot of the selected machine ($slotLock), and an SSH
# connection to that machine open.  End of input terminates the script.
my ($drvPath, $hostName, $slotLock);

REQ: while (1) {
    $_ = <STDIN> || exit 0;
    my ($amWilling, $neededSystem, $requiredFeatures);
    ($amWilling, $neededSystem, $drvPath, $requiredFeatures) = split;
    # The feature field may be missing; treat that as "no features needed".
    my @requiredFeatures = split /,/, (defined $requiredFeatures ? $requiredFeatures : "");

    my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);

    if (!defined $currentLoad) {
        sendReply("decline");
        next;
    }

    # Make sure the lock directory exists.  Another instance of this hook
    # may create it between our test and our mkdir, so a failed mkdir is
    # only fatal if the directory still does not exist afterwards.
    if (!-d $currentLoad && !mkdir($currentLoad, 0777)) {
        my $mkdirError = $!;
        die "cannot create directory `$currentLoad': $mkdirError\n" unless -d $currentLoad;
    }
    my $mainLockFn = "$currentLoad/main-lock";

    while (1) {
        # Acquire the exclusive lock on $currentLoad/main-lock.  This is
        # done on every iteration: the lock is released before the SSH
        # connection attempt below, so it has to be taken again before
        # the slots are rescanned after a failed attempt.
        open(my $mainLock, '>>', $mainLockFn)
            or die "cannot open `$mainLockFn': $!\n";
        flock($mainLock, LOCK_EX)
            or die "cannot lock `$mainLockFn': $!\n";

        # Find all machines that can execute this build, i.e., that
        # support builds for the given platform and features, and are
        # not at their job limit.
        my $rightType = 0;
        my @available = ();
        foreach my $cur (@machines) {
            next unless $cur->{enabled}
                && (grep { $neededSystem eq $_ } @{$cur->{systemTypes}})
                && all(map { my $f = $_; 0 != grep { $f eq $_ } @{$cur->{features}} } @requiredFeatures);

            $rightType = 1;

            # We have a machine of the right type.  Determine the load on
            # the machine by probing each slot lock without blocking: a
            # slot we cannot lock is in use.
            my $load = 0;
            my $free;
            for (my $slot = 0; $slot < $cur->{maxJobs}; $slot++) {
                my $probe = openSlotLock($cur, $slot);
                if (flock($probe, LOCK_EX | LOCK_NB)) {
                    $free = $slot unless defined $free;
                    flock($probe, LOCK_UN) or die "cannot unlock slot $slot: $!\n";
                } else {
                    $load++;
                }
                close $probe;
            }

            push @available, { machine => $cur, load => $load, free => $free }
                if $load < $cur->{maxJobs};
        }

        if (defined $ENV{NIX_DEBUG_HOOK}) {
            print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
                foreach @available;
        }

        # Didn't find any available machine?  Then decline or postpone.
        if (scalar @available == 0) {
            # Postpone if we have a machine of the right type, except
            # if the local system can and wants to do the build.
            if ($rightType && !$canBuildLocally) {
                sendReply("postpone");
            } else {
                sendReply("decline");
            }
            close $mainLock;
            next REQ;
        }

        # Prioritise the available machines as follows:
        # - First by load divided by speed factor, rounded to the nearest
        #   integer.  This causes fast machines to be preferred over slow
        #   machines with similar loads.
        # - Then by speed factor.
        # - Finally by load.
        my $loadFactor = sub {
            my ($candidate) = @_;
            # Guard against a zero speed factor (division by zero).
            my $speed = $candidate->{machine}->{speedFactor} || 1;
            return int($candidate->{load} / $speed + 0.4999);
        };
        @available = sort {
            $loadFactor->($a) <=> $loadFactor->($b)
                || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
                || $a->{load} <=> $b->{load}
        } @available;

        # Select the best available machine and lock a free slot.  The
        # main lock is still held, so the slot found free above cannot
        # have been taken by another instance in the meantime.
        my $selected = $available[0];
        my $machine = $selected->{machine};

        $slotLock = openSlotLock($machine, $selected->{free});
        flock($slotLock, LOCK_EX | LOCK_NB)
            or die "cannot lock slot $selected->{free} of `$machine->{hostName}': $!\n";
        utime undef, undef, $slotLock;

        close $mainLock;

        # Connect to the selected machine.
        @sshOpts = ("-i", $machine->{sshKeys}, "-x");
        $hostName = $machine->{hostName};
        last REQ if openSSHConnection($hostName);

        warn "unable to open SSH connection to $hostName, trying other available machines...\n";
        $machine->{enabled} = 0;

        # We are not going to build on this machine, so give its slot
        # back instead of keeping it locked while we look elsewhere.
        close $slotLock;
        undef $slotLock;
    }
}
 | 
						|
 | 
						|
 | 
						|
# Tell Nix we've accepted the build.
sendReply("accept");

# The caller answers with two lines of whitespace-separated store paths:
# first the inputs, then the outputs.  A missing line (end of input) is
# treated as an empty list.  split ' ' ignores leading whitespace and runs
# of blanks, so no empty path entries are produced.
my $inputsLine = readline(STDIN);
my $outputsLine = readline(STDIN);
my @inputs = split ' ', (defined $inputsLine ? $inputsLine : "");
my @outputs = split ' ', (defined $outputsLine ? $outputsLine : "");

print STDERR "building `$drvPath' on `$hostName'\n";
print STDERR "@ build-remote $drvPath $hostName\n" if $printBuildTrace;

# Request signing only if a signing key is present in the configuration
# directory.
my $maybeSign = "";
$maybeSign = "--sign" if -e "$Nix::Config::confDir/signing-key.sec";
 | 
						|
 | 
						|
 | 
						|
# Register the derivation as a temporary GC root.  Note that $PPID is
 | 
						|
# the PID of the remote SSH process, which, due to the use of a
 | 
						|
# persistent SSH connection, should be the same across all remote
 | 
						|
# command invocations for this session.
 | 
						|
my $rootsDir = "@localstatedir@/nix/gcroots/tmp";

# Create the roots directory on the build machine and link the derivation
# into it.  ssh is run in list form, so no local shell is involved and
# the `$PPID' in the command reaches the remote shell unexpanded.  The
# result is not checked: registration is best-effort.
system("ssh", $hostName, @sshOpts,
       "mkdir -m 1777 -p $rootsDir; ln -sfn $drvPath $rootsDir/\$PPID.drv");

# Remove this session's temporary GC roots ($PPID.drv and $PPID.out) from
# the build machine.  Returns the status reported by system().
sub removeRoots {
    system("ssh", $hostName, @sshOpts,
           "rm -f $rootsDir/\$PPID.drv $rootsDir/\$PPID.out");
}
 | 
						|
 | 
						|
 | 
						|
# Copy the derivation and its dependencies to the build machine.
Nix::CopyClosure::copyTo($hostName, [ @sshOpts ], [ $drvPath, @inputs ], "", "", 0, 0, $maybeSign ne "");


# Perform the build.
my $buildFlags = "--max-silent-time $maxSilentTime --fallback --add-root $rootsDir/\$PPID.out --option verbosity 0";

# We let the remote side kill its process group when the connection is
# closed unexpectedly.  This is necessary to ensure that no processes
# are left running on the remote system if the local Nix process is
# killed.  (SSH itself doesn't kill child processes if the connection
# is interrupted unless the `-tt' flag is used to force a pseudo-tty,
# in which case every child receives SIGHUP; however, `-tt' doesn't
# work on some platforms when connection sharing is used.)
pipe STDIN, DUMMY; # make sure we have a readable STDIN
if (system("exec ssh $hostName @sshOpts '(read; kill -INT -\$\$) <&0 & nix-store -r $drvPath $buildFlags > /dev/null' 2>&4") != 0) {
    # Note that if we get exit code 100 from `nix-store -r', it
    # denotes a permanent build failure (as opposed to an SSH problem
    # or a temporary Nix problem).  We propagate this to the caller to
    # allow it to distinguish between transient and permanent
    # failures.
    my $status = $?;
    my $res;
    if ($status == -1 || ($status & 127)) {
        # ssh could not be started, or it was killed by a signal.  In
        # both cases `$status >> 8' is not a meaningful exit code (for a
        # signal it is 0, which would report success), so report a
        # generic failure instead.
        $res = 1;
    } else {
        $res = $status >> 8;
    }
    print STDERR "build of `$drvPath' on `$hostName' failed with exit code $res\n";
    removeRoots();
    exit $res;
}
 | 
						|
 | 
						|
#print "build of `$drvPath' on `$hostName' succeeded\n";
 | 
						|
 | 
						|
 | 
						|
# Copy the output from the build machine.  The remote side is asked to
# sign the export unless we are running as root; this does not depend on
# the output, so it is decided once up front.
my $maybeSignRemote = "";
$maybeSignRemote = "--sign" if $UID != 0;

foreach my $output (@outputs) {
    my $copyStatus =
        system("exec ssh $hostName @sshOpts 'nix-store --export $maybeSignRemote $output'" .
               "| NIX_HELD_LOCKS=$output @bindir@/nix-store --import > /dev/null");
    if ($copyStatus != 0) {
        # Remember the failure status before removeRoots() overwrites $?,
        # and do not leave the temporary GC roots behind on the build
        # machine (the build-failure path cleans them up as well).
        my $status = $?;
        removeRoots();
        die "cannot copy $output from $hostName: $status";
    }
}


# Get rid of the temporary GC roots.
removeRoots();
 |