* In the build hook, if connecting to a machine fails, try the other
machines of the right type (if available). This makes the build farm more robust to failures.
This commit is contained in:
		
							parent
							
								
									f56a039775
								
							
						
					
					
						commit
						d0c32dc135
					
				
					 3 changed files with 83 additions and 79 deletions
				
			
		| 
						 | 
					@ -71,6 +71,7 @@ while (<CONF>) {
 | 
				
			||||||
        , sshKeys => $3
 | 
					        , sshKeys => $3
 | 
				
			||||||
        , maxJobs => $4
 | 
					        , maxJobs => $4
 | 
				
			||||||
        , speedFactor => 1.0 * ($6 || 1)
 | 
					        , speedFactor => 1.0 * ($6 || 1)
 | 
				
			||||||
 | 
					        , enabled => 1
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -92,12 +93,16 @@ sub openSlotLock {
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Find all machines that can execute this build, i.e., that support
 | 
					my $hostName;
 | 
				
			||||||
# builds for the given platform and are not at their job limit.
 | 
					
 | 
				
			||||||
my $rightType = 0;
 | 
					while (1) {
 | 
				
			||||||
my @available = ();
 | 
					    
 | 
				
			||||||
LOOP: foreach my $cur (@machines) {
 | 
					    # Find all machines that can execute this build, i.e., that support
 | 
				
			||||||
    if (grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
 | 
					    # builds for the given platform and are not at their job limit.
 | 
				
			||||||
 | 
					    my $rightType = 0;
 | 
				
			||||||
 | 
					    my @available = ();
 | 
				
			||||||
 | 
					    LOOP: foreach my $cur (@machines) {
 | 
				
			||||||
 | 
					        if ($cur->{enabled} && grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
 | 
				
			||||||
            $rightType = 1;
 | 
					            $rightType = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # We have a machine of the right type.  Determine the load on
 | 
					            # We have a machine of the right type.  Determine the load on
 | 
				
			||||||
| 
						 | 
					@ -120,16 +125,16 @@ LOOP: foreach my $cur (@machines) {
 | 
				
			||||||
            push @available, { machine => $cur, load => $load, free => $free }
 | 
					            push @available, { machine => $cur, load => $load, free => $free }
 | 
				
			||||||
            if $load < $cur->{maxJobs};
 | 
					            if $load < $cur->{maxJobs};
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
}
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if (defined $ENV{NIX_DEBUG_HOOK}) {
 | 
					    if (defined $ENV{NIX_DEBUG_HOOK}) {
 | 
				
			||||||
        print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
 | 
					        print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
 | 
				
			||||||
            foreach @available;
 | 
					            foreach @available;
 | 
				
			||||||
}
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Didn't find any available machine?  Then decline or postpone.
 | 
					    # Didn't find any available machine?  Then decline or postpone.
 | 
				
			||||||
if (scalar @available == 0) {
 | 
					    if (scalar @available == 0) {
 | 
				
			||||||
        # Postpone if we have a machine of the right type, except if the
 | 
					        # Postpone if we have a machine of the right type, except if the
 | 
				
			||||||
        # local system can and wants to do the build.
 | 
					        # local system can and wants to do the build.
 | 
				
			||||||
        if ($rightType && !$canBuildLocally) {
 | 
					        if ($rightType && !$canBuildLocally) {
 | 
				
			||||||
| 
						 | 
					@ -138,43 +143,46 @@ if (scalar @available == 0) {
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            decline;
 | 
					            decline;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
}
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Prioritise the available machines as follows:
 | 
					    # Prioritise the available machines as follows:
 | 
				
			||||||
# - First by load divided by speed factor, rounded to the nearest
 | 
					    # - First by load divided by speed factor, rounded to the nearest
 | 
				
			||||||
#   integer.  This causes fast machines to be preferred over slow
 | 
					    #   integer.  This causes fast machines to be preferred over slow
 | 
				
			||||||
#   machines with similar loads.
 | 
					    #   machines with similar loads.
 | 
				
			||||||
# - Then by speed factor.
 | 
					    # - Then by speed factor.
 | 
				
			||||||
# - Finally by load.
 | 
					    # - Finally by load.
 | 
				
			||||||
sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
 | 
					    sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
 | 
				
			||||||
@available = sort
 | 
					    @available = sort
 | 
				
			||||||
        { lf($a) <=> lf($b)
 | 
					        { lf($a) <=> lf($b)
 | 
				
			||||||
              || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
 | 
					              || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
 | 
				
			||||||
              || $a->{load} <=> $b->{load}
 | 
					              || $a->{load} <=> $b->{load}
 | 
				
			||||||
        } @available;
 | 
					        } @available;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Select the best available machine and lock a free slot.
 | 
					    # Select the best available machine and lock a free slot.
 | 
				
			||||||
my $selected = $available[0]; 
 | 
					    my $selected = $available[0]; 
 | 
				
			||||||
my $machine = $selected->{machine};
 | 
					    my $machine = $selected->{machine};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
my $slotLock = openSlotLock($machine, $selected->{free});
 | 
					    my $slotLock = openSlotLock($machine, $selected->{free});
 | 
				
			||||||
flock($slotLock, LOCK_EX | LOCK_NB) or die;
 | 
					    flock($slotLock, LOCK_EX | LOCK_NB) or die;
 | 
				
			||||||
utime undef, undef, $slotLock;
 | 
					    utime undef, undef, $slotLock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
close MAINLOCK;
 | 
					    close MAINLOCK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Connect to the selected machine.
 | 
				
			||||||
 | 
					    @sshOpts = ("-i", $machine->{sshKeys}, "-x");
 | 
				
			||||||
 | 
					    $hostName = $machine->{hostName};
 | 
				
			||||||
 | 
					    last if openSSHConnection $hostName;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    warn "unable to open SSH connection to $hostName, trying other available machines...\n";
 | 
				
			||||||
 | 
					    $machine->{enabled} = 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Tell Nix we've accepted the build.
 | 
					# Tell Nix we've accepted the build.
 | 
				
			||||||
sendReply "accept";
 | 
					sendReply "accept";
 | 
				
			||||||
if (defined $ENV{NIX_DEBUG_HOOK}) {
 | 
					 | 
				
			||||||
    my $hostName = $machine->{hostName};
 | 
					 | 
				
			||||||
    my $sp = $machine->{speedFactor};
 | 
					 | 
				
			||||||
    print STDERR "building `$drvPath' on `$hostName' - $sp - " . $selected->{free} . "\n";
 | 
					 | 
				
			||||||
    sleep 10;
 | 
					 | 
				
			||||||
    exit 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
my $x = <STDIN>;
 | 
					my $x = <STDIN>;
 | 
				
			||||||
chomp $x;
 | 
					chomp $x;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -184,13 +192,8 @@ if ($x ne "okay") {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Do the actual build.
 | 
					# Do the actual build.
 | 
				
			||||||
my $hostName = $machine->{hostName};
 | 
					 | 
				
			||||||
print STDERR "building `$drvPath' on `$hostName'\n";
 | 
					print STDERR "building `$drvPath' on `$hostName'\n";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
push @sshOpts, "-i", $machine->{sshKeys}, "-x";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
openSSHConnection $hostName;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
my $inputs = `cat inputs`; die if ($? != 0);
 | 
					my $inputs = `cat inputs`; die if ($? != 0);
 | 
				
			||||||
$inputs =~ s/\n/ /g;
 | 
					$inputs =~ s/\n/ /g;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -53,7 +53,7 @@ while (@ARGV) {
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
openSSHConnection $sshHost;
 | 
					openSSHConnection $sshHost or die "$0: unable to start SSH\n";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if ($toMode) { # Copy TO the remote machine.
 | 
					if ($toMode) { # Copy TO the remote machine.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,15 +12,16 @@ sub openSSHConnection {
 | 
				
			||||||
    my ($host) = @_;
 | 
					    my ($host) = @_;
 | 
				
			||||||
    die if $sshStarted;
 | 
					    die if $sshStarted;
 | 
				
			||||||
    $sshHost = $host;
 | 
					    $sshHost = $host;
 | 
				
			||||||
    return if system("ssh $sshHost @sshOpts -O check 2> /dev/null") == 0;
 | 
					    return 1 if system("ssh $sshHost @sshOpts -O check 2> /dev/null") == 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    my $tmpDir = tempdir("nix-ssh.XXXXXX", CLEANUP => 1, TMPDIR => 1)
 | 
					    my $tmpDir = tempdir("nix-ssh.XXXXXX", CLEANUP => 1, TMPDIR => 1)
 | 
				
			||||||
        or die "cannot create a temporary directory";
 | 
					        or die "cannot create a temporary directory";
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    push @sshOpts, "-S", "$tmpDir/control";
 | 
					    push @sshOpts, "-S", "$tmpDir/control";
 | 
				
			||||||
    system("ssh $sshHost @sshOpts -M -N -f") == 0
 | 
					    system("ssh $sshHost @sshOpts -M -N -f") == 0
 | 
				
			||||||
        or die "unable to start SSH: $?";
 | 
					        or return 0;
 | 
				
			||||||
    $sshStarted = 1;
 | 
					    $sshStarted = 1;
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Tell the master SSH client to exit.
 | 
					# Tell the master SSH client to exit.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue