Propagate remote timeouts properly
This commit is contained in:
		
							parent
							
								
									c6e85ee474
								
							
						
					
					
						commit
						42e9ad8fd1
					
				
					 2 changed files with 38 additions and 31 deletions
				
			
		|  | @ -258,13 +258,8 @@ writeInt($maxSilentTime, $to); | ||||||
| writeInt($buildTimeout, $to); | writeInt($buildTimeout, $to); | ||||||
| my $res = readInt($from); | my $res = readInt($from); | ||||||
| if ($res != 0) { | if ($res != 0) { | ||||||
|     # Note that if we get exit code 100 from `nix-store -r', it |  | ||||||
|     # denotes a permanent build failure (as opposed to an SSH problem |  | ||||||
|     # or a temporary Nix problem).  We propagate this to the caller to |  | ||||||
|     # allow it to distinguish between transient and permanent |  | ||||||
|     # failures. |  | ||||||
|     my $msg = readString($from); |     my $msg = readString($from); | ||||||
|     print STDERR "error: $msg (on `$hostName')\n"; |     print STDERR "error: $msg on `$hostName'\n"; | ||||||
|     exit $res; |     exit $res; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -238,6 +238,9 @@ public: | ||||||
|        failure). */ |        failure). */ | ||||||
|     bool permanentFailure; |     bool permanentFailure; | ||||||
| 
 | 
 | ||||||
|  |     /* Set if at least one derivation had a timeout. */ | ||||||
|  |     bool timedOut; | ||||||
|  | 
 | ||||||
|     LocalStore & store; |     LocalStore & store; | ||||||
| 
 | 
 | ||||||
|     std::shared_ptr<HookInstance> hook; |     std::shared_ptr<HookInstance> hook; | ||||||
|  | @ -1440,33 +1443,39 @@ void DerivationGoal::buildDone() | ||||||
|         outputLocks.unlock(); |         outputLocks.unlock(); | ||||||
|         buildUser.release(); |         buildUser.release(); | ||||||
| 
 | 
 | ||||||
|         /* When using a build hook, the hook will return a remote
 |         if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) { | ||||||
|            build failure using exit code 100.  Anything else is a hook |             if (settings.printBuildTrace) | ||||||
|            problem. */ |                 printMsg(lvlError, format("@ build-failed %1% - timeout") % drvPath); | ||||||
|         bool hookError = hook && |             worker.timedOut = true; | ||||||
|             (!WIFEXITED(status) || WEXITSTATUS(status) != 100); |  | ||||||
| 
 |  | ||||||
|         if (settings.printBuildTrace) { |  | ||||||
|             if (hook && hookError) |  | ||||||
|                 printMsg(lvlError, format("@ hook-failed %1% - %2% %3%") |  | ||||||
|                     % drvPath % status % e.msg()); |  | ||||||
|             else |  | ||||||
|                 printMsg(lvlError, format("@ build-failed %1% - %2% %3%") |  | ||||||
|                     % drvPath % 1 % e.msg()); |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         /* Register the outputs of this build as "failed" so we won't
 |         else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) { | ||||||
|            try to build them again (negative caching).  However, don't |             if (settings.printBuildTrace) | ||||||
|            do this for fixed-output derivations, since they're likely |                 printMsg(lvlError, format("@ hook-failed %1% - %2% %3%") | ||||||
|            to fail for transient reasons (e.g., fetchurl not being |                     % drvPath % status % e.msg()); | ||||||
|            able to access the network).  Hook errors (like |         } | ||||||
|            communication problems with the remote machine) shouldn't | 
 | ||||||
|            be cached either. */ |         else { | ||||||
|         if (settings.cacheFailure && !hookError && !fixedOutput) |             if (settings.printBuildTrace) | ||||||
|             foreach (DerivationOutputs::iterator, i, drv.outputs) |                 printMsg(lvlError, format("@ build-failed %1% - %2% %3%") | ||||||
|                 worker.store.registerFailedPath(i->second.path); |                     % drvPath % 1 % e.msg()); | ||||||
|  |             worker.permanentFailure = !fixedOutput && !diskFull; | ||||||
|  | 
 | ||||||
|  |             /* Register the outputs of this build as "failed" so we
 | ||||||
|  |                won't try to build them again (negative caching). | ||||||
|  |                However, don't do this for fixed-output derivations, | ||||||
|  |                since they're likely to fail for transient reasons | ||||||
|  |                (e.g., fetchurl not being able to access the network). | ||||||
|  |                Hook errors (like communication problems with the | ||||||
|  |                remote machine) shouldn't be cached either. */ | ||||||
|  |             if (/* settings.cacheFailure && */ !fixedOutput && !diskFull) | ||||||
|  |             { | ||||||
|  |                 printMsg(lvlError, "REG"); | ||||||
|  |                 foreach (DerivationOutputs::iterator, i, drv.outputs) | ||||||
|  |                     worker.store.registerFailedPath(i->second.path); | ||||||
|  |             } | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         worker.permanentFailure = !hookError && !fixedOutput && !diskFull; |  | ||||||
|         amDone(ecFailed); |         amDone(ecFailed); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -2909,6 +2918,7 @@ Worker::Worker(LocalStore & store) | ||||||
|     nrLocalBuilds = 0; |     nrLocalBuilds = 0; | ||||||
|     lastWokenUp = 0; |     lastWokenUp = 0; | ||||||
|     permanentFailure = false; |     permanentFailure = false; | ||||||
|  |     timedOut = false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -3220,6 +3230,7 @@ void Worker::waitForInput() | ||||||
|                 format("%1% timed out after %2% seconds of silence") |                 format("%1% timed out after %2% seconds of silence") | ||||||
|                 % goal->getName() % settings.maxSilentTime); |                 % goal->getName() % settings.maxSilentTime); | ||||||
|             goal->cancel(true); |             goal->cancel(true); | ||||||
|  |             timedOut = true; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         else if (goal->getExitCode() == Goal::ecBusy && |         else if (goal->getExitCode() == Goal::ecBusy && | ||||||
|  | @ -3231,6 +3242,7 @@ void Worker::waitForInput() | ||||||
|                 format("%1% timed out after %2% seconds") |                 format("%1% timed out after %2% seconds") | ||||||
|                 % goal->getName() % settings.buildTimeout); |                 % goal->getName() % settings.buildTimeout); | ||||||
|             goal->cancel(true); |             goal->cancel(true); | ||||||
|  |             timedOut = true; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -3247,7 +3259,7 @@ void Worker::waitForInput() | ||||||
| 
 | 
 | ||||||
| unsigned int Worker::exitStatus() | unsigned int Worker::exitStatus() | ||||||
| { | { | ||||||
|     return permanentFailure ? 100 : 1; |     return timedOut ? 101 : (permanentFailure ? 100 : 1); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue