Propagate remote timeouts properly
This commit is contained in:
		
							parent
							
								
									c6e85ee474
								
							
						
					
					
						commit
						42e9ad8fd1
					
				
					 2 changed files with 38 additions and 31 deletions
				
			
		|  | @ -258,13 +258,8 @@ writeInt($maxSilentTime, $to); | |||
| writeInt($buildTimeout, $to); | ||||
| my $res = readInt($from); | ||||
| if ($res != 0) { | ||||
|     # Note that if we get exit code 100 from `nix-store -r', it | ||||
|     # denotes a permanent build failure (as opposed to an SSH problem | ||||
|     # or a temporary Nix problem).  We propagate this to the caller to | ||||
|     # allow it to distinguish between transient and permanent | ||||
|     # failures. | ||||
|     my $msg = readString($from); | ||||
|     print STDERR "error: $msg (on `$hostName')\n"; | ||||
|     print STDERR "error: $msg on `$hostName'\n"; | ||||
|     exit $res; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -238,6 +238,9 @@ public: | |||
|        failure). */ | ||||
|     bool permanentFailure; | ||||
| 
 | ||||
|     /* Set if at least one derivation had a timeout. */ | ||||
|     bool timedOut; | ||||
| 
 | ||||
|     LocalStore & store; | ||||
| 
 | ||||
|     std::shared_ptr<HookInstance> hook; | ||||
|  | @ -1440,33 +1443,39 @@ void DerivationGoal::buildDone() | |||
|         outputLocks.unlock(); | ||||
|         buildUser.release(); | ||||
| 
 | ||||
|         /* When using a build hook, the hook will return a remote
 | ||||
|            build failure using exit code 100.  Anything else is a hook | ||||
|            problem. */ | ||||
|         bool hookError = hook && | ||||
|             (!WIFEXITED(status) || WEXITSTATUS(status) != 100); | ||||
| 
 | ||||
|         if (settings.printBuildTrace) { | ||||
|             if (hook && hookError) | ||||
|                 printMsg(lvlError, format("@ hook-failed %1% - %2% %3%") | ||||
|                     % drvPath % status % e.msg()); | ||||
|             else | ||||
|                 printMsg(lvlError, format("@ build-failed %1% - %2% %3%") | ||||
|                     % drvPath % 1 % e.msg()); | ||||
|         if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) { | ||||
|             if (settings.printBuildTrace) | ||||
|                 printMsg(lvlError, format("@ build-failed %1% - timeout") % drvPath); | ||||
|             worker.timedOut = true; | ||||
|         } | ||||
| 
 | ||||
|         /* Register the outputs of this build as "failed" so we won't
 | ||||
|            try to build them again (negative caching).  However, don't | ||||
|            do this for fixed-output derivations, since they're likely | ||||
|            to fail for transient reasons (e.g., fetchurl not being | ||||
|            able to access the network).  Hook errors (like | ||||
|            communication problems with the remote machine) shouldn't | ||||
|            be cached either. */ | ||||
|         if (settings.cacheFailure && !hookError && !fixedOutput) | ||||
|             foreach (DerivationOutputs::iterator, i, drv.outputs) | ||||
|                 worker.store.registerFailedPath(i->second.path); | ||||
|         else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) { | ||||
|             if (settings.printBuildTrace) | ||||
|                 printMsg(lvlError, format("@ hook-failed %1% - %2% %3%") | ||||
|                     % drvPath % status % e.msg()); | ||||
|         } | ||||
| 
 | ||||
|         else { | ||||
|             if (settings.printBuildTrace) | ||||
|                 printMsg(lvlError, format("@ build-failed %1% - %2% %3%") | ||||
|                     % drvPath % 1 % e.msg()); | ||||
|             worker.permanentFailure = !fixedOutput && !diskFull; | ||||
| 
 | ||||
|             /* Register the outputs of this build as "failed" so we
 | ||||
|                won't try to build them again (negative caching). | ||||
|                However, don't do this for fixed-output derivations, | ||||
|                since they're likely to fail for transient reasons | ||||
|                (e.g., fetchurl not being able to access the network). | ||||
|                Hook errors (like communication problems with the | ||||
|                remote machine) shouldn't be cached either. */ | ||||
|             if (/* settings.cacheFailure && */ !fixedOutput && !diskFull) | ||||
|             { | ||||
|                 printMsg(lvlError, "REG"); | ||||
|                 foreach (DerivationOutputs::iterator, i, drv.outputs) | ||||
|                     worker.store.registerFailedPath(i->second.path); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         worker.permanentFailure = !hookError && !fixedOutput && !diskFull; | ||||
|         amDone(ecFailed); | ||||
|         return; | ||||
|     } | ||||
|  | @ -2909,6 +2918,7 @@ Worker::Worker(LocalStore & store) | |||
|     nrLocalBuilds = 0; | ||||
|     lastWokenUp = 0; | ||||
|     permanentFailure = false; | ||||
|     timedOut = false; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | @ -3220,6 +3230,7 @@ void Worker::waitForInput() | |||
|                 format("%1% timed out after %2% seconds of silence") | ||||
|                 % goal->getName() % settings.maxSilentTime); | ||||
|             goal->cancel(true); | ||||
|             timedOut = true; | ||||
|         } | ||||
| 
 | ||||
|         else if (goal->getExitCode() == Goal::ecBusy && | ||||
|  | @ -3231,6 +3242,7 @@ void Worker::waitForInput() | |||
|                 format("%1% timed out after %2% seconds") | ||||
|                 % goal->getName() % settings.buildTimeout); | ||||
|             goal->cancel(true); | ||||
|             timedOut = true; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  | @ -3247,7 +3259,7 @@ void Worker::waitForInput() | |||
| 
 | ||||
| unsigned int Worker::exitStatus() | ||||
| { | ||||
|     return permanentFailure ? 100 : 1; | ||||
|     return timedOut ? 101 : (permanentFailure ? 100 : 1); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue