Heuristically detect if a build may have failed due to a full disk
This lets Hydra recognize that such a build should not be marked as "permanently failed", so that the build can be retried later.
This commit is contained in:
		
							parent
							
								
									e81d38c02b
								
							
						
					
					
						commit
						00d30496ca
					
				
					 2 changed files with 28 additions and 5 deletions
				
			
		|  | @ -33,7 +33,6 @@ | |||
| 
 | ||||
| #include <bzlib.h> | ||||
| 
 | ||||
| 
 | ||||
| /* Includes required for chroot support. */ | ||||
| #if HAVE_SYS_PARAM_H | ||||
| #include <sys/param.h> | ||||
|  | @ -60,12 +59,15 @@ | |||
| #include <netinet/ip.h> | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| #if HAVE_SYS_PERSONALITY_H | ||||
| #include <sys/personality.h> | ||||
| #define CAN_DO_LINUX32_BUILDS | ||||
| #endif | ||||
| 
 | ||||
| #if HAVE_STATVFS | ||||
| #include <sys/statvfs.h> | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| namespace nix { | ||||
| 
 | ||||
|  | @ -1383,6 +1385,25 @@ void DerivationGoal::buildDone() | |||
|        root. */ | ||||
|     if (buildUser.enabled()) buildUser.kill(); | ||||
| 
 | ||||
|     /* If the build failed, heuristically check whether this may have
 | ||||
|        been caused by a disk full condition.  We have no way of | ||||
|        knowing whether the build actually got an ENOSPC.  So instead, | ||||
|        check if the disk is (nearly) full now.  If so, we don't mark | ||||
|        this build as a permanent failure. */ | ||||
|     bool diskFull = false; | ||||
| #if HAVE_STATVFS | ||||
|     if (!statusOk(status)) { | ||||
|         unsigned long long required = 8ULL * 1024 * 1024; // FIXME: make configurable
 | ||||
|         struct statvfs st; | ||||
|         if (statvfs(settings.nixStore.c_str(), &st) == 0 && | ||||
|             (unsigned long long) st.f_bavail * st.f_bsize < required) | ||||
|             diskFull = true; | ||||
|         if (statvfs(tmpDir.c_str(), &st) == 0 && | ||||
|             (unsigned long long) st.f_bavail * st.f_bsize < required) | ||||
|             diskFull = true; | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     try { | ||||
| 
 | ||||
|         /* Some cleanup per path.  We do this here and not in
 | ||||
|  | @ -1449,6 +1470,8 @@ void DerivationGoal::buildDone() | |||
|             deleteTmpDir(false); | ||||
|             if (WIFEXITED(status) && WEXITSTATUS(status) == childSetupFailed) | ||||
|                 throw Error(format("failed to set up the build environment for `%1%'") % drvPath); | ||||
|             if (diskFull) | ||||
|                 printMsg(lvlError, "note: build failure may have been caused by lack of free disk space"); | ||||
|             throw BuildError(format("builder for `%1%' %2%") | ||||
|                 % drvPath % statusToString(status)); | ||||
|         } | ||||
|  | @ -1504,7 +1527,7 @@ void DerivationGoal::buildDone() | |||
|             foreach (DerivationOutputs::iterator, i, drv.outputs) | ||||
|                 worker.store.registerFailedPath(i->second.path); | ||||
| 
 | ||||
|         worker.permanentFailure = !hookError && !fixedOutput; | ||||
|         worker.permanentFailure = !hookError && !fixedOutput && !diskFull; | ||||
|         amDone(ecFailed); | ||||
|         return; | ||||
|     } | ||||
|  |  | |||
|  | @ -456,8 +456,8 @@ void LocalStore::makeStoreWritable() | |||
|     if (getuid() != 0) return; | ||||
|     /* Check if /nix/store is on a read-only mount. */ | ||||
|     struct statvfs stat; | ||||
|     if (statvfs(settings.nixStore.c_str(), &stat) !=0) | ||||
|         throw SysError("Getting info of nix store mountpoint"); | ||||
|     if (statvfs(settings.nixStore.c_str(), &stat) != 0) | ||||
|         throw SysError("getting info about the Nix store mount point"); | ||||
| 
 | ||||
|     if (stat.f_flag & ST_RDONLY) { | ||||
|         if (unshare(CLONE_NEWNS) == -1) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue