Heuristically detect if a build may have failed due to a full disk
This lets Hydra recognise that such a build should not be marked as "permanently failed", so that it can be retried later.
This commit is contained in:
		
parent e81d38c02b
commit 00d30496ca
					
				
					 2 changed files with 28 additions and 5 deletions
				
			
		|  | @ -33,7 +33,6 @@ | ||||||
| 
 | 
 | ||||||
| #include <bzlib.h> | #include <bzlib.h> | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| /* Includes required for chroot support. */ | /* Includes required for chroot support. */ | ||||||
| #if HAVE_SYS_PARAM_H | #if HAVE_SYS_PARAM_H | ||||||
| #include <sys/param.h> | #include <sys/param.h> | ||||||
|  | @ -60,12 +59,15 @@ | ||||||
| #include <netinet/ip.h> | #include <netinet/ip.h> | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| #if HAVE_SYS_PERSONALITY_H | #if HAVE_SYS_PERSONALITY_H | ||||||
| #include <sys/personality.h> | #include <sys/personality.h> | ||||||
| #define CAN_DO_LINUX32_BUILDS | #define CAN_DO_LINUX32_BUILDS | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | #if HAVE_STATVFS | ||||||
|  | #include <sys/statvfs.h> | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| namespace nix { | namespace nix { | ||||||
| 
 | 
 | ||||||
|  | @ -1383,6 +1385,25 @@ void DerivationGoal::buildDone() | ||||||
|        root. */ |        root. */ | ||||||
|     if (buildUser.enabled()) buildUser.kill(); |     if (buildUser.enabled()) buildUser.kill(); | ||||||
| 
 | 
 | ||||||
|  |     /* If the build failed, heuristically check whether this may have
 | ||||||
|  |        been caused by a disk full condition.  We have no way of | ||||||
|  |        knowing whether the build actually got an ENOSPC.  So instead, | ||||||
|  |        check if the disk is (nearly) full now.  If so, we don't mark | ||||||
|  |        this build as a permanent failure. */ | ||||||
|  |     bool diskFull = false; | ||||||
|  | #if HAVE_STATVFS | ||||||
|  |     if (!statusOk(status)) { | ||||||
|  |         unsigned long long required = 8ULL * 1024 * 1024; // FIXME: make configurable
 | ||||||
|  |         struct statvfs st; | ||||||
|  |         if (statvfs(settings.nixStore.c_str(), &st) == 0 && | ||||||
|  |             (unsigned long long) st.f_bavail * st.f_bsize < required) | ||||||
|  |             diskFull = true; | ||||||
|  |         if (statvfs(tmpDir.c_str(), &st) == 0 && | ||||||
|  |             (unsigned long long) st.f_bavail * st.f_bsize < required) | ||||||
|  |             diskFull = true; | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|     try { |     try { | ||||||
| 
 | 
 | ||||||
|         /* Some cleanup per path.  We do this here and not in
 |         /* Some cleanup per path.  We do this here and not in
 | ||||||
|  | @ -1449,6 +1470,8 @@ void DerivationGoal::buildDone() | ||||||
|             deleteTmpDir(false); |             deleteTmpDir(false); | ||||||
|             if (WIFEXITED(status) && WEXITSTATUS(status) == childSetupFailed) |             if (WIFEXITED(status) && WEXITSTATUS(status) == childSetupFailed) | ||||||
|                 throw Error(format("failed to set up the build environment for `%1%'") % drvPath); |                 throw Error(format("failed to set up the build environment for `%1%'") % drvPath); | ||||||
|  |             if (diskFull) | ||||||
|  |                 printMsg(lvlError, "note: build failure may have been caused by lack of free disk space"); | ||||||
|             throw BuildError(format("builder for `%1%' %2%") |             throw BuildError(format("builder for `%1%' %2%") | ||||||
|                 % drvPath % statusToString(status)); |                 % drvPath % statusToString(status)); | ||||||
|         } |         } | ||||||
|  | @ -1504,7 +1527,7 @@ void DerivationGoal::buildDone() | ||||||
|             foreach (DerivationOutputs::iterator, i, drv.outputs) |             foreach (DerivationOutputs::iterator, i, drv.outputs) | ||||||
|                 worker.store.registerFailedPath(i->second.path); |                 worker.store.registerFailedPath(i->second.path); | ||||||
| 
 | 
 | ||||||
|         worker.permanentFailure = !hookError && !fixedOutput; |         worker.permanentFailure = !hookError && !fixedOutput && !diskFull; | ||||||
|         amDone(ecFailed); |         amDone(ecFailed); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -456,8 +456,8 @@ void LocalStore::makeStoreWritable() | ||||||
|     if (getuid() != 0) return; |     if (getuid() != 0) return; | ||||||
|     /* Check if /nix/store is on a read-only mount. */ |     /* Check if /nix/store is on a read-only mount. */ | ||||||
|     struct statvfs stat; |     struct statvfs stat; | ||||||
|     if (statvfs(settings.nixStore.c_str(), &stat) !=0) |     if (statvfs(settings.nixStore.c_str(), &stat) != 0) | ||||||
|         throw SysError("Getting info of nix store mountpoint"); |         throw SysError("getting info about the Nix store mount point"); | ||||||
| 
 | 
 | ||||||
|     if (stat.f_flag & ST_RDONLY) { |     if (stat.f_flag & ST_RDONLY) { | ||||||
|         if (unshare(CLONE_NEWNS) == -1) |         if (unshare(CLONE_NEWNS) == -1) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue