Implement caching of fetchurl/fetchTarball results
ETags are used to prevent redownloading unchanged files.
This commit is contained in:
		
							parent
							
								
									1711679ea5
								
							
						
					
					
						commit
						60340ce3e2
					
				
					 4 changed files with 152 additions and 29 deletions
				
			
		|  | @ -1490,6 +1490,9 @@ struct Curl | ||||||
| { | { | ||||||
|     CURL * curl; |     CURL * curl; | ||||||
|     string data; |     string data; | ||||||
|  |     string etag, status, expectedETag; | ||||||
|  | 
 | ||||||
|  |     struct curl_slist * requestHeaders; | ||||||
| 
 | 
 | ||||||
|     static size_t writeCallback(void * contents, size_t size, size_t nmemb, void * userp) |     static size_t writeCallback(void * contents, size_t size, size_t nmemb, void * userp) | ||||||
|     { |     { | ||||||
|  | @ -1499,36 +1502,92 @@ struct Curl | ||||||
|         return realSize; |         return realSize; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     static size_t headerCallback(void * contents, size_t size, size_t nmemb, void * userp) | ||||||
|  |     { | ||||||
|  |         Curl & c(* (Curl *) userp); | ||||||
|  |         size_t realSize = size * nmemb; | ||||||
|  |         string line = string((char *) contents, realSize); | ||||||
|  |         printMsg(lvlVomit, format("got header: %1%") % trim(line)); | ||||||
|  |         if (line.compare(0, 5, "HTTP/") == 0) { // new response starts
 | ||||||
|  |             c.etag = ""; | ||||||
|  |             auto ss = tokenizeString<vector<string>>(line, " "); | ||||||
|  |             c.status = ss.size() >= 2 ? ss[1] : ""; | ||||||
|  |         } else { | ||||||
|  |             auto i = line.find(':'); | ||||||
|  |             if (i != string::npos) { | ||||||
|  |                 string name = trim(string(line, 0, i)); | ||||||
|  |                 if (name == "ETag") { // FIXME: case
 | ||||||
|  |                     c.etag = trim(string(line, i + 1)); | ||||||
|  |                     /* Hack to work around a GitHub bug: it sends
 | ||||||
|  |                        ETags, but ignores If-None-Match. So if we get | ||||||
|  |                        the expected ETag on a 200 response, then shut | ||||||
|  |                        down the connection because we already have the | ||||||
|  |                        data. */ | ||||||
|  |                     printMsg(lvlDebug, format("got ETag: %1%") % c.etag); | ||||||
|  |                     if (c.etag == c.expectedETag && c.status == "200") { | ||||||
|  |                         printMsg(lvlDebug, format("shutting down on 200 HTTP response with expected ETag")); | ||||||
|  |                         return 0; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         return realSize; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     Curl() |     Curl() | ||||||
|     { |     { | ||||||
|  |         requestHeaders = 0; | ||||||
|  | 
 | ||||||
|         curl = curl_easy_init(); |         curl = curl_easy_init(); | ||||||
|         if (!curl) throw Error("unable to initialize curl"); |         if (!curl) throw Error("unable to initialize curl"); | ||||||
| 
 | 
 | ||||||
|         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); |         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); | ||||||
|         curl_easy_setopt(curl, CURLOPT_CAINFO, getEnv("SSL_CERT_FILE", "/etc/ssl/certs/ca-certificates.crt").c_str()); |         curl_easy_setopt(curl, CURLOPT_CAINFO, getEnv("SSL_CERT_FILE", "/etc/ssl/certs/ca-certificates.crt").c_str()); | ||||||
|         curl_easy_setopt(curl, CURLOPT_USERAGENT, ("Nix/" + nixVersion).c_str()); |         curl_easy_setopt(curl, CURLOPT_USERAGENT, ("Nix/" + nixVersion).c_str()); | ||||||
|  |         curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1); | ||||||
| 
 | 
 | ||||||
|         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); |         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); | ||||||
|         curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &curl); |         curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &curl); | ||||||
|  | 
 | ||||||
|  |         curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, headerCallback); | ||||||
|  |         curl_easy_setopt(curl, CURLOPT_HEADERDATA, (void *) &curl); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     ~Curl() |     ~Curl() | ||||||
|     { |     { | ||||||
|         if (curl) curl_easy_cleanup(curl); |         if (curl) curl_easy_cleanup(curl); | ||||||
|  |         if (requestHeaders) curl_slist_free_all(requestHeaders); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     string fetch(const string & url) |     bool fetch(const string & url, const string & expectedETag = "") | ||||||
|     { |     { | ||||||
|         curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); |         curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); | ||||||
| 
 | 
 | ||||||
|         data.clear(); |         data.clear(); | ||||||
| 
 | 
 | ||||||
|         CURLcode res = curl_easy_perform(curl); |         if (requestHeaders) { | ||||||
|         if (res != CURLE_OK) |             curl_slist_free_all(requestHeaders); | ||||||
|             throw Error(format("unable to download ‘%1%’: %2%") |             requestHeaders = 0; | ||||||
|                 % url % curl_easy_strerror(res)); |         } | ||||||
| 
 | 
 | ||||||
|         return data; |         if (!expectedETag.empty()) { | ||||||
|  |             this->expectedETag = expectedETag; | ||||||
|  |             requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + expectedETag).c_str()); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         curl_easy_setopt(curl, CURLOPT_HTTPHEADER, requestHeaders); | ||||||
|  | 
 | ||||||
|  |         CURLcode res = curl_easy_perform(curl); | ||||||
|  |         if (res == CURLE_WRITE_ERROR && etag == expectedETag) return false; | ||||||
|  |         if (res != CURLE_OK) | ||||||
|  |             throw Error(format("unable to download ‘%1%’: %2% (%3%)") | ||||||
|  |                 % url % curl_easy_strerror(res) % res); | ||||||
|  | 
 | ||||||
|  |         long httpStatus = 0; | ||||||
|  |         curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &httpStatus); | ||||||
|  |         if (httpStatus == 304) return false; | ||||||
|  | 
 | ||||||
|  |         return true; | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -1560,25 +1619,71 @@ void fetch(EvalState & state, const Pos & pos, Value * * args, Value & v, | ||||||
|     } else |     } else | ||||||
|         url = state.forceStringNoCtx(*args[0], pos); |         url = state.forceStringNoCtx(*args[0], pos); | ||||||
| 
 | 
 | ||||||
|     // TODO: cache downloads.
 |     Path cacheDir = getEnv("XDG_CACHE_HOME", getEnv("HOME", "") + "/.cache") + "/nix/tarballs"; | ||||||
|  |     createDirs(cacheDir); | ||||||
| 
 | 
 | ||||||
|     Curl curl; |     string urlHash = printHash32(hashString(htSHA256, url)); | ||||||
|     string data = curl.fetch(url); |  | ||||||
| 
 | 
 | ||||||
|     string name; |     Path dataFile = cacheDir + "/" + urlHash + ".info"; | ||||||
|     string::size_type p = url.rfind('/'); |     Path fileLink = cacheDir + "/" + urlHash + "-file"; | ||||||
|     if (p != string::npos) name = string(url, p + 1); |  | ||||||
| 
 | 
 | ||||||
|     Path storePath = store->addTextToStore(name, data, PathSet(), state.repair); |     Path storePath; | ||||||
| 
 | 
 | ||||||
|     if (unpack) { |     string expectedETag; | ||||||
|         Path tmpDir = createTempDir(); | 
 | ||||||
|         AutoDelete autoDelete(tmpDir, true); |     if (pathExists(fileLink) && pathExists(dataFile)) { | ||||||
|         runProgram("tar", true, {"xf", storePath, "-C", tmpDir, "--strip-components", "1"}, ""); |         storePath = readLink(fileLink); | ||||||
|         storePath = store->addToStore(name, tmpDir, true, htSHA256, defaultPathFilter, state.repair); |         store->addTempRoot(storePath); | ||||||
|  |         if (store->isValidPath(storePath)) { | ||||||
|  |             auto ss = tokenizeString<vector<string>>(readFile(dataFile), "\n"); | ||||||
|  |             if (ss.size() >= 2 && ss[0] == url) { | ||||||
|  |                 printMsg(lvlDebug, format("verifying previous ETag ‘%1%’") % ss[1]); | ||||||
|  |                 expectedETag = ss[1]; | ||||||
|  |             } | ||||||
|  |         } else | ||||||
|  |             storePath = ""; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     mkString(v, storePath, singleton<PathSet>(storePath)); |     string name; | ||||||
|  |     auto p = url.rfind('/'); | ||||||
|  |     if (p != string::npos) name = string(url, p + 1); | ||||||
|  | 
 | ||||||
|  |     if (expectedETag.empty()) | ||||||
|  |         printMsg(lvlInfo, format("downloading ‘%1%’...") % url); | ||||||
|  |     else | ||||||
|  |         printMsg(lvlInfo, format("checking ‘%1%’...") % url); | ||||||
|  |     Curl curl; | ||||||
|  | 
 | ||||||
|  |     if (curl.fetch(url, expectedETag)) | ||||||
|  |         storePath = store->addTextToStore(name, curl.data, PathSet(), state.repair); | ||||||
|  | 
 | ||||||
|  |     assert(!storePath.empty()); | ||||||
|  |     replaceSymlink(storePath, fileLink); | ||||||
|  | 
 | ||||||
|  |     writeFile(dataFile, url + "\n" + curl.etag + "\n"); | ||||||
|  | 
 | ||||||
|  |     if (unpack) { | ||||||
|  |         Path unpackedLink = cacheDir + "/" + baseNameOf(storePath) + "-unpacked"; | ||||||
|  |         Path unpackedStorePath; | ||||||
|  |         if (pathExists(unpackedLink)) { | ||||||
|  |             unpackedStorePath = readLink(unpackedLink); | ||||||
|  |             store->addTempRoot(unpackedStorePath); | ||||||
|  |             if (!store->isValidPath(unpackedStorePath)) | ||||||
|  |                 unpackedStorePath = ""; | ||||||
|  |         } | ||||||
|  |         if (unpackedStorePath.empty()) { | ||||||
|  |             printMsg(lvlDebug, format("unpacking ‘%1%’...") % storePath); | ||||||
|  |             Path tmpDir = createTempDir(); | ||||||
|  |             AutoDelete autoDelete(tmpDir, true); | ||||||
|  |             runProgram("tar", true, {"xf", storePath, "-C", tmpDir, "--strip-components", "1"}, ""); | ||||||
|  |             unpackedStorePath = store->addToStore(name, tmpDir, true, htSHA256, defaultPathFilter, state.repair); | ||||||
|  |         } | ||||||
|  |         replaceSymlink(unpackedStorePath, unpackedLink); | ||||||
|  |         mkString(v, unpackedStorePath, singleton<PathSet>(unpackedStorePath)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     else | ||||||
|  |         mkString(v, storePath, singleton<PathSet>(storePath)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -413,6 +413,17 @@ void createSymlink(const Path & target, const Path & link) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | void replaceSymlink(const Path & target, const Path & link) | ||||||
|  | { | ||||||
|  |     Path tmp = canonPath(dirOf(link) + "/.new_" + baseNameOf(link)); | ||||||
|  | 
 | ||||||
|  |     createSymlink(target, tmp); | ||||||
|  | 
 | ||||||
|  |     if (rename(tmp.c_str(), link.c_str()) != 0) | ||||||
|  |         throw SysError(format("renaming ‘%1%’ to ‘%2%’") % tmp % link); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| LogType logType = ltPretty; | LogType logType = ltPretty; | ||||||
| Verbosity verbosity = lvlInfo; | Verbosity verbosity = lvlInfo; | ||||||
| 
 | 
 | ||||||
|  | @ -1076,6 +1087,15 @@ string chomp(const string & s) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
/* Return a copy of `s' with every leading and trailing character
   that occurs in `whitespace' removed.  Returns the empty string if
   `s' is empty or consists only of such characters. */
string trim(const string & s, const string & whitespace)
{
    auto first = s.find_first_not_of(whitespace);
    if (first == string::npos) return "";

    /* At least one character outside `whitespace' exists, so the
       backward search cannot fail and last >= first. */
    auto last = s.find_last_not_of(whitespace);
    return string(s, first, last - first + 1);
}
|  | 
 | ||||||
|  | 
 | ||||||
| string statusToString(int status) | string statusToString(int status) | ||||||
| { | { | ||||||
|     if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { |     if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { | ||||||
|  |  | ||||||
|  | @ -110,6 +110,9 @@ Paths createDirs(const Path & path); | ||||||
| /* Create a symlink. */ | /* Create a symlink. */ | ||||||
| void createSymlink(const Path & target, const Path & link); | void createSymlink(const Path & target, const Path & link); | ||||||
| 
 | 
 | ||||||
|  | /* Atomically create or replace a symlink. */ | ||||||
|  | void replaceSymlink(const Path & target, const Path & link); | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| template<class T, class A> | template<class T, class A> | ||||||
| T singleton(const A & a) | T singleton(const A & a) | ||||||
|  | @ -334,6 +337,10 @@ string concatStringsSep(const string & sep, const StringSet & ss); | ||||||
| string chomp(const string & s); | string chomp(const string & s); | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | /* Remove whitespace from the start and end of a string. */ | ||||||
|  | string trim(const string & s, const string & whitespace = " \n\r\t"); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| /* Convert the exit status of a child as returned by wait() into an
 | /* Convert the exit status of a child as returned by wait() into an
 | ||||||
|    error string. */ |    error string. */ | ||||||
| string statusToString(int status); | string statusToString(int status); | ||||||
|  |  | ||||||
|  | @ -116,16 +116,7 @@ void switchLink(Path link, Path target) | ||||||
|     /* Hacky. */ |     /* Hacky. */ | ||||||
|     if (dirOf(target) == dirOf(link)) target = baseNameOf(target); |     if (dirOf(target) == dirOf(link)) target = baseNameOf(target); | ||||||
| 
 | 
 | ||||||
|     Path tmp = canonPath(dirOf(link) + "/.new_" + baseNameOf(link)); |     replaceSymlink(target, link); | ||||||
|     createSymlink(target, tmp); |  | ||||||
|     /* The rename() system call is supposed to be essentially atomic
 |  | ||||||
|        on Unix.  That is, if we have links `current -> X' and |  | ||||||
|        `new_current -> Y', and we rename new_current to current, a |  | ||||||
|        process accessing current will see X or Y, but never a |  | ||||||
|        file-not-found or other error condition.  This is sufficient to |  | ||||||
|        atomically switch user environments. */ |  | ||||||
|     if (rename(tmp.c_str(), link.c_str()) != 0) |  | ||||||
|         throw SysError(format("renaming ‘%1%’ to ‘%2%’") % tmp % link); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue