Fix Brotli decompression in 'nix log'
This didn't work anymore since decompression was only done in the
non-coroutine case.
Decompressors are now sinks, just like compressors.
Also fixed a bug in bzip2 API handling (we have to handle BZ_RUN_OK
rather than BZ_OK), which we didn't notice because there was a missing
'throw':
  if (ret != BZ_OK)
      CompressionError("error while compressing bzip2 file");
			
			
This commit is contained in:
		
							parent
							
								
									fa4def3d46
								
							
						
					
					
						commit
						d3761f5f8b
					
				
					 7 changed files with 339 additions and 411 deletions
				
			
		|  | @ -217,17 +217,6 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink) | |||
| { | ||||
|     auto info = queryPathInfo(storePath).cast<const NarInfo>(); | ||||
| 
 | ||||
|     auto source = sinkToSource([this, url{info->url}](Sink & sink) { | ||||
|         try { | ||||
|             getFile(url, sink); | ||||
|         } catch (NoSuchBinaryCacheFile & e) { | ||||
|             throw SubstituteGone(e.what()); | ||||
|         } | ||||
|     }); | ||||
| 
 | ||||
|     stats.narRead++; | ||||
|     //stats.narReadCompressedBytes += nar->size(); // FIXME
 | ||||
| 
 | ||||
|     uint64_t narSize = 0; | ||||
| 
 | ||||
|     LambdaSink wrapperSink([&](const unsigned char * data, size_t len) { | ||||
|  | @ -235,8 +224,18 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink) | |||
|         narSize += len; | ||||
|     }); | ||||
| 
 | ||||
|     decompress(info->compression, *source, wrapperSink); | ||||
|     auto decompressor = makeDecompressionSink(info->compression, wrapperSink); | ||||
| 
 | ||||
|     try { | ||||
|         getFile(info->url, *decompressor); | ||||
|     } catch (NoSuchBinaryCacheFile & e) { | ||||
|         throw SubstituteGone(e.what()); | ||||
|     } | ||||
| 
 | ||||
|     decompressor->flush(); | ||||
| 
 | ||||
|     stats.narRead++; | ||||
|     //stats.narReadCompressedBytes += nar->size(); // FIXME
 | ||||
|     stats.narReadBytes += narSize; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -39,21 +39,16 @@ void builtinFetchurl(const BasicDerivation & drv, const std::string & netrcData) | |||
|             request.verifyTLS = false; | ||||
|             request.decompress = false; | ||||
| 
 | ||||
|             downloader->download(std::move(request), sink); | ||||
|             auto decompressor = makeDecompressionSink( | ||||
|                 hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink); | ||||
|             downloader->download(std::move(request), *decompressor); | ||||
|             decompressor->finish(); | ||||
|         }); | ||||
| 
 | ||||
|         if (get(drv.env, "unpack", "") == "1") { | ||||
| 
 | ||||
|             if (hasSuffix(mainUrl, ".xz")) { | ||||
|                 auto source2 = sinkToSource([&](Sink & sink) { | ||||
|                     decompress("xz", *source, sink); | ||||
|                 }); | ||||
|                 restorePath(storePath, *source2); | ||||
|             } else | ||||
|                 restorePath(storePath, *source); | ||||
| 
 | ||||
|         } else | ||||
|               writeFile(storePath, *source); | ||||
|         if (get(drv.env, "unpack", "") == "1") | ||||
|             restorePath(storePath, *source); | ||||
|         else | ||||
|             writeFile(storePath, *source); | ||||
| 
 | ||||
|         auto executable = drv.env.find("executable"); | ||||
|         if (executable != drv.env.end() && executable->second == "1") { | ||||
|  |  | |||
|  | @ -58,16 +58,6 @@ std::string resolveUri(const std::string & uri) | |||
|         return uri; | ||||
| } | ||||
| 
 | ||||
| ref<std::string> decodeContent(const std::string & encoding, ref<std::string> data) | ||||
| { | ||||
|     if (encoding == "") | ||||
|         return data; | ||||
|     else if (encoding == "br") | ||||
|         return decompress(encoding, *data); | ||||
|     else | ||||
|         throw Error("unsupported Content-Encoding '%s'", encoding); | ||||
| } | ||||
| 
 | ||||
| struct CurlDownloader : public Downloader | ||||
| { | ||||
|     CURLM * curlm = 0; | ||||
|  | @ -106,6 +96,12 @@ struct CurlDownloader : public Downloader | |||
|                 fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri), | ||||
|                 {request.uri}, request.parentAct) | ||||
|             , callback(callback) | ||||
|             , finalSink([this](const unsigned char * data, size_t len) { | ||||
|                 if (this->request.dataCallback) | ||||
|                     this->request.dataCallback((char *) data, len); | ||||
|                 else | ||||
|                     this->result.data->append((char *) data, len); | ||||
|               }) | ||||
|         { | ||||
|             if (!request.expectedETag.empty()) | ||||
|                 requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); | ||||
|  | @ -129,23 +125,40 @@ struct CurlDownloader : public Downloader | |||
|             } | ||||
|         } | ||||
| 
 | ||||
|         template<class T> | ||||
|         void fail(const T & e) | ||||
|         void failEx(std::exception_ptr ex) | ||||
|         { | ||||
|             assert(!done); | ||||
|             done = true; | ||||
|             callback.rethrow(std::make_exception_ptr(e)); | ||||
|             callback.rethrow(ex); | ||||
|         } | ||||
| 
 | ||||
|         template<class T> | ||||
|         void fail(const T & e) | ||||
|         { | ||||
|             failEx(std::make_exception_ptr(e)); | ||||
|         } | ||||
| 
 | ||||
|         LambdaSink finalSink; | ||||
|         std::shared_ptr<CompressionSink> decompressionSink; | ||||
| 
 | ||||
|         std::exception_ptr writeException; | ||||
| 
 | ||||
|         size_t writeCallback(void * contents, size_t size, size_t nmemb) | ||||
|         { | ||||
|             size_t realSize = size * nmemb; | ||||
|             result.bodySize += realSize; | ||||
|             if (request.dataCallback) | ||||
|                 request.dataCallback((char *) contents, realSize); | ||||
|             else | ||||
|                 result.data->append((char *) contents, realSize); | ||||
|             return realSize; | ||||
|             try { | ||||
|                 size_t realSize = size * nmemb; | ||||
|                 result.bodySize += realSize; | ||||
| 
 | ||||
|                 if (!decompressionSink) | ||||
|                     decompressionSink = makeDecompressionSink(encoding, finalSink); | ||||
| 
 | ||||
|                 (*decompressionSink)((unsigned char *) contents, realSize); | ||||
| 
 | ||||
|                 return realSize; | ||||
|             } catch (...) { | ||||
|                 writeException = std::current_exception(); | ||||
|                 return 0; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) | ||||
|  | @ -314,27 +327,33 @@ struct CurlDownloader : public Downloader | |||
|             debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", | ||||
|                 request.verb(), request.uri, code, httpStatus, result.bodySize); | ||||
| 
 | ||||
|             if (decompressionSink) | ||||
|                 decompressionSink->finish(); | ||||
| 
 | ||||
|             if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) { | ||||
|                 code = CURLE_OK; | ||||
|                 httpStatus = 304; | ||||
|             } | ||||
| 
 | ||||
|             if (code == CURLE_OK && | ||||
|             if (writeException) | ||||
|                 failEx(writeException); | ||||
| 
 | ||||
|             else if (code == CURLE_OK && | ||||
|                 (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || httpStatus == 304 || httpStatus == 226 /* FTP */ || httpStatus == 0 /* other protocol */)) | ||||
|             { | ||||
|                 result.cached = httpStatus == 304; | ||||
|                 done = true; | ||||
| 
 | ||||
|                 try { | ||||
|                     if (request.decompress) | ||||
|                         result.data = decodeContent(encoding, ref<std::string>(result.data)); | ||||
|                     act.progress(result.data->size(), result.data->size()); | ||||
|                     callback(std::move(result)); | ||||
|                 } catch (...) { | ||||
|                     done = true; | ||||
|                     callback.rethrow(); | ||||
|                 } | ||||
|             } else { | ||||
|             } | ||||
| 
 | ||||
|             else { | ||||
|                 // We treat most errors as transient, but won't retry when hopeless
 | ||||
|                 Error err = Transient; | ||||
| 
 | ||||
|  | @ -369,6 +388,7 @@ struct CurlDownloader : public Downloader | |||
|                         case CURLE_UNKNOWN_OPTION: | ||||
|                         case CURLE_SSL_CACERT_BADFILE: | ||||
|                         case CURLE_TOO_MANY_REDIRECTS: | ||||
|                         case CURLE_WRITE_ERROR: | ||||
|                             err = Misc; | ||||
|                             break; | ||||
|                         default: // Shut up warnings
 | ||||
|  |  | |||
|  | @ -88,7 +88,4 @@ public: | |||
| 
 | ||||
| bool isUri(const string & s); | ||||
| 
 | ||||
| /* Decode data according to the Content-Encoding header. */ | ||||
| ref<std::string> decodeContent(const std::string & encoding, ref<std::string> data); | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -153,10 +153,8 @@ S3Helper::DownloadResult S3Helper::getObject( | |||
|         auto result = checkAws(fmt("AWS error fetching '%s'", key), | ||||
|             client->GetObject(request)); | ||||
| 
 | ||||
|         res.data = decodeContent( | ||||
|             result.GetContentEncoding(), | ||||
|             make_ref<std::string>( | ||||
|                 dynamic_cast<std::stringstream &>(result.GetBody()).str())); | ||||
|         res.data = decompress(result.GetContentEncoding(), | ||||
|             dynamic_cast<std::stringstream &>(result.GetBody()).str()); | ||||
| 
 | ||||
|     } catch (S3Error & e) { | ||||
|         if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) throw; | ||||
|  |  | |||
|  | @ -17,197 +17,24 @@ namespace nix { | |||
| 
 | ||||
| static const size_t bufSize = 32 * 1024; | ||||
| 
 | ||||
| static void decompressNone(Source & source, Sink & sink) | ||||
| // Don't feed brotli too much at once.
 | ||||
| struct ChunkedCompressionSink : CompressionSink | ||||
| { | ||||
|     std::vector<unsigned char> buf(bufSize); | ||||
|     while (true) { | ||||
|         size_t n; | ||||
|         try { | ||||
|             n = source.read(buf.data(), buf.size()); | ||||
|         } catch (EndOfFile &) { | ||||
|             break; | ||||
|     uint8_t outbuf[BUFSIZ]; | ||||
| 
 | ||||
|     void write(const unsigned char * data, size_t len) override | ||||
|     { | ||||
|         const size_t CHUNK_SIZE = sizeof(outbuf) << 2; | ||||
|         while (len) { | ||||
|             size_t n = std::min(CHUNK_SIZE, len); | ||||
|             writeInternal(data, n); | ||||
|             data += n; | ||||
|             len -= n; | ||||
|         } | ||||
|         sink(buf.data(), n); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void decompressXZ(Source & source, Sink & sink) | ||||
| { | ||||
|     lzma_stream strm(LZMA_STREAM_INIT); | ||||
| 
 | ||||
|     lzma_ret ret = lzma_stream_decoder( | ||||
|         &strm, UINT64_MAX, LZMA_CONCATENATED); | ||||
|     if (ret != LZMA_OK) | ||||
|         throw CompressionError("unable to initialise lzma decoder"); | ||||
| 
 | ||||
|     Finally free([&]() { lzma_end(&strm); }); | ||||
| 
 | ||||
|     lzma_action action = LZMA_RUN; | ||||
|     std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize); | ||||
|     strm.next_in = nullptr; | ||||
|     strm.avail_in = 0; | ||||
|     strm.next_out = outbuf.data(); | ||||
|     strm.avail_out = outbuf.size(); | ||||
|     bool eof = false; | ||||
| 
 | ||||
|     while (true) { | ||||
|         checkInterrupt(); | ||||
| 
 | ||||
|         if (strm.avail_in == 0 && !eof) { | ||||
|             strm.next_in = inbuf.data(); | ||||
|             try { | ||||
|                 strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size()); | ||||
|             } catch (EndOfFile &) { | ||||
|                 eof = true; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (strm.avail_in == 0) | ||||
|             action = LZMA_FINISH; | ||||
| 
 | ||||
|         lzma_ret ret = lzma_code(&strm, action); | ||||
| 
 | ||||
|         if (strm.avail_out < outbuf.size()) { | ||||
|             sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out); | ||||
|             strm.next_out = outbuf.data(); | ||||
|             strm.avail_out = outbuf.size(); | ||||
|         } | ||||
| 
 | ||||
|         if (ret == LZMA_STREAM_END) return; | ||||
| 
 | ||||
|         if (ret != LZMA_OK) | ||||
|             throw CompressionError("error %d while decompressing xz file", ret); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void decompressBzip2(Source & source, Sink & sink) | ||||
| { | ||||
|     bz_stream strm; | ||||
|     memset(&strm, 0, sizeof(strm)); | ||||
| 
 | ||||
|     int ret = BZ2_bzDecompressInit(&strm, 0, 0); | ||||
|     if (ret != BZ_OK) | ||||
|         throw CompressionError("unable to initialise bzip2 decoder"); | ||||
| 
 | ||||
|     Finally free([&]() { BZ2_bzDecompressEnd(&strm); }); | ||||
| 
 | ||||
|     std::vector<char> inbuf(bufSize), outbuf(bufSize); | ||||
|     strm.next_in = nullptr; | ||||
|     strm.avail_in = 0; | ||||
|     strm.next_out = outbuf.data(); | ||||
|     strm.avail_out = outbuf.size(); | ||||
|     bool eof = false; | ||||
| 
 | ||||
|     while (true) { | ||||
|         checkInterrupt(); | ||||
| 
 | ||||
|         if (strm.avail_in == 0 && !eof) { | ||||
|             strm.next_in = inbuf.data(); | ||||
|             try { | ||||
|                 strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size()); | ||||
|             } catch (EndOfFile &) { | ||||
|                 eof = true; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         int ret = BZ2_bzDecompress(&strm); | ||||
| 
 | ||||
|         if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof) | ||||
|             throw CompressionError("bzip2 data ends prematurely"); | ||||
| 
 | ||||
|         if (strm.avail_out < outbuf.size()) { | ||||
|             sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out); | ||||
|             strm.next_out = outbuf.data(); | ||||
|             strm.avail_out = outbuf.size(); | ||||
|         } | ||||
| 
 | ||||
|         if (ret == BZ_STREAM_END) return; | ||||
| 
 | ||||
|         if (ret != BZ_OK) | ||||
|             throw CompressionError("error while decompressing bzip2 file"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void decompressBrotli(Source & source, Sink & sink) | ||||
| { | ||||
|     auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); | ||||
|     if (!s) | ||||
|         throw CompressionError("unable to initialize brotli decoder"); | ||||
| 
 | ||||
|     Finally free([s]() { BrotliDecoderDestroyInstance(s); }); | ||||
| 
 | ||||
|     std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize); | ||||
|     const uint8_t * next_in = nullptr; | ||||
|     size_t avail_in = 0; | ||||
|     bool eof = false; | ||||
| 
 | ||||
|     while (true) { | ||||
|         checkInterrupt(); | ||||
| 
 | ||||
|         if (avail_in == 0 && !eof) { | ||||
|             next_in = inbuf.data(); | ||||
|             try { | ||||
|                 avail_in = source.read((unsigned char *) next_in, inbuf.size()); | ||||
|             } catch (EndOfFile &) { | ||||
|                 eof = true; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         uint8_t * next_out = outbuf.data(); | ||||
|         size_t avail_out = outbuf.size(); | ||||
| 
 | ||||
|         auto ret = BrotliDecoderDecompressStream(s, | ||||
|                 &avail_in, &next_in, | ||||
|                 &avail_out, &next_out, | ||||
|                 nullptr); | ||||
| 
 | ||||
|         switch (ret) { | ||||
|         case BROTLI_DECODER_RESULT_ERROR: | ||||
|             throw CompressionError("error while decompressing brotli file"); | ||||
|         case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: | ||||
|             if (eof) | ||||
|                 throw CompressionError("incomplete or corrupt brotli file"); | ||||
|             break; | ||||
|         case BROTLI_DECODER_RESULT_SUCCESS: | ||||
|             if (avail_in != 0) | ||||
|                 throw CompressionError("unexpected input after brotli decompression"); | ||||
|             break; | ||||
|         case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: | ||||
|             // I'm not sure if this can happen, but abort if this happens with empty buffer
 | ||||
|             if (avail_out == outbuf.size()) | ||||
|                 throw CompressionError("brotli decompression requires larger buffer"); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         // Always ensure we have full buffer for next invocation
 | ||||
|         if (avail_out < outbuf.size()) | ||||
|             sink((unsigned char *) outbuf.data(), outbuf.size() - avail_out); | ||||
| 
 | ||||
|         if (ret == BROTLI_DECODER_RESULT_SUCCESS) return; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| ref<std::string> decompress(const std::string & method, const std::string & in) | ||||
| { | ||||
|     StringSource source(in); | ||||
|     StringSink sink; | ||||
|     decompress(method, source, sink); | ||||
|     return sink.s; | ||||
| } | ||||
| 
 | ||||
| void decompress(const std::string & method, Source & source, Sink & sink) | ||||
| { | ||||
|     if (method == "none") | ||||
|         return decompressNone(source, sink); | ||||
|     else if (method == "xz") | ||||
|         return decompressXZ(source, sink); | ||||
|     else if (method == "bzip2") | ||||
|         return decompressBzip2(source, sink); | ||||
|     else if (method == "br") | ||||
|         return decompressBrotli(source, sink); | ||||
|     else | ||||
|         throw UnknownCompressionMethod("unknown compression method '%s'", method); | ||||
| } | ||||
|     virtual void writeInternal(const unsigned char * data, size_t len) = 0; | ||||
| }; | ||||
| 
 | ||||
| struct NoneSink : CompressionSink | ||||
| { | ||||
|  | @ -217,28 +44,25 @@ struct NoneSink : CompressionSink | |||
|     void write(const unsigned char * data, size_t len) override { nextSink(data, len); } | ||||
| }; | ||||
| 
 | ||||
| struct XzSink : CompressionSink | ||||
| struct XzDecompressionSink : CompressionSink | ||||
| { | ||||
|     Sink & nextSink; | ||||
|     uint8_t outbuf[BUFSIZ]; | ||||
|     lzma_stream strm = LZMA_STREAM_INIT; | ||||
|     bool finished = false; | ||||
| 
 | ||||
|     template <typename F> | ||||
|     XzSink(Sink & nextSink, F&& initEncoder) : nextSink(nextSink) { | ||||
|         lzma_ret ret = initEncoder(); | ||||
|     XzDecompressionSink(Sink & nextSink) : nextSink(nextSink) | ||||
|     { | ||||
|         lzma_ret ret = lzma_stream_decoder( | ||||
|             &strm, UINT64_MAX, LZMA_CONCATENATED); | ||||
|         if (ret != LZMA_OK) | ||||
|             throw CompressionError("unable to initialise lzma encoder"); | ||||
|         // FIXME: apply the x86 BCJ filter?
 | ||||
|             throw CompressionError("unable to initialise lzma decoder"); | ||||
| 
 | ||||
|         strm.next_out = outbuf; | ||||
|         strm.avail_out = sizeof(outbuf); | ||||
|     } | ||||
|     XzSink(Sink & nextSink) : XzSink(nextSink, [this]() { | ||||
|         return lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64); | ||||
|     }) {} | ||||
| 
 | ||||
|     ~XzSink() | ||||
|     ~XzDecompressionSink() | ||||
|     { | ||||
|         lzma_end(&strm); | ||||
|     } | ||||
|  | @ -246,43 +70,25 @@ struct XzSink : CompressionSink | |||
|     void finish() override | ||||
|     { | ||||
|         CompressionSink::flush(); | ||||
| 
 | ||||
|         assert(!finished); | ||||
|         finished = true; | ||||
| 
 | ||||
|         while (true) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             lzma_ret ret = lzma_code(&strm, LZMA_FINISH); | ||||
|             if (ret != LZMA_OK && ret != LZMA_STREAM_END) | ||||
|                 throw CompressionError("error while flushing xz file"); | ||||
| 
 | ||||
|             if (strm.avail_out == 0 || ret == LZMA_STREAM_END) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - strm.avail_out); | ||||
|                 strm.next_out = outbuf; | ||||
|                 strm.avail_out = sizeof(outbuf); | ||||
|             } | ||||
| 
 | ||||
|             if (ret == LZMA_STREAM_END) break; | ||||
|         } | ||||
|         write(nullptr, 0); | ||||
|     } | ||||
| 
 | ||||
|     void write(const unsigned char * data, size_t len) override | ||||
|     { | ||||
|         assert(!finished); | ||||
| 
 | ||||
|         strm.next_in = data; | ||||
|         strm.avail_in = len; | ||||
| 
 | ||||
|         while (strm.avail_in) { | ||||
|         while (!finished && (!data || strm.avail_in)) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             lzma_ret ret = lzma_code(&strm, LZMA_RUN); | ||||
|             if (ret != LZMA_OK) | ||||
|                 throw CompressionError("error while compressing xz file"); | ||||
|             lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH); | ||||
|             if (ret != LZMA_OK && ret != LZMA_STREAM_END) | ||||
|                 throw CompressionError("error %d while decompressing xz file", ret); | ||||
| 
 | ||||
|             if (strm.avail_out == 0) { | ||||
|                 nextSink(outbuf, sizeof(outbuf)); | ||||
|             finished = ret == LZMA_STREAM_END; | ||||
| 
 | ||||
|             if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - strm.avail_out); | ||||
|                 strm.next_out = outbuf; | ||||
|                 strm.avail_out = sizeof(outbuf); | ||||
|             } | ||||
|  | @ -290,89 +96,36 @@ struct XzSink : CompressionSink | |||
|     } | ||||
| }; | ||||
| 
 | ||||
| #ifdef HAVE_LZMA_MT | ||||
| struct ParallelXzSink : public XzSink | ||||
| { | ||||
|   ParallelXzSink(Sink &nextSink) : XzSink(nextSink, [this]() { | ||||
|         lzma_mt mt_options = {}; | ||||
|         mt_options.flags = 0; | ||||
|         mt_options.timeout = 300; // Using the same setting as the xz cmd line
 | ||||
|         mt_options.preset = LZMA_PRESET_DEFAULT; | ||||
|         mt_options.filters = NULL; | ||||
|         mt_options.check = LZMA_CHECK_CRC64; | ||||
|         mt_options.threads = lzma_cputhreads(); | ||||
|         mt_options.block_size = 0; | ||||
|         if (mt_options.threads == 0) | ||||
|             mt_options.threads = 1; | ||||
|         // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
 | ||||
|         // number of threads.
 | ||||
|         return lzma_stream_encoder_mt(&strm, &mt_options); | ||||
|   }) {} | ||||
| }; | ||||
| #endif | ||||
| 
 | ||||
| struct BzipSink : CompressionSink | ||||
| struct BzipDecompressionSink : ChunkedCompressionSink | ||||
| { | ||||
|     Sink & nextSink; | ||||
|     char outbuf[BUFSIZ]; | ||||
|     bz_stream strm; | ||||
|     bool finished = false; | ||||
| 
 | ||||
|     BzipSink(Sink & nextSink) : nextSink(nextSink) | ||||
|     BzipDecompressionSink(Sink & nextSink) : nextSink(nextSink) | ||||
|     { | ||||
|         memset(&strm, 0, sizeof(strm)); | ||||
|         int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); | ||||
|         int ret = BZ2_bzDecompressInit(&strm, 0, 0); | ||||
|         if (ret != BZ_OK) | ||||
|             throw CompressionError("unable to initialise bzip2 encoder"); | ||||
|             throw CompressionError("unable to initialise bzip2 decoder"); | ||||
| 
 | ||||
|         strm.next_out = outbuf; | ||||
|         strm.next_out = (char *) outbuf; | ||||
|         strm.avail_out = sizeof(outbuf); | ||||
|     } | ||||
| 
 | ||||
|     ~BzipSink() | ||||
|     ~BzipDecompressionSink() | ||||
|     { | ||||
|         BZ2_bzCompressEnd(&strm); | ||||
|         BZ2_bzDecompressEnd(&strm); | ||||
|     } | ||||
| 
 | ||||
|     void finish() override | ||||
|     { | ||||
|         flush(); | ||||
| 
 | ||||
|         assert(!finished); | ||||
|         finished = true; | ||||
| 
 | ||||
|         while (true) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             int ret = BZ2_bzCompress(&strm, BZ_FINISH); | ||||
|             if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) | ||||
|                 throw CompressionError("error while flushing bzip2 file"); | ||||
| 
 | ||||
|             if (strm.avail_out == 0 || ret == BZ_STREAM_END) { | ||||
|                 nextSink((unsigned char *) outbuf, sizeof(outbuf) - strm.avail_out); | ||||
|                 strm.next_out = outbuf; | ||||
|                 strm.avail_out = sizeof(outbuf); | ||||
|             } | ||||
| 
 | ||||
|             if (ret == BZ_STREAM_END) break; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void write(const unsigned char * data, size_t len) override | ||||
|     { | ||||
|         /* Bzip2's 'avail_in' parameter is an unsigned int, so we need
 | ||||
|            to split the input into chunks of at most 4 GiB. */ | ||||
|         while (len) { | ||||
|             auto n = std::min((size_t) std::numeric_limits<decltype(strm.avail_in)>::max(), len); | ||||
|             writeInternal(data, n); | ||||
|             data += n; | ||||
|             len -= n; | ||||
|         } | ||||
|         write(nullptr, 0); | ||||
|     } | ||||
| 
 | ||||
|     void writeInternal(const unsigned char * data, size_t len) | ||||
|     { | ||||
|         assert(!finished); | ||||
|         assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); | ||||
| 
 | ||||
|         strm.next_in = (char *) data; | ||||
|  | @ -381,12 +134,167 @@ struct BzipSink : CompressionSink | |||
|         while (strm.avail_in) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             int ret = BZ2_bzCompress(&strm, BZ_RUN); | ||||
|             if (ret != BZ_OK) | ||||
|                 CompressionError("error while compressing bzip2 file"); | ||||
|             int ret = BZ2_bzDecompress(&strm); | ||||
|             if (ret != BZ_OK && ret != BZ_STREAM_END) | ||||
|                 throw CompressionError("error while decompressing bzip2 file"); | ||||
| 
 | ||||
|             if (strm.avail_out == 0) { | ||||
|                 nextSink((unsigned char *) outbuf, sizeof(outbuf)); | ||||
|             finished = ret == BZ_STREAM_END; | ||||
| 
 | ||||
|             if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - strm.avail_out); | ||||
|                 strm.next_out = (char *) outbuf; | ||||
|                 strm.avail_out = sizeof(outbuf); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct BrotliDecompressionSink : ChunkedCompressionSink | ||||
| { | ||||
|     Sink & nextSink; | ||||
|     BrotliDecoderState * state; | ||||
|     bool finished = false; | ||||
| 
 | ||||
|     BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink) | ||||
|     { | ||||
|         state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); | ||||
|         if (!state) | ||||
|             throw CompressionError("unable to initialize brotli decoder"); | ||||
|     } | ||||
| 
 | ||||
|     ~BrotliDecompressionSink() | ||||
|     { | ||||
|         BrotliDecoderDestroyInstance(state); | ||||
|     } | ||||
| 
 | ||||
|     void finish() override | ||||
|     { | ||||
|         flush(); | ||||
|         writeInternal(nullptr, 0); | ||||
|     } | ||||
| 
 | ||||
|     void writeInternal(const unsigned char * data, size_t len) | ||||
|     { | ||||
|         const uint8_t * next_in = data; | ||||
|         size_t avail_in = len; | ||||
|         uint8_t * next_out = outbuf; | ||||
|         size_t avail_out = sizeof(outbuf); | ||||
| 
 | ||||
|         while (!finished && (!data || avail_in)) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             if (!BrotliDecoderDecompressStream(state, | ||||
|                     &avail_in, &next_in, | ||||
|                     &avail_out, &next_out, | ||||
|                     nullptr)) | ||||
|                 throw CompressionError("error while decompressing brotli file"); | ||||
| 
 | ||||
|             if (avail_out < sizeof(outbuf) || avail_in == 0) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - avail_out); | ||||
|                 next_out = outbuf; | ||||
|                 avail_out = sizeof(outbuf); | ||||
|             } | ||||
| 
 | ||||
|             finished = BrotliDecoderIsFinished(state); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| ref<std::string> decompress(const std::string & method, const std::string & in) | ||||
| { | ||||
|     StringSink ssink; | ||||
|     auto sink = makeDecompressionSink(method, ssink); | ||||
|     (*sink)(in); | ||||
|     sink->finish(); | ||||
|     return ssink.s; | ||||
| } | ||||
| 
 | ||||
| ref<CompressionSink> makeDecompressionSink(const std::string & method, Sink & nextSink) | ||||
| { | ||||
|     if (method == "none" || method == "") | ||||
|         return make_ref<NoneSink>(nextSink); | ||||
|     else if (method == "xz") | ||||
|         return make_ref<XzDecompressionSink>(nextSink); | ||||
|     else if (method == "bzip2") | ||||
|         return make_ref<BzipDecompressionSink>(nextSink); | ||||
|     else if (method == "br") | ||||
|         return make_ref<BrotliDecompressionSink>(nextSink); | ||||
|     else | ||||
|         throw UnknownCompressionMethod("unknown compression method '%s'", method); | ||||
| } | ||||
| 
 | ||||
| struct XzCompressionSink : CompressionSink | ||||
| { | ||||
|     Sink & nextSink; | ||||
|     uint8_t outbuf[BUFSIZ]; | ||||
|     lzma_stream strm = LZMA_STREAM_INIT; | ||||
|     bool finished = false; | ||||
| 
 | ||||
|     XzCompressionSink(Sink & nextSink, bool parallel) : nextSink(nextSink) | ||||
|     { | ||||
|         lzma_ret ret; | ||||
|         bool done = false; | ||||
| 
 | ||||
|         if (parallel) { | ||||
| #ifdef HAVE_LZMA_MT | ||||
|             lzma_mt mt_options = {}; | ||||
|             mt_options.flags = 0; | ||||
|             mt_options.timeout = 300; // Using the same setting as the xz cmd line
 | ||||
|             mt_options.preset = LZMA_PRESET_DEFAULT; | ||||
|             mt_options.filters = NULL; | ||||
|             mt_options.check = LZMA_CHECK_CRC64; | ||||
|             mt_options.threads = lzma_cputhreads(); | ||||
|             mt_options.block_size = 0; | ||||
|             if (mt_options.threads == 0) | ||||
|                 mt_options.threads = 1; | ||||
|             // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
 | ||||
|             // number of threads.
 | ||||
|             ret = lzma_stream_encoder_mt(&strm, &mt_options); | ||||
|             done = true; | ||||
| #else | ||||
|             printMsg(lvlError, "warning: parallel compression requested but not supported for metho  d '%1%', falling back to single-threaded compression", method); | ||||
| #endif | ||||
|         } | ||||
| 
 | ||||
|         if (!done) | ||||
|             ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64); | ||||
| 
 | ||||
|         if (ret != LZMA_OK) | ||||
|             throw CompressionError("unable to initialise lzma encoder"); | ||||
| 
 | ||||
|         // FIXME: apply the x86 BCJ filter?
 | ||||
| 
 | ||||
|         strm.next_out = outbuf; | ||||
|         strm.avail_out = sizeof(outbuf); | ||||
|     } | ||||
| 
 | ||||
|     ~XzCompressionSink() | ||||
|     { | ||||
|         lzma_end(&strm); | ||||
|     } | ||||
| 
 | ||||
|     void finish() override | ||||
|     { | ||||
|         CompressionSink::flush(); | ||||
|         write(nullptr, 0); | ||||
|     } | ||||
| 
 | ||||
|     void write(const unsigned char * data, size_t len) override | ||||
|     { | ||||
|         strm.next_in = data; | ||||
|         strm.avail_in = len; | ||||
| 
 | ||||
|         while (!finished && (!data || strm.avail_in)) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH); | ||||
|             if (ret != LZMA_OK && ret != LZMA_STREAM_END) | ||||
|                 throw CompressionError("error %d while compressing xz file", ret); | ||||
| 
 | ||||
|             finished = ret == LZMA_STREAM_END; | ||||
| 
 | ||||
|             if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - strm.avail_out); | ||||
|                 strm.next_out = outbuf; | ||||
|                 strm.avail_out = sizeof(outbuf); | ||||
|             } | ||||
|  | @ -394,21 +302,74 @@ struct BzipSink : CompressionSink | |||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct BrotliSink : CompressionSink | ||||
| struct BzipCompressionSink : ChunkedCompressionSink | ||||
| { | ||||
|     Sink & nextSink; | ||||
|     bz_stream strm; | ||||
|     bool finished = false; | ||||
| 
 | ||||
|     BzipCompressionSink(Sink & nextSink) : nextSink(nextSink) | ||||
|     { | ||||
|         memset(&strm, 0, sizeof(strm)); | ||||
|         int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); | ||||
|         if (ret != BZ_OK) | ||||
|             throw CompressionError("unable to initialise bzip2 encoder"); | ||||
| 
 | ||||
|         strm.next_out = (char *) outbuf; | ||||
|         strm.avail_out = sizeof(outbuf); | ||||
|     } | ||||
| 
 | ||||
|     ~BzipCompressionSink() | ||||
|     { | ||||
|         BZ2_bzCompressEnd(&strm); | ||||
|     } | ||||
| 
 | ||||
|     void finish() override | ||||
|     { | ||||
|         flush(); | ||||
|         writeInternal(nullptr, 0); | ||||
|     } | ||||
| 
 | ||||
|     void writeInternal(const unsigned char * data, size_t len) | ||||
|     { | ||||
|         assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); | ||||
| 
 | ||||
|         strm.next_in = (char *) data; | ||||
|         strm.avail_in = len; | ||||
| 
 | ||||
|         while (!finished && (!data || strm.avail_in)) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             int ret = BZ2_bzCompress(&strm, data ? BZ_RUN : BZ_FINISH); | ||||
|             if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END) | ||||
|                 throw CompressionError("error %d while compressing bzip2 file", ret); | ||||
| 
 | ||||
|             finished = ret == BZ_STREAM_END; | ||||
| 
 | ||||
|             if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - strm.avail_out); | ||||
|                 strm.next_out = (char *) outbuf; | ||||
|                 strm.avail_out = sizeof(outbuf); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| struct BrotliCompressionSink : ChunkedCompressionSink | ||||
| { | ||||
|     Sink & nextSink; | ||||
|     uint8_t outbuf[BUFSIZ]; | ||||
|     BrotliEncoderState *state; | ||||
|     bool finished = false; | ||||
| 
 | ||||
|     BrotliSink(Sink & nextSink) : nextSink(nextSink) | ||||
|     BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink) | ||||
|     { | ||||
|         state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); | ||||
|         if (!state) | ||||
|             throw CompressionError("unable to initialise brotli encoder"); | ||||
|     } | ||||
| 
 | ||||
|     ~BrotliSink() | ||||
|     ~BrotliCompressionSink() | ||||
|     { | ||||
|         BrotliEncoderDestroyInstance(state); | ||||
|     } | ||||
|  | @ -416,89 +377,47 @@ struct BrotliSink : CompressionSink | |||
|     void finish() override | ||||
|     { | ||||
|         flush(); | ||||
|         assert(!finished); | ||||
| 
 | ||||
|         const uint8_t *next_in = nullptr; | ||||
|         size_t avail_in = 0; | ||||
|         uint8_t *next_out = outbuf; | ||||
|         size_t avail_out = sizeof(outbuf); | ||||
|         while (!finished) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             if (!BrotliEncoderCompressStream(state, | ||||
|                         BROTLI_OPERATION_FINISH, | ||||
|                         &avail_in, &next_in, | ||||
|                         &avail_out, &next_out, | ||||
|                         nullptr)) | ||||
|                 throw CompressionError("error while finishing brotli file"); | ||||
| 
 | ||||
|             finished = BrotliEncoderIsFinished(state); | ||||
|             if (avail_out == 0 || finished) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - avail_out); | ||||
|                 next_out = outbuf; | ||||
|                 avail_out = sizeof(outbuf); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void write(const unsigned char * data, size_t len) override | ||||
|     { | ||||
|         // Don't feed brotli too much at once
 | ||||
|         const size_t CHUNK_SIZE = sizeof(outbuf) << 2; | ||||
|         while (len) { | ||||
|           size_t n = std::min(CHUNK_SIZE, len); | ||||
|           writeInternal(data, n); | ||||
|           data += n; | ||||
|           len -= n; | ||||
|         } | ||||
|         writeInternal(nullptr, 0); | ||||
|     } | ||||
| 
 | ||||
|     void writeInternal(const unsigned char * data, size_t len) | ||||
|     { | ||||
|         assert(!finished); | ||||
| 
 | ||||
|         const uint8_t *next_in = data; | ||||
|         const uint8_t * next_in = data; | ||||
|         size_t avail_in = len; | ||||
|         uint8_t *next_out = outbuf; | ||||
|         uint8_t * next_out = outbuf; | ||||
|         size_t avail_out = sizeof(outbuf); | ||||
| 
 | ||||
|         while (avail_in > 0) { | ||||
|         while (!finished && (!data || avail_in)) { | ||||
|             checkInterrupt(); | ||||
| 
 | ||||
|             if (!BrotliEncoderCompressStream(state, | ||||
|                       BROTLI_OPERATION_PROCESS, | ||||
|                       &avail_in, &next_in, | ||||
|                       &avail_out, &next_out, | ||||
|                       nullptr)) | ||||
|                 throw CompressionError("error while compressing brotli file"); | ||||
|                     data ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH, | ||||
|                     &avail_in, &next_in, | ||||
|                     &avail_out, &next_out, | ||||
|                     nullptr)) | ||||
|                 throw CompressionError("error while compressing brotli compression"); | ||||
| 
 | ||||
|             if (avail_out < sizeof(outbuf) || avail_in == 0) { | ||||
|                 nextSink(outbuf, sizeof(outbuf) - avail_out); | ||||
|                 next_out = outbuf; | ||||
|                 avail_out = sizeof(outbuf); | ||||
|             } | ||||
| 
 | ||||
|             finished = BrotliEncoderIsFinished(state); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel) | ||||
| { | ||||
|     if (parallel) { | ||||
| #ifdef HAVE_LZMA_MT | ||||
|         if (method == "xz") | ||||
|             return make_ref<ParallelXzSink>(nextSink); | ||||
| #endif | ||||
|         printMsg(lvlError, format("Warning: parallel compression requested but not supported for method '%1%', falling back to single-threaded compression") % method); | ||||
|     } | ||||
| 
 | ||||
|     if (method == "none") | ||||
|         return make_ref<NoneSink>(nextSink); | ||||
|     else if (method == "xz") | ||||
|         return make_ref<XzSink>(nextSink); | ||||
|         return make_ref<XzCompressionSink>(nextSink, parallel); | ||||
|     else if (method == "bzip2") | ||||
|         return make_ref<BzipSink>(nextSink); | ||||
|         return make_ref<BzipCompressionSink>(nextSink); | ||||
|     else if (method == "br") | ||||
|         return make_ref<BrotliSink>(nextSink); | ||||
|         return make_ref<BrotliCompressionSink>(nextSink); | ||||
|     else | ||||
|         throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); | ||||
| } | ||||
|  |  | |||
|  | @ -8,17 +8,17 @@ | |||
| 
 | ||||
| namespace nix { | ||||
| 
 | ||||
| ref<std::string> decompress(const std::string & method, const std::string & in); | ||||
| 
 | ||||
| void decompress(const std::string & method, Source & source, Sink & sink); | ||||
| 
 | ||||
| ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false); | ||||
| 
 | ||||
| struct CompressionSink : BufferedSink | ||||
| { | ||||
|     virtual void finish() = 0; | ||||
| }; | ||||
| 
 | ||||
| ref<std::string> decompress(const std::string & method, const std::string & in); | ||||
| 
 | ||||
| ref<CompressionSink> makeDecompressionSink(const std::string & method, Sink & nextSink); | ||||
| 
 | ||||
| ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false); | ||||
| 
 | ||||
| ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false); | ||||
| 
 | ||||
| MakeError(UnknownCompressionMethod, Error); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue