fix(tvix): Avoid buffering file into memory in builtins.hashFile
Right now `builtins.hashFile` always reads the entire file into memory before hashing, which is not ideal for large files. This replaces `read_to_string` with `open_file`, which allows calculating the hash of the file without buffering it entirely into memory. Other callers can continue to buffer into memory if they choose, but they still use the `open_file` VM request and then call `read_to_string` or `read_to_end` on the returned `std::io::Read` reader.
Fixes b/380
Change-Id: Ifa1c8324bcee8f751604b0b449feab875c632fda
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11236
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
parent
17849c5c00
commit
63116d8c21
9 changed files with 80 additions and 74 deletions
|
|
@ -6,18 +6,22 @@ use sha2::{digest::Output, Digest, Sha256, Sha512};
|
|||
|
||||
use crate::ErrorKind;
|
||||
|
||||
fn hash<D: Digest>(b: &[u8]) -> Output<D> {
|
||||
/// Reads through all data from the passed reader, and returns the resulting [Digest].
|
||||
/// The exact hash function used is left generic over all [Digest].
|
||||
fn hash<D: Digest + std::io::Write>(mut r: impl std::io::Read) -> Result<Output<D>, ErrorKind> {
|
||||
let mut hasher = D::new();
|
||||
hasher.update(b);
|
||||
hasher.finalize()
|
||||
std::io::copy(&mut r, &mut hasher)?;
|
||||
Ok(hasher.finalize())
|
||||
}
|
||||
|
||||
pub fn hash_nix_string(algo: impl AsRef<[u8]>, s: impl AsRef<[u8]>) -> Result<String, ErrorKind> {
|
||||
/// For a given algo "string" and reader for data, calculate the digest
|
||||
/// and return it as a hexlower encoded [String].
|
||||
pub fn hash_nix_string(algo: impl AsRef<[u8]>, s: impl std::io::Read) -> Result<String, ErrorKind> {
|
||||
match algo.as_ref() {
|
||||
b"md5" => Ok(HEXLOWER.encode(hash::<Md5>(s.as_ref()).as_bstr())),
|
||||
b"sha1" => Ok(HEXLOWER.encode(hash::<Sha1>(s.as_ref()).as_bstr())),
|
||||
b"sha256" => Ok(HEXLOWER.encode(hash::<Sha256>(s.as_ref()).as_bstr())),
|
||||
b"sha512" => Ok(HEXLOWER.encode(hash::<Sha512>(s.as_ref()).as_bstr())),
|
||||
b"md5" => Ok(HEXLOWER.encode(hash::<Md5>(s)?.as_bstr())),
|
||||
b"sha1" => Ok(HEXLOWER.encode(hash::<Sha1>(s)?.as_bstr())),
|
||||
b"sha256" => Ok(HEXLOWER.encode(hash::<Sha256>(s)?.as_bstr())),
|
||||
b"sha512" => Ok(HEXLOWER.encode(hash::<Sha512>(s)?.as_bstr())),
|
||||
_ => Err(ErrorKind::UnknownHashType(
|
||||
algo.as_ref().as_bstr().to_string(),
|
||||
)),
|
||||
|
|
|
|||
|
|
@ -31,14 +31,13 @@ mod impure_builtins {
|
|||
}
|
||||
|
||||
#[builtin("hashFile")]
|
||||
#[allow(non_snake_case)]
|
||||
async fn builtin_hashFile(co: GenCo, algo: Value, path: Value) -> Result<Value, ErrorKind> {
|
||||
async fn builtin_hash_file(co: GenCo, algo: Value, path: Value) -> Result<Value, ErrorKind> {
|
||||
let path = match coerce_value_to_path(&co, path).await? {
|
||||
Err(cek) => return Ok(Value::from(cek)),
|
||||
Ok(p) => p,
|
||||
};
|
||||
let s = generators::request_read_to_string(&co, path).await;
|
||||
hash_nix_string(algo.to_str()?, s.to_str()?).map(Value::from)
|
||||
let r = generators::request_open_file(&co, path).await;
|
||||
Ok(hash_nix_string(algo.to_str()?, r).map(Value::from)?)
|
||||
}
|
||||
|
||||
#[builtin("pathExists")]
|
||||
|
|
@ -79,7 +78,13 @@ mod impure_builtins {
|
|||
async fn builtin_read_file(co: GenCo, path: Value) -> Result<Value, ErrorKind> {
|
||||
match coerce_value_to_path(&co, path).await? {
|
||||
Err(cek) => Ok(Value::from(cek)),
|
||||
Ok(path) => Ok(generators::request_read_to_string(&co, path).await),
|
||||
Ok(path) => {
|
||||
let mut buf = Vec::new();
|
||||
generators::request_open_file(&co, path)
|
||||
.await
|
||||
.read_to_end(&mut buf)?;
|
||||
Ok(Value::from(buf))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -773,9 +773,8 @@ mod pure_builtins {
|
|||
}
|
||||
|
||||
#[builtin("hashString")]
|
||||
#[allow(non_snake_case)]
|
||||
async fn builtin_hashString(co: GenCo, algo: Value, s: Value) -> Result<Value, ErrorKind> {
|
||||
hash_nix_string(algo.to_str()?, s.to_str()?).map(Value::from)
|
||||
async fn builtin_hash_string(co: GenCo, algo: Value, s: Value) -> Result<Value, ErrorKind> {
|
||||
hash_nix_string(algo.to_str()?, std::io::Cursor::new(s.to_str()?)).map(Value::from)
|
||||
}
|
||||
|
||||
#[builtin("head")]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue