refactor(tvix/glue): move Fetch[er] into its own types, fetch lazily

We actually want to delay fetching until we actually need the file. A
simple evaluation asking for `.outPath` or `.drvPath` should work even
in a pure offline environment.

Before this CL, the fetching logic was quite distributed between
tvix_store_io, and builtins/fetchers.rs.

Rather than having various functions and conversions between structs,
describe a Fetch as an enum type, with the fields describing the fetch.

Define a store_path() function on top of `Fetch` which can be used to
ask for the calculated store path (if the digest has been provided
upfront).

Have a `Fetcher` struct, and give it a `fetch_and_persist` function,
taking a `Fetch` as well as a desired name, and have it deal with all
the logic of persisting the PathInfos. It also returns a StorePathRef,
similar to the `.store_path()` method on a `Fetch` struct.

In a followup CL, we can extend KnownPaths to track fetches AND
derivations, and then use `Fetcher` when we need to do IO into that
store path.

Change-Id: Ib39a96baeb661750a8706b461f8ba4abb342e777
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11500
Reviewed-by: raitobezarius <tvl@lahfa.xyz>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-04-22 14:02:48 +03:00 committed by clbot
parent dc444e55dc
commit 091de12a9a
8 changed files with 538 additions and 399 deletions

View file

@ -41,17 +41,17 @@ pub enum FetcherError {
#[error("Invalid hash type '{0}' for fetcher")]
InvalidHashType(&'static str),
#[error("Error in store path for fetcher output: {0}")]
StorePath(#[from] BuildStorePathError),
#[error(transparent)]
Http(#[from] reqwest::Error),
}
impl From<FetcherError> for tvix_eval::ErrorKind {
fn from(err: FetcherError) -> Self {
tvix_eval::ErrorKind::TvixError(Rc::new(err))
}
#[error(transparent)]
Io(#[from] std::io::Error),
#[error(transparent)]
Import(#[from] tvix_castore::import::Error),
#[error("Error calculating store path for fetcher output: {0}")]
StorePath(#[from] BuildStorePathError),
}
/// Errors related to `builtins.path` and `builtins.filterSource`,

View file

@ -1,197 +1,74 @@
//! Contains builtins that fetch paths from the Internet
use crate::tvix_store_io::TvixStoreIO;
use bstr::ByteSlice;
use nix_compat::nixhash::{self, CAHash};
use nix_compat::store_path::{build_ca_path, StorePathRef};
use std::rc::Rc;
use tvix_eval::builtin_macros::builtins;
use tvix_eval::generators::GenCo;
use tvix_eval::{CatchableErrorKind, ErrorKind, NixContextElement, NixString, Value};
use super::utils::select_string;
use super::{DerivationError, FetcherError};
use crate::{
fetchers::{url_basename, Fetch},
tvix_store_io::TvixStoreIO,
};
use nix_compat::nixhash;
use nix_compat::nixhash::NixHash;
use std::rc::Rc;
use tracing::info;
use tvix_eval::builtin_macros::builtins;
use tvix_eval::generators::Gen;
use tvix_eval::generators::GenCo;
use tvix_eval::{CatchableErrorKind, ErrorKind, Value};
/// Attempts to mimic `nix::libutil::baseNameOf`
fn url_basename(s: &str) -> &str {
if s.is_empty() {
return "";
}
let mut last = s.len() - 1;
if s.chars().nth(last).unwrap() == '/' && last > 0 {
last -= 1;
}
if last == 0 {
return "";
}
let pos = match s[..=last].rfind('/') {
Some(pos) => {
if pos == last - 1 {
0
} else {
pos
}
}
None => 0,
};
&s[(pos + 1)..=last]
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HashMode {
Flat,
Recursive,
}
/// Struct representing the arguments passed to fetcher functions
#[derive(Debug, PartialEq, Eq)]
struct FetchArgs {
struct NixFetchArgs {
url: String,
name: String,
hash: Option<CAHash>,
name: Option<String>,
sha256: Option<[u8; 32]>,
}
impl FetchArgs {
pub fn new(
url: String,
name: Option<String>,
sha256: Option<String>,
mode: HashMode,
) -> nixhash::NixHashResult<Self> {
Ok(Self {
name: name.unwrap_or_else(|| url_basename(&url).to_owned()),
url,
hash: sha256
.map(|h| {
let hash = nixhash::from_str(&h, Some("sha256"))?;
Ok(match mode {
HashMode::Flat => Some(nixhash::CAHash::Flat(hash)),
HashMode::Recursive => Some(nixhash::CAHash::Nar(hash)),
})
})
.transpose()?
.flatten(),
})
}
fn store_path(&self) -> Result<Option<StorePathRef>, ErrorKind> {
let Some(h) = &self.hash else {
return Ok(None);
};
build_ca_path(&self.name, h, Vec::<String>::new(), false)
.map(Some)
.map_err(|e| FetcherError::from(e).into())
}
async fn extract(
co: &GenCo,
args: Value,
default_name: Option<&str>,
mode: HashMode,
) -> Result<Result<Self, CatchableErrorKind>, ErrorKind> {
if let Ok(url) = args.to_str() {
return Ok(Ok(FetchArgs::new(
url.to_str()?.to_owned(),
None,
None,
mode,
)
.map_err(DerivationError::InvalidOutputHash)?));
}
let attrs = args.to_attrs().map_err(|_| ErrorKind::TypeError {
expected: "attribute set or string",
actual: args.type_of(),
})?;
let url = match select_string(co, &attrs, "url").await? {
Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?,
Err(cek) => return Ok(Err(cek)),
};
let name = match select_string(co, &attrs, "name").await? {
Ok(s) => s.or_else(|| default_name.map(|s| s.to_owned())),
Err(cek) => return Ok(Err(cek)),
};
let sha256 = match select_string(co, &attrs, "sha256").await? {
Ok(s) => s,
Err(cek) => return Ok(Err(cek)),
};
Ok(Ok(
FetchArgs::new(url, name, sha256, mode).map_err(DerivationError::InvalidOutputHash)?
))
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FetchMode {
Url,
Tarball,
}
impl From<FetchMode> for HashMode {
fn from(value: FetchMode) -> Self {
match value {
FetchMode::Url => HashMode::Flat,
FetchMode::Tarball => HashMode::Recursive,
}
}
}
impl FetchMode {
fn default_name(self) -> Option<&'static str> {
match self {
FetchMode::Url => None,
FetchMode::Tarball => Some("source"),
}
}
}
fn string_from_store_path(store_path: StorePathRef) -> NixString {
NixString::new_context_from(
NixContextElement::Plain(store_path.to_absolute_path()).into(),
store_path.to_absolute_path(),
)
}
async fn fetch(
state: Rc<TvixStoreIO>,
co: GenCo,
// `fetchurl` and `fetchTarball` accept a single argument, which can either be the URL (as string),
// or an attrset, where `url`, `sha256` and `name` keys are allowed.
async fn extract_fetch_args(
co: &GenCo,
args: Value,
mode: FetchMode,
) -> Result<Value, ErrorKind> {
let args = match FetchArgs::extract(&co, args, mode.default_name(), mode.into()).await? {
Ok(args) => args,
Err(cek) => return Ok(cek.into()),
};
if let Some(store_path) = args.store_path()? {
if state.store_path_exists(store_path).await? {
return Ok(string_from_store_path(store_path).into());
}
) -> Result<Result<NixFetchArgs, CatchableErrorKind>, ErrorKind> {
if let Ok(url) = args.to_str() {
// Get the raw bytes, not the ToString repr.
let url = String::from_utf8(url.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?;
return Ok(Ok(NixFetchArgs {
url,
name: None,
sha256: None,
}));
}
let ca = args.hash;
let store_path = Rc::clone(&state).tokio_handle.block_on(async move {
match mode {
FetchMode::Url => {
state
.fetch_url(
&args.url,
&args.name,
ca.as_ref().map(|c| c.hash().into_owned()).as_ref(),
)
.await
}
FetchMode::Tarball => state.fetch_tarball(&args.url, &args.name, ca).await,
}
let attrs = args.to_attrs().map_err(|_| ErrorKind::TypeError {
expected: "attribute set or contextless string",
actual: args.type_of(),
})?;
Ok(string_from_store_path(store_path.as_ref()).into())
let url = match select_string(co, &attrs, "url").await? {
Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?,
Err(cek) => return Ok(Err(cek)),
};
let name = match select_string(co, &attrs, "name").await? {
Ok(s) => s,
Err(cek) => return Ok(Err(cek)),
};
let sha256_str = match select_string(co, &attrs, "sha256").await? {
Ok(s) => s,
Err(cek) => return Ok(Err(cek)),
};
// TODO: disallow other attrset keys, to match Nix' behaviour.
// parse the sha256 string into a digest.
let sha256 = match sha256_str {
Some(sha256_str) => {
let nixhash = nixhash::from_str(&sha256_str, Some("sha256"))
// TODO: DerivationError::InvalidOutputHash should be moved to ErrorKind::InvalidHash and used here instead
.map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
Some(nixhash.digest_as_bytes().try_into().expect("is sha256"))
}
None => None,
};
Ok(Ok(NixFetchArgs { url, name, sha256 }))
}
#[allow(unused_variables)] // for the `state` arg, for now
@ -199,7 +76,36 @@ async fn fetch(
pub(crate) mod fetcher_builtins {
use super::*;
use tvix_eval::generators::Gen;
/// Consumes a fetch.
/// If there is enough info to calculate the store path without fetching,
/// queue the fetch to be fetched lazily, and return the store path.
/// If there's not enough info to calculate it, do the fetch now, and then
/// return the store path.
fn fetch_lazy(state: Rc<TvixStoreIO>, name: String, fetch: Fetch) -> Result<Value, ErrorKind> {
match fetch
.store_path(&name)
.map_err(|e| ErrorKind::TvixError(Rc::new(e)))?
{
Some(store_path) => {
let path = store_path.to_absolute_path().into();
// TODO: add fetch to fetcher
drop(fetch);
Ok(Value::Path(Box::new(path)))
}
None => {
// If we don't have enough info, do the fetch now.
info!(?fetch, "triggering required fetch");
let (store_path, _root_node) = state
.tokio_handle
.block_on(async { state.fetcher.ingest_and_persist(&name, fetch).await })
.map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
Ok(Value::Path(Box::new(store_path.to_absolute_path().into())))
}
}
}
#[builtin("fetchurl")]
async fn builtin_fetchurl(
@ -207,7 +113,21 @@ pub(crate) mod fetcher_builtins {
co: GenCo,
args: Value,
) -> Result<Value, ErrorKind> {
fetch(state, co, args, FetchMode::Url).await
let args = match extract_fetch_args(&co, args).await? {
Ok(args) => args,
Err(cek) => return Ok(Value::from(cek)),
};
// Derive the name from the URL basename if not set explicitly.
let name = args
.name
.unwrap_or_else(|| url_basename(&args.url).to_owned());
fetch_lazy(
state,
name,
Fetch::URL(args.url, args.sha256.map(NixHash::Sha256)),
)
}
#[builtin("fetchTarball")]
@ -216,7 +136,18 @@ pub(crate) mod fetcher_builtins {
co: GenCo,
args: Value,
) -> Result<Value, ErrorKind> {
fetch(state, co, args, FetchMode::Tarball).await
let args = match extract_fetch_args(&co, args).await? {
Ok(args) => args,
Err(cek) => return Ok(Value::from(cek)),
};
// Name defaults to "source" if not set explicitly.
const DEFAULT_NAME_FETCH_TARBALL: &str = "source";
let name = args
.name
.unwrap_or_else(|| DEFAULT_NAME_FETCH_TARBALL.to_owned());
fetch_lazy(state, name, Fetch::Tarball(args.url, args.sha256))
}
#[builtin("fetchGit")]
@ -228,71 +159,3 @@ pub(crate) mod fetcher_builtins {
Err(ErrorKind::NotImplemented("fetchGit"))
}
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use nix_compat::store_path::StorePath;
use super::*;
#[test]
fn fetchurl_store_path() {
let url = "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch";
let sha256 = "0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax";
let args = FetchArgs::new(url.into(), None, Some(sha256.into()), HashMode::Flat).unwrap();
assert_eq!(
args.store_path().unwrap().unwrap().to_owned(),
StorePath::from_str("06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap()
)
}
#[test]
fn fetch_tarball_store_path() {
let url = "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz";
let sha256 = "1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm";
let args = FetchArgs::new(
url.into(),
Some("source".into()),
Some(sha256.into()),
HashMode::Recursive,
)
.unwrap();
assert_eq!(
args.store_path().unwrap().unwrap().to_owned(),
StorePath::from_str("7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap()
)
}
mod url_basename {
use super::*;
#[test]
fn empty_path() {
assert_eq!(url_basename(""), "");
}
#[test]
fn path_on_root() {
assert_eq!(url_basename("/dir"), "dir");
}
#[test]
fn relative_path() {
assert_eq!(url_basename("dir/foo"), "foo");
}
#[test]
fn root_with_trailing_slash() {
assert_eq!(url_basename("/"), "");
}
#[test]
fn trailing_slash() {
assert_eq!(url_basename("/dir/"), "dir");
}
}
}