feat(tvix/nar-bridge): avoid unnecessary NAR uploads

When uploading a Store Path to a Nix HTTP Binary Cache, Nix first does a
HEAD request for $outhash.narinfo, and if that's not found, for
`{narhash}.nar[.compression_suffix]`.

If the NAR is already present, only the NARInfo is uploaded.

Even though we don't have a service allowing to globally look up from
NARHash to root node, `root_nodes` in `AppState` at least contains
recently uploaded NARHashes.

If we find it in there, we can prevent Nix unnecessarily uploading NARs
if the same contents have already been recently uploaded.
We also promote this key, chances are high Nix will subsequently upload
a NARInfo referring to this NARHash.

Change-Id: I34e3fd9b334b695abe945e64cd291e30f303c2a2
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12752
Tested-by: BuildkiteCI
Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com>
Autosubmit: flokli <flokli@flokli.de>
This commit is contained in:
Florian Klink 2024-11-09 15:45:06 +00:00 committed by clbot
parent 8400e523ce
commit 85de9b8dab
2 changed files with 32 additions and 8 deletions

View file

@ -53,10 +53,8 @@ impl AppState {
pub fn gen_router(priority: u64) -> Router<AppState> { pub fn gen_router(priority: u64) -> Router<AppState> {
Router::new() Router::new()
.route("/", get(root)) .route("/", get(root))
// FUTUREWORK: respond for NARs that we still have in root_nodes (at least HEAD)
// This avoids some unnecessary NAR uploading from multiple concurrent clients, and is cheap.
.route("/nar/:nar_str", get(four_o_four)) .route("/nar/:nar_str", get(four_o_four))
.route("/nar/:nar_str", head(four_o_four)) .route("/nar/:nar_str", head(nar::head_root_nodes))
.route("/nar/:nar_str", put(nar::put)) .route("/nar/:nar_str", put(nar::put))
.route("/nar/tvix-castore/:root_node_enc", get(nar::get_head)) .route("/nar/tvix-castore/:root_node_enc", get(nar::get_head))
.route("/nar/tvix-castore/:root_node_enc", head(nar::get_head)) .route("/nar/tvix-castore/:root_node_enc", head(nar::get_head))

View file

@ -1,6 +1,5 @@
use axum::extract::Query; use axum::extract::Query;
use axum::http::StatusCode; use axum::http::{Response, StatusCode};
use axum::response::Response;
use axum::{body::Body, response::IntoResponse}; use axum::{body::Body, response::IntoResponse};
use axum_extra::{headers::Range, TypedHeader}; use axum_extra::{headers::Range, TypedHeader};
use axum_range::{KnownSize, Ranged}; use axum_range::{KnownSize, Ranged};
@ -116,6 +115,36 @@ pub async fn get_head(
)) ))
} }
/// Handler to respond to GET/HEAD requests for recently uploaded NAR files.
/// Nix probes at {narhash}.nar[.compression_suffix] to determine whether a NAR
/// has already been uploaded, by responding to (some of) these requests we
/// avoid it unnecessarily uploading.
/// We don't keep a full K/V from NAR hash to root note around, only the
/// in-memory cache used to connect to the castore node when processing a PUT
/// for the NARInfo.
#[instrument(skip_all, fields(nar_str))]
pub async fn head_root_nodes(
axum::extract::Path(nar_str): axum::extract::Path<String>,
axum::extract::State(AppState { root_nodes, .. }): axum::extract::State<AppState>,
) -> Result<impl axum::response::IntoResponse, StatusCode> {
let (nar_hash, compression_suffix) =
nix_http::parse_nar_str(&nar_str).ok_or(StatusCode::UNAUTHORIZED)?;
// No paths with compression suffix are supported.
if !compression_suffix.is_empty() {
warn!(%compression_suffix, "invalid compression suffix requested");
return Err(StatusCode::UNAUTHORIZED);
}
// Check root_nodes, updating the moving it to the most recently used,
// as it might be referred in a subsequent NARInfo upload.
if root_nodes.write().get(&nar_hash).is_some() {
Ok("")
} else {
Err(StatusCode::NOT_FOUND)
}
}
#[instrument(skip(blob_service, directory_service, request))] #[instrument(skip(blob_service, directory_service, request))]
pub async fn put( pub async fn put(
axum::extract::Path(nar_str): axum::extract::Path<String>, axum::extract::Path(nar_str): axum::extract::Path<String>,
@ -172,6 +201,3 @@ pub async fn put(
Ok("") Ok("")
} }
// FUTUREWORK: maybe head by narhash. Though not too critical, as we do
// implement HEAD for .narinfo.