feat(tvix/store/nar-info): Support async blob upload

Right now, all blob uploads are performed synchronously. This means that
if a NAR contains many small files, the import time is dominated by the
round-trip time to the blob service. For small files, we can buffer them
in memory and upload them asynchronously to the blob service. Before
returning, we join all the uploads to make sure they complete
successfully before responding OK.

This reduces time to import a bash-interactive closure on my machine
from 1m19s to 7s.

Change-Id: Ica3695c159e6c8ad8769281ac20d037e3143e856
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10679
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Autosubmit: Connor Brewster <cbrewster@hey.com>
This commit is contained in:
Connor Brewster 2024-01-22 20:37:22 -06:00
parent d056329412
commit 85421b7f97
5 changed files with 85 additions and 11 deletions

View file

@ -7,6 +7,7 @@ import (
"fmt"
"io"
"os"
"sync"
"testing"
castorev1pb "code.tvl.fyi/tvix/castore-go"
@ -27,6 +28,7 @@ func TestRoundtrip(t *testing.T) {
narContents, err := io.ReadAll(f)
require.NoError(t, err)
var mu sync.Mutex
blobsMap := make(map[string][]byte, 0)
directoriesMap := make(map[string]*castorev1pb.Directory)
@ -41,7 +43,9 @@ func TestRoundtrip(t *testing.T) {
dgst := mustBlobDigest(bytes.NewReader(contents))
// put it in blobsMap
mu.Lock()
blobsMap[base64.StdEncoding.EncodeToString(dgst)] = contents
mu.Unlock()
return dgst, nil
},