feat(tvix/store/nar-bridge): Setup OpenTelemetry

Sets up OpenTelemetry integration for nar-bridge. Right now it will
export spans for HTTP server requests and all gRPC client requests.
Having the spans available will make performance work significantly
easier as it provides a high level overview of where time is being
spent.

In the future we can add application-specific metrics and
integrate logrus.

Change-Id: Ie3860675d7ffc626a95673ba062c3c798d8bb2a7
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10678
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: Connor Brewster <cbrewster@hey.com>
This commit is contained in:
Connor Brewster 2024-01-21 14:52:46 -06:00 committed by clbot
parent e8061fc619
commit d056329412
6 changed files with 200 additions and 34 deletions

View file

@ -4,17 +4,18 @@ import (
"context"
"os"
"os/signal"
"runtime/debug"
"time"
"github.com/alecthomas/kong"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
castorev1pb "code.tvl.fyi/tvix/castore-go"
narBridgeHttp "code.tvl.fyi/tvix/nar-bridge/pkg/http"
storev1pb "code.tvl.fyi/tvix/store-go"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
)
@ -24,24 +25,41 @@ var cli struct {
ListenAddr string `name:"listen-addr" help:"The address this service listens on" type:"string" default:"[::]:9000"` //nolint:lll
EnableAccessLog bool `name:"access-log" help:"Enable access logging" type:"bool" default:"true" negatable:""` //nolint:lll
StoreAddr string `name:"store-addr" help:"The address to the tvix-store RPC interface this will connect to" default:"localhost:8000"` //nolint:lll
EnableOtlp bool `name:"otlp" help:"Enable OpenTelemetry for logs, spans, and metrics" default:"true"` //nolint:lll
}
func main() {
_ = kong.Parse(&cli)
logLevel, err := logrus.ParseLevel(cli.LogLevel)
logLevel, err := log.ParseLevel(cli.LogLevel)
if err != nil {
log.Panic("invalid log level")
return
}
logrus.SetLevel(logLevel)
log.SetLevel(logLevel)
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
defer stop()
if cli.EnableOtlp {
buildInfo, ok := debug.ReadBuildInfo()
if !ok {
log.Fatal("failed to read build info")
}
shutdown, err := setupOpenTelemetry(ctx, "nar-bridge", buildInfo.Main.Version)
if err != nil {
log.WithError(err).Fatal("failed to setup OpenTelemetry")
}
defer shutdown(context.Background())
}
// connect to tvix-store
log.Debugf("Dialing to %v", cli.StoreAddr)
conn, err := grpc.DialContext(ctx, cli.StoreAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
conn, err := grpc.DialContext(ctx, cli.StoreAddr,
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
)
if err != nil {
log.Fatalf("did not connect: %v", err)
}

View file

@ -0,0 +1,87 @@
package main
import (
"context"
"errors"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
)
// setupOpenTelemetry wires up the process-wide OpenTelemetry state: a
// composite TraceContext+Baggage propagator, a tracer provider and a meter
// provider, all installed through the otel package's global setters.
//
// The telemetry resource is the SDK default resource merged with the given
// serviceName and serviceVersion attributes.
//
// On success it returns a shutdown function that calls the Shutdown method of
// every provider registered here, in registration order, and joins their
// errors. The registered list is cleared after the first call, so a second
// call does nothing and returns nil.
//
// On failure it returns a nil shutdown function and an error that joins the
// original failure with any error produced while shutting down the providers
// that had already been started.
func setupOpenTelemetry(ctx context.Context, serviceName, serviceVersion string) (func(context.Context) error, error) {
	var cleanups []func(context.Context) error

	// shutdown runs every registered cleanup and accumulates their errors.
	shutdown := func(ctx context.Context) error {
		var joined error
		for i := range cleanups {
			joined = errors.Join(joined, cleanups[i](ctx))
		}
		cleanups = nil
		return joined
	}

	// abort is the common failure exit: tear down whatever was started so
	// far and report that outcome together with the triggering error.
	abort := func(cause error) (func(context.Context) error, error) {
		return nil, errors.Join(cause, shutdown(ctx))
	}

	// Describe this service on top of the SDK's default resource.
	base := resource.Default()
	service := resource.NewWithAttributes(
		semconv.SchemaURL,
		semconv.ServiceName(serviceName),
		semconv.ServiceVersion(serviceVersion),
	)
	res, err := resource.Merge(base, service)
	if err != nil {
		return abort(err)
	}

	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	))

	tp, err := newTraceProvider(ctx, res)
	if err != nil {
		return abort(err)
	}
	cleanups = append(cleanups, tp.Shutdown)
	otel.SetTracerProvider(tp)

	mp, err := newMeterProvider(ctx, res)
	if err != nil {
		// The tracer provider registered above is shut down by abort.
		return abort(err)
	}
	cleanups = append(cleanups, mp.Shutdown)
	otel.SetMeterProvider(mp)

	return shutdown, nil
}
// newTraceProvider builds a tracer provider that batches spans to an
// OTLP/gRPC trace exporter and attaches res to them.
//
// The exporter is created with no explicit options, so its endpoint and
// transport settings are whatever the exporter library defaults to. Any
// error from creating the exporter is returned unchanged.
func newTraceProvider(ctx context.Context, res *resource.Resource) (*trace.TracerProvider, error) {
	exporter, err := otlptracegrpc.New(ctx)
	if err != nil {
		return nil, err
	}

	opts := []trace.TracerProviderOption{
		trace.WithBatcher(exporter),
		trace.WithResource(res),
	}
	return trace.NewTracerProvider(opts...), nil
}
// newMeterProvider builds a meter provider that attaches res to its metrics
// and exports them through a periodic reader backed by an OTLP/gRPC metric
// exporter.
//
// The exporter and the periodic reader are both created with no explicit
// options, so library defaults apply. Any error from creating the exporter
// is returned unchanged.
func newMeterProvider(ctx context.Context, res *resource.Resource) (*metric.MeterProvider, error) {
	exporter, err := otlpmetricgrpc.New(ctx)
	if err != nil {
		return nil, err
	}

	withResource := metric.WithResource(res)
	withReader := metric.WithReader(metric.NewPeriodicReader(exporter))
	return metric.NewMeterProvider(withResource, withReader), nil
}