diff --git a/ops/machines/build01/default.nix b/ops/machines/build01/default.nix
index c91fb7e7d..698026893 100644
--- a/ops/machines/build01/default.nix
+++ b/ops/machines/build01/default.nix
@@ -5,7 +5,7 @@ let
 in
 {
   imports = [
-    (mod "o11y/agent.nix")
+    (mod "o11y/alloy.nix")
     (mod "snix-buildkite.nix")
     (mod "harmonia.nix")
     (mod "known-hosts.nix")
diff --git a/ops/machines/gerrit01/default.nix b/ops/machines/gerrit01/default.nix
index ab53c6fc8..a895316a8 100644
--- a/ops/machines/gerrit01/default.nix
+++ b/ops/machines/gerrit01/default.nix
@@ -10,7 +10,7 @@ in
 
     (mod "hetzner-cloud.nix")
     (mod "restic.nix")
-    (mod "o11y/agent.nix")
+    (mod "o11y/alloy.nix")
     (mod "gerrit-autosubmit.nix")
     (mod "monorepo-gerrit.nix")
     (mod "www/cl.snix.dev.nix")
diff --git a/ops/machines/meta01/default.nix b/ops/machines/meta01/default.nix
index ae9eb0123..d1450808f 100644
--- a/ops/machines/meta01/default.nix
+++ b/ops/machines/meta01/default.nix
@@ -9,7 +9,7 @@ in
     ./disko.nix
 
     (mod "hetzner-cloud.nix")
-    (mod "o11y/agent.nix")
+    (mod "o11y/alloy.nix")
     (mod "o11y/mimir.nix")
     (mod "o11y/loki.nix")
     (mod "o11y/tempo.nix")
diff --git a/ops/machines/public01/default.nix b/ops/machines/public01/default.nix
index 5a738b4a7..c9b3c2067 100644
--- a/ops/machines/public01/default.nix
+++ b/ops/machines/public01/default.nix
@@ -12,7 +12,7 @@ in
     (mod "forgejo.nix")
     (mod "restic.nix")
     # Automatically enable metric and log collection.
-    (mod "o11y/agent.nix")
+    (mod "o11y/alloy.nix")
     (mod "o11y/grafana.nix")
     (mod "www/snix.dev.nix")
     (mod "www/bolt.snix.dev.nix")
diff --git a/ops/modules/o11y/agent.nix b/ops/modules/o11y/agent.nix
deleted file mode 100644
index cbf939828..000000000
--- a/ops/modules/o11y/agent.nix
+++ /dev/null
@@ -1,113 +0,0 @@
-{ depot
-, config
-, lib
-, ...
-}:
-let
-  cfg = config.infra.monitoring.grafana-agent;
-  inherit (lib) mkEnableOption mkOption mkIf types;
-  passwordAsCredential = "\${CREDENTIALS_DIRECTORY}/password";
-in
-{
-  options.infra.monitoring.grafana-agent = {
-    enable = (mkEnableOption "Grafana Agent") // { default = true; };
-
-    exporters = mkOption {
-      description = ''
-        Set of additional exporters to scrape.
-
-        The attribute name will be used as `job_name`
-        internally, which ends up exported as `job` label
-        on all metrics of that exporter.
-      '';
-      type = types.attrsOf (types.submodule ({ config, name, ... }: {
-        options.port = mkOption {
-          description = "Exporter port";
-          type = types.int;
-        };
-        options.scrapeConfig = mkOption {
-          description = "Prometheus scrape config";
-          type = types.attrs;
-        };
-        config.scrapeConfig = lib.mkMerge [{
-          job_name = name;
-          static_configs = [
-            { targets = [ "localhost:${toString config.port}" ]; }
-          ];
-        }];
-      }));
-      default = { };
-    };
-  };
-
-  config = mkIf cfg.enable {
-    age.secrets.grafana-agent-password.file = depot.ops.secrets."grafana-agent-password.age";
-
-    services.grafana-agent = {
-      enable = true;
-      credentials = lib.mkMerge ([{ password = config.age.secrets.grafana-agent-password.path; }] ++
-        lib.mapAttrsToList (name: value: value.secrets) config.infra.monitoring.grafana-agent.exporters);
-      settings = {
-        metrics = {
-          global.remote_write = [
-            {
-              url = "https://mimir.snix.dev/api/v1/push";
-              basic_auth = {
-                username = "promtail";
-                password_file = passwordAsCredential;
-              };
-            }
-          ];
-          global.external_labels = {
-            hostname = config.networking.hostName;
-          };
-          configs = [
-            {
-              name = config.networking.hostName;
-              scrape_configs = lib.mapAttrsToList (name: value: value.scrapeConfig) config.infra.monitoring.grafana-agent.exporters;
-            }
-          ];
-        };
-        # logs = {
-        #   global.clients = [
-        #     {
-        #       url = "https://loki.forkos.org/loki/api/v1/push";
-        #       basic_auth = {
-        #         username = "promtail";
-        #         password_file = passwordAsCredential;
-        #       };
-        #     }
-        #   ];
-        #   configs = [
-        #     {
-        #       name = "journald";
-        #       scrape_configs = [
-        #         {
-        #           job_name = "system";
-        #           journal = {
-        #             max_age = "12h";
-        #             labels = {
-        #               job = "systemd-journal";
-        #               host = config.networking.hostName;
-        #             };
-        #           };
-        #           relabel_configs = [
-        #             {
-        #               source_labels = [ "__journal__systemd_unit" ];
-        #               target_label = "unit";
-        #             }
-        #           ];
-        #         }
-        #       ];
-        #     }
-        #   ];
-        #   positions_directory = "\${STATE_DIRECTORY}/positions";
-        # };
-        integrations.node_exporter.enable_collectors = [
-          "processes"
-          "systemd"
-        ];
-      };
-    };
-  };
-}
diff --git a/ops/modules/o11y/alloy.nix b/ops/modules/o11y/alloy.nix
new file mode 100644
index 000000000..f48e67ac0
--- /dev/null
+++ b/ops/modules/o11y/alloy.nix
@@ -0,0 +1,96 @@
+{ depot
+, config
+, lib
+, ...
+}:
+let
+  cfg = config.infra.monitoring.alloy;
+  inherit (lib) mkEnableOption mkOption mkIf types mapAttrs' nameValuePair;
+in
+{
+  options.infra.monitoring.alloy = {
+    enable = (mkEnableOption "Grafana Alloy") // { default = true; };
+
+    exporters = mkOption {
+      description = ''
+        Set of additional exporters to scrape.
+
+        The attribute name will be used as `job_name`
+        internally, which ends up exported as `job` label
+        on all metrics of that exporter.
+      '';
+      type = types.attrsOf (types.submodule ({ config, name, ... }: {
+        options.port = mkOption {
+          description = "Exporter port";
+          type = types.int;
+        };
+      }));
+      default = { };
+    };
+  };
+
+  config = mkIf cfg.enable {
+    age.secrets.alloy-password.file = depot.ops.secrets."grafana-agent-password.age";
+
+    services.alloy.enable = true;
+
+    environment.etc = {
+      "alloy/config.alloy".text = ''
+        prometheus.exporter.unix "default" {
+          enable_collectors = [
+            "processes",
+            // cannot work currently, as alloy cannot talk to dbus:
+            // "systemd"
+          ]
+        }
+
+        // Configure node exporter
+        prometheus.scrape "node_exporter" {
+          targets = prometheus.exporter.unix.default.targets
+          forward_to = [prometheus.remote_write.mimir.receiver]
+        }
+
+        // Configure a prometheus.scrape component to collect Alloy metrics.
+        prometheus.exporter.self "default" {}
+        prometheus.scrape "self" {
+          targets = prometheus.exporter.self.default.targets
+          forward_to = [prometheus.remote_write.mimir.receiver]
+        }
+
+        prometheus.remote_write "mimir" {
+          endpoint {
+            url = "https://mimir.snix.dev/api/v1/push"
+            basic_auth {
+              username = "promtail" // FUTUREWORK: rename this
+              password_file = format("%s/metrics_remote_write_password", env("CREDENTIALS_DIRECTORY"))
+            }
+          }
+          external_labels = {
+            hostname = constants.hostname,
+          }
+        }
+      '';
+    } // (mapAttrs'
+      # One scrape_<name>.alloy file per registered exporter. `job_name` is set
+      # explicitly so the `job` label stays the attribute name, as the option
+      # description promises; Alloy would otherwise default it to the component name.
+      (name: v: nameValuePair "alloy/scrape_${name}.alloy" {
+        text = ''
+          prometheus.scrape "${name}" {
+            job_name = "${name}"
+            targets = [
+              {"__address__" = "localhost:${toString v.port}"},
+            ]
+            forward_to = [prometheus.remote_write.mimir.receiver]
+          }
+        '';
+      })
+      cfg.exporters);
+
+    systemd.services.alloy.serviceConfig = {
+      LoadCredential = [
+        "metrics_remote_write_password:${config.age.secrets.alloy-password.path}"
+      ];
+    };
+  };
+}
diff --git a/ops/modules/o11y/grafana.nix b/ops/modules/o11y/grafana.nix
index 30c22543c..e1f5723d1 100644
--- a/ops/modules/o11y/grafana.nix
+++ b/ops/modules/o11y/grafana.nix
@@ -143,6 +143,6 @@ in
       };
     };
 
-    infra.monitoring.grafana-agent.exporters.grafana.port = 2342;
+    infra.monitoring.alloy.exporters.grafana.port = 2342;
   };
 }
diff --git a/ops/modules/o11y/loki.nix b/ops/modules/o11y/loki.nix
index 37eafdb39..134b0ebe9 100644
--- a/ops/modules/o11y/loki.nix
+++ b/ops/modules/o11y/loki.nix
@@ -85,6 +85,6 @@ in
 
     systemd.services.loki.serviceConfig.EnvironmentFile = [ config.age.secrets.loki-environment.path ];
 
-    infra.monitoring.grafana-agent.exporters.loki.port = 9090;
+    infra.monitoring.alloy.exporters.loki.port = 9090;
   };
 }
diff --git a/ops/modules/o11y/mimir.nix b/ops/modules/o11y/mimir.nix
index fb1260725..72174ce24 100644
--- a/ops/modules/o11y/mimir.nix
+++ b/ops/modules/o11y/mimir.nix
@@ -118,6 +118,6 @@ in
       };
     };
 
-    infra.monitoring.grafana-agent.exporters.mimir.port = 9009;
+    infra.monitoring.alloy.exporters.mimir.port = 9009;
   };
 }
diff --git a/ops/modules/o11y/tempo.nix b/ops/modules/o11y/tempo.nix
index 98f122cc2..75a8dac9d 100644
--- a/ops/modules/o11y/tempo.nix
+++ b/ops/modules/o11y/tempo.nix
@@ -66,6 +66,6 @@ in
       };
     };
 
-    infra.monitoring.grafana-agent.exporters.tempo.port = 9190;
+    infra.monitoring.alloy.exporters.tempo.port = 9190;
   };
 }