feat(xanthous/server): Add simple prometheus metrics
Add a Prometheus exporter and some simple Prometheus metrics, so that I can look at dashboards and get alerts for things like lots of connections.

Change-Id: Ic1e0568200299dc852b74da647a6354267ee7576
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3811
Reviewed-by: grfn <grfn@gws.fyi>
Tested-by: BuildkiteCI
This commit is contained in:
parent
77f0d62a2c
commit
5327d238e3
4 changed files with 589 additions and 11 deletions
|
|
@ -1,5 +1,4 @@
|
|||
use std::net::SocketAddr;
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
|
|
@ -9,6 +8,7 @@ use color_eyre::eyre::Result;
|
|||
use eyre::{bail, eyre};
|
||||
use futures::future::{ready, Ready};
|
||||
use futures::Future;
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use nix::pty::Winsize;
|
||||
use pty::ChildHandle;
|
||||
use thrussh::ChannelId;
|
||||
|
|
@ -19,13 +19,18 @@ use thrussh::{
|
|||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::TcpListener;
|
||||
use tokio::select;
|
||||
use tokio::time::Instant;
|
||||
use tracing::{debug, error, info, info_span, trace, warn, Instrument};
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use crate::pty::WaitPid;
|
||||
|
||||
mod metrics;
|
||||
mod pty;
|
||||
|
||||
use crate::metrics::reported::*;
|
||||
use crate::metrics::{decrement_gauge, histogram, increment_counter, increment_gauge};
|
||||
|
||||
/// SSH-compatible server for playing Xanthous
|
||||
#[derive(Parser, Debug)]
|
||||
struct Opts {
|
||||
|
|
@ -33,6 +38,10 @@ struct Opts {
|
|||
#[clap(long, short = 'a', default_value = "0.0.0.0:22")]
|
||||
address: String,
|
||||
|
||||
/// Address to listen to for metrics
|
||||
#[clap(long, default_value = "0.0.0.0:9000")]
|
||||
metrics_address: SocketAddr,
|
||||
|
||||
/// Format to use when emitting log events
|
||||
#[clap(
|
||||
long,
|
||||
|
|
@ -132,6 +141,7 @@ impl Handler {
|
|||
|
||||
let child = pty::spawn(cmd, Some(winsize), None).await?;
|
||||
info!(pid = %child.pid, "Spawned child");
|
||||
increment_gauge!(RUNNING_PROCESSES, 1.0);
|
||||
self.child = Some(child.handle().await?);
|
||||
tokio::spawn(
|
||||
async move {
|
||||
|
|
@ -143,6 +153,7 @@ impl Handler {
|
|||
span.in_scope(|| error!(%error, "Error running child"));
|
||||
let _ = handle.close(channel_id).await;
|
||||
}
|
||||
decrement_gauge!(RUNNING_PROCESSES, 1.0);
|
||||
}
|
||||
.in_current_span(),
|
||||
);
|
||||
|
|
@ -285,12 +296,19 @@ async fn main() -> Result<()> {
|
|||
color_eyre::install()?;
|
||||
let opts = Box::leak::<'static>(Box::new(Opts::parse()));
|
||||
opts.init_logging()?;
|
||||
PrometheusBuilder::new()
|
||||
.listen_address(opts.metrics_address)
|
||||
.install()?;
|
||||
metrics::register();
|
||||
|
||||
let config = Arc::new(opts.ssh_server_config()?);
|
||||
info!(address = %opts.address, "Listening for new SSH connections");
|
||||
let listener = TcpListener::bind(&opts.address).await?;
|
||||
|
||||
loop {
|
||||
let (stream, address) = listener.accept().await?;
|
||||
increment_counter!(CONNECTIONS_ACCEPTED);
|
||||
increment_gauge!(ACTIVE_CONNECTIONS, 1.0);
|
||||
let config = config.clone();
|
||||
let handler = Handler {
|
||||
xanthous_binary_path: &opts.xanthous_binary_path,
|
||||
|
|
@ -300,12 +318,17 @@ async fn main() -> Result<()> {
|
|||
};
|
||||
tokio::spawn(async move {
|
||||
let span = info_span!("client", address = %handler.address);
|
||||
let start = Instant::now();
|
||||
if let Err(error) = server::run_stream(config, stream, handler)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
{
|
||||
span.in_scope(|| error!(%error));
|
||||
}
|
||||
let duration = start.elapsed();
|
||||
span.in_scope(|| info!(duration_ms = %duration.as_millis(), "Client disconnected"));
|
||||
histogram!(CONNECTION_DURATION, duration);
|
||||
decrement_gauge!(ACTIVE_CONNECTIONS, 1.0);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
24
users/grfn/xanthous/server/src/metrics.rs
Normal file
24
users/grfn/xanthous/server/src/metrics.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
pub use ::metrics::*;
|
||||
|
||||
/// Names of every metric this server reports.
///
/// Each constant is the string key handed to the `metrics` macros; the kind
/// of metric (counter, gauge, histogram) is noted on the individual item.
pub mod reported {
    // --- Per-connection metrics -------------------------------------------

    /// Counter: number of connections accepted on the TCP listener.
    pub const CONNECTIONS_ACCEPTED: &str = "ssh.connections.accepted";

    /// Gauge: number of connections that are currently active.
    pub const ACTIVE_CONNECTIONS: &str = "ssh.connections.active";

    /// Histogram: how long each connection lasted.
    pub const CONNECTION_DURATION: &str = "ssh.connections.duration";

    // --- Child-process metrics --------------------------------------------

    /// Gauge: number of xanthous child processes that are currently running.
    pub const RUNNING_PROCESSES: &str = "ssh.child.processes";
}
|
||||
|
||||
pub fn register() {
|
||||
use reported::*;
|
||||
|
||||
register_counter!(CONNECTIONS_ACCEPTED);
|
||||
register_histogram!(CONNECTION_DURATION);
|
||||
register_gauge!(ACTIVE_CONNECTIONS);
|
||||
register_gauge!(RUNNING_PROCESSES);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue