chore(ops): move archivist machine to ops and contrib
contrib/ gets the clickhouse patching, the bucket log parsing code and the awscli setup and shell. ops/ gets the machine config itself. Change-Id: If8b8f8cce5ca9c2b4d19e17be9a8b895ac35e84a Reviewed-on: https://cl.snix.dev/c/snix/+/30163 Autosubmit: Florian Klink <flokli@flokli.de> Tested-by: besadii Reviewed-by: Ryan Lahfa <masterancpp@gmail.com>
This commit is contained in:
parent
c3de9e21eb
commit
ae4d967288
14 changed files with 21 additions and 40 deletions
1
contrib/archivist/OWNERS
Normal file
1
contrib/archivist/OWNERS
Normal file
|
|
@ -0,0 +1 @@
|
|||
edef
|
||||
23
contrib/archivist/README.md
Normal file
23
contrib/archivist/README.md
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# archivist
|
||||
|
||||
This directory contains various scripts and helpers used for nix-archivist tasks.
|
||||
|
||||
It's used from the archivist EC2 instance, as well as standalone.
|
||||
|
||||
## AWS Profile setup
|
||||
There are two AWS accounts, both reachable via the nixos.awsapps.com SSO portal.
|
||||
|
||||
### archeologist
|
||||
This profile assumes the `archeologist` AWS role in the main NixOS account.
|
||||
|
||||
### archivist
|
||||
This is a separate AWS Account, only for the archivist project. We can assume
|
||||
`AWSAdministratorAccess` in there.
|
||||
|
||||
## archivist-ec2 Machine
|
||||
The `archivist-ec2` machine is currently deployed in the main NixOS account.
|
||||
|
||||
It regularly processes S3 bucket logs and dumps them in parquet format into
|
||||
another bucket.
|
||||
In the future, we want to move this machine to the dedicated `archivist` AWS
|
||||
account.
|
||||
61
contrib/archivist/default.nix
Normal file
61
contrib/archivist/default.nix
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
{ depot
|
||||
, pkgs
|
||||
, ...
|
||||
}:
|
||||
|
||||
let
|
||||
# Store file named "clickhouse-local.xml" holding a minimal clickhouse config.
# Its only setting is <s3><use_environment_credentials>true</...></s3>.
# It is passed to clickhouse-local via `-C` by the clickhouseLocalFixedAWS
# wrapper in this file.
# NOTE(review): presumably this makes clickhouse pick up AWS credentials from
# the environment (e.g. AWS_PROFILE) - confirm against the clickhouse docs.
clickhouseConfigAWS = builtins.toFile "clickhouse-local.xml" ''
|
||||
<clickhouse>
|
||||
<s3>
|
||||
<use_environment_credentials>true</use_environment_credentials>
|
||||
</s3>
|
||||
</clickhouse>
|
||||
'';
|
||||
# clickhouse has a very odd AWS config concept.
|
||||
# Configure it to be a bit more sane.
|
||||
# The derivation below provides $out/bin/clickhouse-local, a makeWrapper
# wrapper around pkgs.clickhouse's clickhouse-local that appends
# `-C ${clickhouseConfigAWS}` to every invocation, so callers get the config
# file above without having to pass it themselves.
clickhouseLocalFixedAWS = pkgs.runCommand "clickhouse-local-fixed"
|
||||
{
|
||||
nativeBuildInputs = [ pkgs.makeWrapper ];
|
||||
} ''
|
||||
mkdir -p $out/bin
|
||||
makeWrapper ${pkgs.clickhouse}/bin/clickhouse-local $out/bin/clickhouse-local \
|
||||
--append-flags "-C ${clickhouseConfigAWS}"
|
||||
'';
|
||||
|
||||
in
|
||||
depot.nix.readTree.drvTargets {
|
||||
inherit clickhouseLocalFixedAWS;
|
||||
|
||||
# Builds ./parse_bucket_logs.rs with pkgs.writers.writeRust and exposes it as
# $out/bin/archivist-parse-bucket-logs.
# The wrapper prefixes PATH with clickhouseLocalFixedAWS, so the
# `clickhouse-local` command the Rust program spawns resolves to the wrapped
# binary from this file.
parse-bucket-logs = pkgs.runCommand "archivist-parse-bucket-logs"
|
||||
{
|
||||
nativeBuildInputs = [ pkgs.makeWrapper ];
|
||||
} ''
|
||||
mkdir -p $out/bin
|
||||
makeWrapper ${(pkgs.writers.writeRust "parse-bucket-logs-unwrapped" {} ./parse_bucket_logs.rs)} $out/bin/archivist-parse-bucket-logs \
|
||||
--prefix PATH : ${pkgs.lib.makeBinPath [ clickhouseLocalFixedAWS ]}
|
||||
'';
|
||||
|
||||
# A development shell providing awscli2, pointing at the `archivist` SSO
# profile / account by default (via AWS_PROFILE).
|
||||
shell = pkgs.mkShell {
|
||||
name = "archivist-shell";
|
||||
packages = with pkgs; [ awscli2 ];
|
||||
|
||||
AWS_PROFILE = "archivist";
|
||||
# Generated AWS config file: one `nixos` SSO session plus the two profiles
# (`archeologist` and `archivist`) that use it.
AWS_CONFIG_FILE = pkgs.writeText "aws-config" ''
|
||||
[sso-session nixos]
|
||||
sso_region = eu-north-1
|
||||
sso_start_url = https://nixos.awsapps.com/start
|
||||
sso_registration_scopes = sso:account:access
|
||||
|
||||
[profile "archeologist"]
|
||||
sso_session = nixos
|
||||
sso_account_id = 080433136561 # nixos root
|
||||
sso_role_name = archeologist
|
||||
|
||||
[profile "archivist"]
|
||||
sso_session = nixos
|
||||
sso_account_id = 286553126452 # archivist
|
||||
sso_role_name = AWSAdministratorAccess
|
||||
'';
|
||||
};
|
||||
}
|
||||
42
contrib/archivist/parse_bucket_logs.rs
Normal file
42
contrib/archivist/parse_bucket_logs.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
use std::env;
|
||||
use std::process::Command;
|
||||
use std::process::ExitCode;
|
||||
|
||||
/// Builds the SQL statement handed to `clickhouse-local -q`.
///
/// The query reads every object matching `input_files` through the `s3()`
/// table function using the `Regexp` input format, turns `-` placeholders into
/// NULLs, parses the timestamp, orders rows by it and writes the result as a
/// zstd-compressed Parquet file to `output_file`.
///
/// NOTE(review): both arguments are interpolated verbatim into single-quoted
/// SQL string literals, so a `'` inside either of them breaks the statement.
fn build_query(input_files: &str, output_file: &str) -> String {
    format!(r#"SELECT
        key,
        toInt64(nullif(http_status, '-')) AS http_status,
        toInt64(nullif(object_size_str, '-')) AS object_size,
        toInt64(nullif(bytes_sent_str, '-')) AS bytes_sent,
        nullif(user_agent, '-') AS user_agent,
        operation,
        nullif(requester, '-') AS requester,
        parseDateTime(timestamp_str, '%d/%b/%Y:%k:%i:%s %z') AS timestamp
    FROM s3(
        '{}',
        'Regexp',
        'owner String , bucket String, timestamp_str String, remote_ip String, requester LowCardinality(String), request_id String, operation LowCardinality(String), key String, request_uri String, http_status String, error_code String, bytes_sent_str String, object_size_str String, total_time String, turn_around_time String, referer String, user_agent String, version_id String, host_id String, signature_version String, cipher_suite String, authentication_type String, host_header String, tls_version String, access_point_arn String, acl_required String'
    )
    ORDER BY timestamp ASC
    SETTINGS
        format_regexp_skip_unmatched = 1,
        format_regexp = '(\\S+) (\\S+) \\[(.*)\\] (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) ((?:\\S+ \\S+ \\S+)|\\S+) (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) ("\\S+") (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) (\\S+) (\\S+).*',
        output_format_parquet_compression_method = 'zstd'
    INTO OUTFILE '{}' FORMAT Parquet"#, input_files, output_file)
}

/// Converts S3 bucket logs into a Parquet file by running `clickhouse-local`.
///
/// Expects exactly two command line arguments: the input S3 URL (may be a
/// glob) and the path of the Parquet file to write.
///
/// Returns a failure exit code when the arguments are wrong, when
/// `clickhouse-local` cannot be started, or when it exits unsuccessfully.
fn main() -> ExitCode {
    let args: Vec<String> = env::args().collect();
    if args.len() != 3 {
        eprintln!("needs two args, input s3 url (glob) and output pq file");
        return ExitCode::FAILURE;
    }

    let input_files = &args[1];
    let output_file = &args[2];

    // `clickhouse-local` is looked up via PATH.
    let outcome = Command::new("clickhouse-local")
        .arg("--progress")
        .arg("-q")
        .arg(build_query(input_files, output_file))
        .status();

    match outcome {
        Ok(status) if status.success() => ExitCode::SUCCESS,
        Ok(status) => {
            // The child ran but reported failure; do not pretend the Parquet
            // file was produced.
            eprintln!("clickhouse-local failed: {}", status);
            // Forward the child's exit code when it fits into a non-zero u8;
            // otherwise (e.g. killed by a signal) use the generic failure code.
            status
                .code()
                .and_then(|code| u8::try_from(code).ok())
                .filter(|&code| code != 0)
                .map_or(ExitCode::FAILURE, ExitCode::from)
        }
        Err(err) => {
            // Spawning failed (binary missing, not executable, ...).
            eprintln!("failed to run clickhouse-local: {}", err);
            ExitCode::FAILURE
        }
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue