feat(ops/modules/www/git.snix.dev): block AI scrapers

This blocks a number of AI scrapers from Forgejo, which seems to be a
particularly attractive target for them.

meta-externalagent in particular has been scraping excessively.

The list comes from https://github.com/ai-robots-txt/ai.robots.txt,
let's see how often this needs updating.

Change-Id: I55ae7c42c6a3eeff6f0457411a8b05d55cb24f65
Reviewed-on: https://cl.snix.dev/c/snix/+/30370
Autosubmit: Florian Klink <flokli@flokli.de>
Tested-by: besadii
Reviewed-by: edef <edef@edef.eu>
This commit is contained in:
Florian Klink 2025-05-01 16:47:17 +03:00 committed by clbot
parent c501361412
commit 853754d25f
2 changed files with 16 additions and 1 deletion

View file

@ -1,4 +1,4 @@
{ ... }:
{ depot, ... }:
{
imports = [
@ -10,9 +10,12 @@
serverName = "git.snix.dev";
enableACME = true;
forceSSL = true;
locations."=/robots.txt".alias = "${depot.third_party.sources.ai-robots-txt}/robots.txt";
locations."/" = {
proxyPass = "http://127.0.0.1:3000";
extraConfig = ''
include ${depot.third_party.sources.ai-robots-txt + "/nginx-block-ai-bots.conf"};
proxy_ssl_server_name on;
proxy_pass_header Authorization;

View file

@ -11,6 +11,18 @@
"url": "https://github.com/ryantm/agenix/archive/e600439ec4c273cf11e06fe4d9d906fb98fa097c.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
},
"ai-robots-txt": {
"branch": "main",
"description": "A list of AI agents and robots to block.",
"homepage": "https://github.com/ai-robots-txt/ai.robots.txt/releases.atom",
"owner": "ai-robots-txt",
"repo": "ai.robots.txt",
"rev": "678380727e8685af8c5311bcfa1f55c7aa866d3b",
"sha256": "1wm84mbjypgzm0nfsl4c8pfkx93hr1iaxgq4s4lsxhzqk6x4ns5k",
"type": "tarball",
"url": "https://github.com/ai-robots-txt/ai.robots.txt/archive/678380727e8685af8c5311bcfa1f55c7aa866d3b.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
},
"bqn-libs": {
"branch": "master",
"description": "Informal collection of BQN utilities",