feat(users/Profpatsch/lyric): read lyric sqlite from squashfs

The unpacked lrclib dump is 40GB at this point, but compresses very
well (to about 10GB give or take). So I thought about how to store it
in a compressed form on disk, but still allow sqlite to query it
efficiently (i.e. use a compression format that has arbitrary seek
indices).

Squashfs! with zstd compression!

```
mksquashfs ~/.cache/lyric-bak/ \
  ~/tmp/squashtest \
  -comp zstd \
  -Xcompression-level 5
```

Still need to write a helper that does the download & compression for
me to update the database once in a blue moon.

Unfortunately, sqlite-utils does not allow URL connection strings yet,
so I had to do a horrible SQL interpolation bit.
Issue tracker: https://mastodon.xyz/@Profpatsch/114083481108799334

This also changes the query to filter out every track that has neither
plain nor synced lyrics (yeah those exist for some reason?).

Change-Id: Ief0519ae8d9e5f14b66f6df6e3bcecae911ea142
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13208
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
This commit is contained in:
Profpatsch 2025-03-06 13:25:33 +01:00
parent 0002a5163f
commit b865618291

View file

@ -1,7 +1,11 @@
{ pkgs, depot, lib, ... }:
let
# External programs used by the execline scripts in this file, referenced
# below as bins.<name> (e.g. bins.sed, bins.unshare, bins.squashfuse,
# bins.sqlite3, bins.jq).
# NOTE(review): depot.nix.getBins presumably returns one attribute per listed
# binary name, pointing at that binary inside the given package — confirm
# against the helper's definition.
bins = depot.nix.getBins pkgs.sqlite-utils [ "sqlite-utils" ]
bins = depot.nix.getBins pkgs.sqlite [ "sqlite3" ]
// depot.nix.getBins pkgs.util-linux [ "unshare" ]
// depot.nix.getBins pkgs.coreutils [ "echo" ]
// depot.nix.getBins pkgs.gnused [ "sed" ]
// depot.nix.getBins pkgs.squashfuse [ "squashfuse" ]
// depot.nix.getBins pkgs.jq [ "jq" ];
mpv-script = pkgs.writeTextFile {
@ -34,39 +38,89 @@ let
"\${cache}/lyric/as-files/\${1}.lrc"
];
# Escape the first argument for embedding inside a single-quoted SQLite string
# literal and print the result on stdout: every single quote is doubled
# (' becomes ''), which is how SQL spells a literal quote inside '…'.
#
# This only makes the value safe as an SQL string literal; it does not escape
# anything for whatever syntax the resulting string is interpreted in later.
escapeSqliteString = depot.nix.writeExecline "escape-sqlite-string" { readNArgs = 1; } [
"pipeline"
[
"printf"
"%s"
"$1"
]
bins.sed
# This is a regular "…" Nix string, so every ' below is a literal quote
# character (the ''' escape only exists inside ''…'' strings). The previous
# script, s/''/''''/g, therefore only rewrote *pairs* of quotes and let a
# lone ' through unescaped.
"s/'/''/g"
];
# Display lyrics for the given search string;
# search string can contain a substring of band name, album name, song title
#
# Use the database dump from https://lrclib.net/db-dumps, pack it into a squashfs image
# (with lrclib-db-dump.sqlite3 at the image root) and place the image at ~/.cache/lyric/lyric-db.squash
#
# TODO: put in the nodejs argh
# Execline script taking one argument (the search string). As far as visible here it:
#   1. reads the cache directory from depot.users.Profpatsch.xdg-cache-home into `cache`,
#   2. enters new user/mount/pid namespaces via unshare,
#   3. creates ${cache}/lyric/dump and mounts ${cache}/lyric/lyric-db.squash there with squashfuse,
#   4. escapes the search string with escapeSqliteString into `searchstring`,
#   5. pipes a query into a sqlite tool and pipes that output into jq.
#
# NOTE(review): this span looks like a rendered diff with removed and added lines
# interleaved — the bins.sqlite-utils "query" invocation with its --param arguments
# next to the bins.sqlite3 invocation fed by "echo". Confirm against the real file
# before editing.
# NOTE(review): inside a Nix ''…'' string, ''' is the escape for a literal ''.
# The `!= ''''` in the where-clause therefore likely renders as three single quotes
# rather than the SQL empty string '' — confirm the generated SQL.
# NOTE(review): the where-clause joins the synced_lyrics and plain_lyrics checks with
# `and`, which keeps only rows that have both; if the intent is to drop rows that
# have neither, this would need `or` — confirm.
# NOTE(review): the two UNION branches are textually identical, although the comment
# says the first one tries to find the track verbatim — confirm what was intended.
# NOTE(review): "echo" is passed as a bare name here even though `bins` collects
# coreutils' echo — confirm which one is wanted.
lyric =
(depot.nix.writeExecline "lyric" { readNArgs = 1; } [
"backtick"
"-E"
"cache"
[ depot.users.Profpatsch.xdg-cache-home ]
# make sure the squashfuse is only mounted while the command is running
bins.unshare
"--user"
"--mount"
"--pid"
"--map-root-user"
"--kill-child"
"if"
[ "mkdir" "-p" "\${cache}/lyric/dump" ]
# TODO: provide a command that takes an url of a lyric.gz and converts it to this here squash image
"if"
[ bins.squashfuse "-ononempty" "\${cache}/lyric/lyric-db.squash" "\${cache}/lyric/dump" ]
# please help me god
"backtick"
"-E"
"searchstring"
[ escapeSqliteString "$1" ]
"pipeline"
[
bins.sqlite-utils
"query"
"\${cache}/lyric/lrclib-db-dump.sqlite3"
''
select
synced_lyrics,
has_synced_lyrics,
plain_lyrics
from
tracks_fts(:searchstring) tf
join tracks t on t.rowid = tf.rowid
join lyrics l on t.rowid = l.track_id
order by
has_synced_lyrics desc, t.id
"pipeline"
[
"echo"
(''
.mode json
select * from (
-- first we try to find if we can find the track verbatim
select * from (select
synced_lyrics,
has_synced_lyrics,
plain_lyrics
from
tracks_fts('' + "'\${searchstring}'" + '') tf
join tracks t on t.rowid = tf.rowid
join lyrics l on t.rowid = l.track_id
order by
has_synced_lyrics desc, t.id
)
UNION
select * from (select
synced_lyrics,
has_synced_lyrics,
plain_lyrics
from
tracks_fts('' + "'\${searchstring}'" + '') tf
join tracks t on t.rowid = tf.rowid
join lyrics l on t.rowid = l.track_id
order by
has_synced_lyrics desc, t.id
)
)
where synced_lyrics is not null and synced_lyrics != ''''
and plain_lyrics is not null and plain_lyrics != ''''
limit
1
1;
''
"--param"
"searchstring"
"$1"
)
]
bins.sqlite3
"file:\${cache}/lyric/dump/lrclib-db-dump.sqlite3?immutable=1"
]
bins.jq
"-r"
@ -84,9 +138,12 @@ let
]);
js = depot.users.Profpatsch.napalm.buildPackage ./. { };
in
{
inherit
lyric
js
mpv-script;
}