snix/users/Profpatsch/lyric/default.nix
Profpatsch b865618291 feat(users/Profpatsch/lyric): read lyric sqlite from squashfs
The unpacked lrclib dump is 40GB at this point, but compresses very
well (to about 10GB give or take). So I thought about how to store it
in compressed form on disk, but still allow sqlite to query it
efficiently (i.e. use a compression format that has arbitrary seek
indices).

Squashfs! with zstd compression!

```
mksquashfs ~/.cache/lyric-bak/ \
  ~/tmp/squashtest \
  -comp zstd \
  -Xcompression-level 5
```

Still need to write a helper that does the download & compression for
me to update the database once in a blue moon.

Unfortunately, sqlite-utils does not allow URL connection strings yet,
so I had to do a horrible SQL interpolation bit.
Issue tracker: https://mastodon.xyz/@Profpatsch/114083481108799334

This also changes the query to filter out every track that has neither
plain nor synched lyrics (yeah those exist for some reason?).

Change-Id: Ief0519ae8d9e5f14b66f6df6e3bcecae911ea142
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13208
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
2025-03-06 13:16:46 +00:00

149 lines
3.9 KiB
Nix

{ pkgs, depot, lib, ... }:
let
# Absolute paths to every external executable used by the scripts below,
# collected into a single attribute set (`bins.<name>`).
bins = builtins.foldl' (acc: found: acc // found) { } [
  (depot.nix.getBins pkgs.sqlite [ "sqlite3" ])
  (depot.nix.getBins pkgs.util-linux [ "unshare" ])
  (depot.nix.getBins pkgs.coreutils [ "echo" ])
  (depot.nix.getBins pkgs.gnused [ "sed" ])
  (depot.nix.getBins pkgs.squashfuse [ "squashfuse" ])
  (depot.nix.getBins pkgs.jq [ "jq" ])
];
# The mpv Lua script, with the `@get_subtitles_command@` placeholder replaced
# by the store path of `lyric-to-temp-file`.
mpv-script =
  let
    scriptName = "lyric.lua";
    luaTemplate = builtins.readFile ./lyric-mpv-script.lua;
    placeholders = [ "@get_subtitles_command@" ];
    replacements = [ "${lyric-to-temp-file}" ];
  in
  pkgs.writeTextFile {
    name = scriptName;
    text = lib.replaceStrings placeholders replacements luaTemplate;
    # exposed as passthru attribute on the resulting derivation
    derivationArgs = {
      passthru = {
        inherit scriptName;
      };
    };
  };
# Look up the lyrics for the search string given as first argument, write them
# to `<cache>/lyric/as-files/<search string>.lrc` and print the path of that
# file to stdout (without a trailing newline).
#
# NOTE(review): the search string is used verbatim as the file name, so a
# search string containing `/` points into a sub-directory that is not
# created here -- confirm whether callers can pass such strings.
lyric-to-temp-file = depot.nix.writeExecline "lyric-to-temp-file" { readNArgs = 1; } [
  # cache <- stdout of the xdg-cache-home program
  "backtick"
  "-E"
  "cache"
  [ depot.users.Profpatsch.xdg-cache-home ]
  "if"
  [ "mkdir" "-p" "\${cache}/lyric/as-files" ]
  # run `lyric "$1"` with its stdout redirected into the .lrc file;
  # `if` aborts the script when that fails
  "if"
  [
    "redirfd"
    "-w"
    "1"
    "\${cache}/lyric/as-files/\${1}.lrc"
    lyric
    "$1"
  ]
  # Print the path through an explicit "%s" format: the path contains the
  # user-supplied search string, which must not be interpreted as a printf
  # format (a `%` or `\` in it would otherwise garble the output).
  "printf"
  "%s"
  "\${cache}/lyric/as-files/\${1}.lrc"
];
# Escape the first argument for use inside a single-quoted SQL string literal
# by doubling every single quote (`'` becomes `''`); the escaped string is
# printed to stdout.
escapeSqliteString = depot.nix.writeExecline "escape-sqlite-string" { readNArgs = 1; } [
  "pipeline"
  [
    "printf"
    "%s"
    "$1"
  ]
  bins.sed
  # This is a regular double-quoted Nix string, so quotes reach sed verbatim
  # (the `'''` escape only exists inside indented ''…'' strings).
  # Match ONE quote and emit TWO; matching two and emitting four would leave
  # a lone `'` unescaped.
  "s/'/''/g"
];
# Display lyrics for the given search string;
# the search string can contain a substring of band name, album name, song title.
#
# Use the database dump from https://lrclib.net/db-dumps, packed into a
# squashfs image at <cache>/lyric/lyric-db.squash which contains the database
# as `lrclib-db-dump.sqlite3` at its root. The image is mounted on
# <cache>/lyric/dump for the duration of the command.
#
# Prints the synced lyrics if the best match has them, the plain lyrics
# otherwise, and an empty line if the first result is null.
#
# TODO: put in the nodejs argh
lyric =
  (depot.nix.writeExecline "lyric" { readNArgs = 1; } [
    # cache <- stdout of the xdg-cache-home program
    "backtick"
    "-E"
    "cache"
    [ depot.users.Profpatsch.xdg-cache-home ]
    # make sure the squashfuse is only mounted while the command is running
    bins.unshare
    "--user"
    "--mount"
    "--pid"
    "--map-root-user"
    "--kill-child"
    "if"
    [ "mkdir" "-p" "\${cache}/lyric/dump" ]
    # TODO: provide a command that takes an url of a lyric.gz and converts it to this here squash image
    "if"
    [ bins.squashfuse "-ononempty" "\${cache}/lyric/lyric-db.squash" "\${cache}/lyric/dump" ]
    # The sqlite3 CLI is fed the query on stdin, so the search string has to be
    # interpolated into the SQL text; escape it for a single-quoted literal first.
    "backtick"
    "-E"
    "searchstring"
    [ escapeSqliteString "$1" ]
    "pipeline"
    [
      "pipeline"
      [
        # use the pinned coreutils echo instead of whatever is on PATH
        bins.echo
        # NOTE(review): both branches of the UNION below are currently
        # identical, so the UNION only de-duplicates rows. The SQL comment
        # suggests the first branch was meant to be a stricter (verbatim)
        # match -- confirm the intended query.
        #
        # Quoting: inside an indented string `'''` yields two quotes, i.e. the
        # empty SQL string literal.
        (''
          .mode json
          select * from (
            -- first we try to find if we can find the track verbatim
            select * from (select
              synced_lyrics,
              has_synced_lyrics,
              plain_lyrics
            from
              tracks_fts('' + "'\${searchstring}'" + '') tf
              join tracks t on t.rowid = tf.rowid
              join lyrics l on t.rowid = l.track_id
            order by
              has_synced_lyrics desc, t.id
            )
            UNION
            select * from (select
              synced_lyrics,
              has_synced_lyrics,
              plain_lyrics
            from
              tracks_fts('' + "'\${searchstring}'" + '') tf
              join tracks t on t.rowid = tf.rowid
              join lyrics l on t.rowid = l.track_id
            order by
              has_synced_lyrics desc, t.id
            )
          )
          -- drop tracks that have neither synced nor plain lyrics
          where (synced_lyrics is not null and synced_lyrics != ''')
             or (plain_lyrics is not null and plain_lyrics != ''')
          -- the ordering of the UNION branches is not guaranteed to survive
          -- the UNION, so prefer synced lyrics explicitly here
          order by
            has_synced_lyrics desc
          limit
            1;
        ''
        )
      ]
      bins.sqlite3
      # immutable=1: the database lives on a read-only squashfs mount
      "file:\${cache}/lyric/dump/lrclib-db-dump.sqlite3?immutable=1"
    ]
    bins.jq
    "-r"
    ''
      if .[0] == null
      then ""
      else
        .[0]
          | if .has_synced_lyrics == 1
            then .synced_lyrics
            else .plain_lyrics
            end
      end
    ''
  ]);
# Result of running napalm's `buildPackage` on this directory with no extra arguments.
# NOTE(review): presumably the nodejs part referred to in the TODO on `lyric` -- confirm.
js = depot.users.Profpatsch.napalm.buildPackage ./. { };
in
# Attributes exposed by this file: the `lyric` command, the `js` package and
# the mpv script. `bins`, `lyric-to-temp-file` and `escapeSqliteString` stay
# local to the `let` block.
{
inherit
lyric
js
mpv-script;
}