fix(users/Profpatsch/whatcd-resolver): speed up artist_ids lookups

We need `artist_ids` in most of the queries, so let’s make them a
generated column and put an index on them.

This reduces times for getArtistNameById from ~300ms to ~20ms and for
the main “latest” table from ~250ms to ~60ms.

The `artist_has_been_snatched` logic moves into the torrent filter, so
we don’t have to work on such large records further down.

Change-Id: I5a1929bd9c81ea0031e512d01897c0e460ded077
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13240
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
This commit is contained in:
Profpatsch 2025-03-10 17:47:36 +01:00
parent 5efe6cefe8
commit cb5a745233
2 changed files with 60 additions and 37 deletions

View file

@ -638,11 +638,28 @@ getBestTorrents ::
getBestTorrents opts = do
queryWith
( [sql|
WITH filtered_torrents AS (
WITH
artist_has_been_snatched AS (
SELECT DISTINCT artist_id
FROM (
SELECT
UNNEST(artist_ids) as artist_id,
t.torrent_file IS NOT NULL as has_torrent_file
FROM redacted.torrents t) as _
WHERE has_torrent_file
),
filtered_torrents AS (
SELECT DISTINCT ON (torrent_group)
id
FROM
redacted.torrents
JOIN LATERAL
-- filter everything thats not a favourite if requested
(SELECT (
artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites)
OR artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched)
) as is_favourite) as _
ON (NOT ?::bool OR is_favourite)
WHERE
-- filter by artist id
(?::bool OR (to_jsonb(?::int) <@ (jsonb_path_query_array(full_json_result, '$.artists[*].id'))))
@ -667,6 +684,7 @@ getBestTorrents opts = do
tg.full_json_result->'artists',
'[]'::jsonb
) as artists,
t.artist_ids || tg.artist_ids as artist_ids,
tg.full_json_result->>'groupName' AS group_name,
tg.full_json_result->>'groupYear' AS group_year,
t.torrent_file IS NOT NULL AS has_torrent_file,
@ -677,16 +695,6 @@ getBestTorrents opts = do
JOIN redacted.torrent_groups tg ON tg.id = t.torrent_group
WHERE
tg.full_json_result->>'releaseType' <> ALL (?::text[])
),
prepare2 AS MATERIALIZED (
-- extract the json artist ids field into an array of ints
SELECT *, array(select id from jsonb_to_recordset(artists) as (id int)) as artist_ids
FROM prepare1
),
artist_has_been_snatched AS MATERIALIZED (
SELECT DISTINCT artist_id
FROM (SELECT UNNEST(artist_ids) as artist_id, has_torrent_file from prepare2) as _
WHERE has_torrent_file
)
SELECT
group_id,
@ -699,14 +707,7 @@ getBestTorrents opts = do
has_torrent_file,
transmission_torrent_hash,
torrent_format
FROM prepare2
JOIN LATERAL
(SELECT (
artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched)
OR artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites)
) as is_favourite) as _
-- filter everything thats not a favourite if requested
ON (NOT ?::bool OR is_favourite)
FROM prepare1
|]
<> case opts.ordering of
BySeedingWeight -> [fmt|ORDER BY seeding_weight DESC|] <> "\n"
@ -722,12 +723,12 @@ getBestTorrents opts = do
let (onlyTheseTorrentsB, onlyTheseTorrents) = case opts.onlyTheseTorrents of
Nothing -> (True, PGArray [])
Just a -> (False, a <&> (.torrentId) & PGArray)
( onlyArtistB :: Bool,
( opts.onlyFavourites :: Bool,
onlyArtistB :: Bool,
onlyArtistId :: Int,
onlyTheseTorrentsB :: Bool,
onlyTheseTorrents,
(opts.disallowedReleaseTypes & concatMap (\rt -> [rt.stringKey, rt.intKey & buildText intDecimalT]) & PGArray :: PGArray Text),
opts.onlyFavourites :: Bool,
opts.limitResults <&> naturalToInteger :: Maybe Integer
)
)
@ -772,19 +773,8 @@ getArtistNameById :: (MonadPostgres m, HasField "artistId" r Int) => r -> Transa
getArtistNameById dat = do
queryFirstRowWithMaybe
[sql|
explain analyze WITH mapping as (
SELECT x.id, x.name FROM
redacted.torrents t
join LATERAL
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true
UNION
SELECT x.id, x.name FROM
redacted.torrent_groups tg
join LATERAL
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true
)
SELECT name FROM mapping
WHERE id = ?::int
SELECT artist_name FROM redacted.artist_names
WHERE artist_id = ?::int
LIMIT 1
|]
(getLabel @"artistId" dat)

View file

@ -1107,7 +1107,6 @@ migrate = inSpan "Database Migration" $ do
ALTER TABLE redacted.torrents_json
ADD COLUMN IF NOT EXISTS transmission_torrent_hash text NULL;
-- the seeding weight is used to find the best torrent in a group.
CREATE OR REPLACE FUNCTION calc_seeding_weight(full_json_result jsonb) RETURNS int AS $$
BEGIN
@ -1147,6 +1146,19 @@ migrate = inSpan "Database Migration" $ do
ALTER TABLE redacted.torrents_json
ADD COLUMN IF NOT EXISTS seeding_weight int NOT NULL GENERATED ALWAYS AS (calc_seeding_weight(full_json_result)) STORED;
CREATE OR REPLACE FUNCTION artist_record_to_id(artists jsonb) RETURNS int[]
as $$
SELECT array_agg(x::int) from jsonb_path_query(artists, '$[*].id') j(x);
$$ LANGUAGE sql IMMUTABLE;
ALTER TABLE redacted.torrents_json
ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED;
ALTER TABLE redacted.torrent_groups
ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED;
CREATE INDEX IF NOT EXISTS torrents_json_artist_ids ON redacted.torrents_json USING GIN (artist_ids);
CREATE INDEX IF NOT EXISTS torrent_groups_artist_ids ON redacted.torrent_groups USING GIN (artist_ids);
-- inflect out values of the full json
CREATE OR REPLACE VIEW redacted.torrents AS
SELECT
@ -1157,10 +1169,10 @@ migrate = inSpan "Database Migration" $ do
t.seeding_weight,
t.full_json_result,
t.torrent_file,
t.transmission_torrent_hash
t.transmission_torrent_hash,
t.artist_ids
FROM redacted.torrents_json t;
CREATE INDEX IF NOT EXISTS torrents_json_seeding ON redacted.torrents_json(((full_json_result->'seeding')::integer));
CREATE INDEX IF NOT EXISTS torrents_json_snatches ON redacted.torrents_json(((full_json_result->'snatches')::integer));
@ -1169,6 +1181,27 @@ migrate = inSpan "Database Migration" $ do
artist_id INTEGER NOT NULL,
UNIQUE(artist_id)
);
-- for easier query lookup, a mapping from artist ids to names
CREATE OR REPLACE VIEW redacted.artist_names AS
SELECT
t.artist_id, x.name as artist_name
FROM
(SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrents t) as t
join LATERAL
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text)
ON x.id = t.artist_id
WHERE x.id = t.artist_id
UNION ALL
SELECT
t.artist_id, x.name as artist_name
FROM
(SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrent_groups t) as t
join LATERAL
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text)
ON x.id = t.artist_id
WHERE x.id = t.artist_id;
|]
()