fix(users/Profpatsch/whatcd-resolver): speed up artist_ids lookups
We need `artist_ids` in most of the queries, so let’s make them a generated column and put an index on them. This reduces times for getArtistNameById from ~300ms to ~20ms and for the main “latest” table from ~250ms to ~60ms. The `artist_has_been_snatched` logic moves into the torrent filter, so we don’t have to work on such large records further down. Change-Id: I5a1929bd9c81ea0031e512d01897c0e460ded077 Reviewed-on: https://cl.tvl.fyi/c/depot/+/13240 Tested-by: BuildkiteCI Reviewed-by: Profpatsch <mail@profpatsch.de>
This commit is contained in:
parent
5efe6cefe8
commit
cb5a745233
2 changed files with 60 additions and 37 deletions
|
|
@ -638,11 +638,28 @@ getBestTorrents ::
|
|||
getBestTorrents opts = do
|
||||
queryWith
|
||||
( [sql|
|
||||
WITH filtered_torrents AS (
|
||||
WITH
|
||||
artist_has_been_snatched AS (
|
||||
SELECT DISTINCT artist_id
|
||||
FROM (
|
||||
SELECT
|
||||
UNNEST(artist_ids) as artist_id,
|
||||
t.torrent_file IS NOT NULL as has_torrent_file
|
||||
FROM redacted.torrents t) as _
|
||||
WHERE has_torrent_file
|
||||
),
|
||||
filtered_torrents AS (
|
||||
SELECT DISTINCT ON (torrent_group)
|
||||
id
|
||||
FROM
|
||||
redacted.torrents
|
||||
JOIN LATERAL
|
||||
-- filter everything that’s not a favourite if requested
|
||||
(SELECT (
|
||||
artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites)
|
||||
OR artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched)
|
||||
) as is_favourite) as _
|
||||
ON (NOT ?::bool OR is_favourite)
|
||||
WHERE
|
||||
-- filter by artist id
|
||||
(?::bool OR (to_jsonb(?::int) <@ (jsonb_path_query_array(full_json_result, '$.artists[*].id'))))
|
||||
|
|
@ -667,6 +684,7 @@ getBestTorrents opts = do
|
|||
tg.full_json_result->'artists',
|
||||
'[]'::jsonb
|
||||
) as artists,
|
||||
t.artist_ids || tg.artist_ids as artist_ids,
|
||||
tg.full_json_result->>'groupName' AS group_name,
|
||||
tg.full_json_result->>'groupYear' AS group_year,
|
||||
t.torrent_file IS NOT NULL AS has_torrent_file,
|
||||
|
|
@ -677,16 +695,6 @@ getBestTorrents opts = do
|
|||
JOIN redacted.torrent_groups tg ON tg.id = t.torrent_group
|
||||
WHERE
|
||||
tg.full_json_result->>'releaseType' <> ALL (?::text[])
|
||||
),
|
||||
prepare2 AS MATERIALIZED (
|
||||
-- extract the json artist ids field into an array of ints
|
||||
SELECT *, array(select id from jsonb_to_recordset(artists) as (id int)) as artist_ids
|
||||
FROM prepare1
|
||||
),
|
||||
artist_has_been_snatched AS MATERIALIZED (
|
||||
SELECT DISTINCT artist_id
|
||||
FROM (SELECT UNNEST(artist_ids) as artist_id, has_torrent_file from prepare2) as _
|
||||
WHERE has_torrent_file
|
||||
)
|
||||
SELECT
|
||||
group_id,
|
||||
|
|
@ -699,14 +707,7 @@ getBestTorrents opts = do
|
|||
has_torrent_file,
|
||||
transmission_torrent_hash,
|
||||
torrent_format
|
||||
FROM prepare2
|
||||
JOIN LATERAL
|
||||
(SELECT (
|
||||
artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched)
|
||||
OR artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites)
|
||||
) as is_favourite) as _
|
||||
-- filter everything that’s not a favourite if requested
|
||||
ON (NOT ?::bool OR is_favourite)
|
||||
FROM prepare1
|
||||
|]
|
||||
<> case opts.ordering of
|
||||
BySeedingWeight -> [fmt|ORDER BY seeding_weight DESC|] <> "\n"
|
||||
|
|
@ -722,12 +723,12 @@ getBestTorrents opts = do
|
|||
let (onlyTheseTorrentsB, onlyTheseTorrents) = case opts.onlyTheseTorrents of
|
||||
Nothing -> (True, PGArray [])
|
||||
Just a -> (False, a <&> (.torrentId) & PGArray)
|
||||
( onlyArtistB :: Bool,
|
||||
( opts.onlyFavourites :: Bool,
|
||||
onlyArtistB :: Bool,
|
||||
onlyArtistId :: Int,
|
||||
onlyTheseTorrentsB :: Bool,
|
||||
onlyTheseTorrents,
|
||||
(opts.disallowedReleaseTypes & concatMap (\rt -> [rt.stringKey, rt.intKey & buildText intDecimalT]) & PGArray :: PGArray Text),
|
||||
opts.onlyFavourites :: Bool,
|
||||
opts.limitResults <&> naturalToInteger :: Maybe Integer
|
||||
)
|
||||
)
|
||||
|
|
@ -772,19 +773,8 @@ getArtistNameById :: (MonadPostgres m, HasField "artistId" r Int) => r -> Transa
|
|||
getArtistNameById dat = do
|
||||
queryFirstRowWithMaybe
|
||||
[sql|
|
||||
explain analyze WITH mapping as (
|
||||
SELECT x.id, x.name FROM
|
||||
redacted.torrents t
|
||||
join LATERAL
|
||||
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true
|
||||
UNION
|
||||
SELECT x.id, x.name FROM
|
||||
redacted.torrent_groups tg
|
||||
join LATERAL
|
||||
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true
|
||||
)
|
||||
SELECT name FROM mapping
|
||||
WHERE id = ?::int
|
||||
SELECT artist_name FROM redacted.artist_names
|
||||
WHERE artist_id = ?::int
|
||||
LIMIT 1
|
||||
|]
|
||||
(getLabel @"artistId" dat)
|
||||
|
|
|
|||
|
|
@ -1107,7 +1107,6 @@ migrate = inSpan "Database Migration" $ do
|
|||
ALTER TABLE redacted.torrents_json
|
||||
ADD COLUMN IF NOT EXISTS transmission_torrent_hash text NULL;
|
||||
|
||||
|
||||
-- the seeding weight is used to find the best torrent in a group.
|
||||
CREATE OR REPLACE FUNCTION calc_seeding_weight(full_json_result jsonb) RETURNS int AS $$
|
||||
BEGIN
|
||||
|
|
@ -1147,6 +1146,19 @@ migrate = inSpan "Database Migration" $ do
|
|||
ALTER TABLE redacted.torrents_json
|
||||
ADD COLUMN IF NOT EXISTS seeding_weight int NOT NULL GENERATED ALWAYS AS (calc_seeding_weight(full_json_result)) STORED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION artist_record_to_id(artists jsonb) RETURNS int[]
|
||||
as $$
|
||||
SELECT array_agg(x::int) from jsonb_path_query(artists, '$[*].id') j(x);
|
||||
$$ LANGUAGE sql IMMUTABLE;
|
||||
|
||||
ALTER TABLE redacted.torrents_json
|
||||
ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED;
|
||||
ALTER TABLE redacted.torrent_groups
|
||||
ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS torrents_json_artist_ids ON redacted.torrents_json USING GIN (artist_ids);
|
||||
CREATE INDEX IF NOT EXISTS torrent_groups_artist_ids ON redacted.torrent_groups USING GIN (artist_ids);
|
||||
|
||||
-- inflect out values of the full json
|
||||
CREATE OR REPLACE VIEW redacted.torrents AS
|
||||
SELECT
|
||||
|
|
@ -1157,10 +1169,10 @@ migrate = inSpan "Database Migration" $ do
|
|||
t.seeding_weight,
|
||||
t.full_json_result,
|
||||
t.torrent_file,
|
||||
t.transmission_torrent_hash
|
||||
t.transmission_torrent_hash,
|
||||
t.artist_ids
|
||||
FROM redacted.torrents_json t;
|
||||
|
||||
|
||||
CREATE INDEX IF NOT EXISTS torrents_json_seeding ON redacted.torrents_json(((full_json_result->'seeding')::integer));
|
||||
CREATE INDEX IF NOT EXISTS torrents_json_snatches ON redacted.torrents_json(((full_json_result->'snatches')::integer));
|
||||
|
||||
|
|
@ -1169,6 +1181,27 @@ migrate = inSpan "Database Migration" $ do
|
|||
artist_id INTEGER NOT NULL,
|
||||
UNIQUE(artist_id)
|
||||
);
|
||||
|
||||
-- for easier query lookup, a mapping from artist ids to names
|
||||
CREATE OR REPLACE VIEW redacted.artist_names AS
|
||||
SELECT
|
||||
t.artist_id, x.name as artist_name
|
||||
FROM
|
||||
(SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrents t) as t
|
||||
join LATERAL
|
||||
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text)
|
||||
ON x.id = t.artist_id
|
||||
WHERE x.id = t.artist_id
|
||||
UNION ALL
|
||||
SELECT
|
||||
t.artist_id, x.name as artist_name
|
||||
FROM
|
||||
(SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrent_groups t) as t
|
||||
join LATERAL
|
||||
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text)
|
||||
ON x.id = t.artist_id
|
||||
WHERE x.id = t.artist_id;
|
||||
|
||||
|]
|
||||
()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue