fix(users/Profpatsch/whatcd-resolver): speed up artist_ids lookups
We need `artist_ids` in most of the queries, so let’s make them a generated column and put an index on them. This reduces times for getArtistNameById from ~300ms to ~20ms and for the main “latest” table from ~250ms to ~60ms. The `artist_has_been_snatched` logic moves into the torrent filter, so we don’t have to work on such large records further down. Change-Id: I5a1929bd9c81ea0031e512d01897c0e460ded077 Reviewed-on: https://cl.tvl.fyi/c/depot/+/13240 Tested-by: BuildkiteCI Reviewed-by: Profpatsch <mail@profpatsch.de>
This commit is contained in:
parent
5efe6cefe8
commit
cb5a745233
2 changed files with 60 additions and 37 deletions
|
|
@ -638,11 +638,28 @@ getBestTorrents ::
|
||||||
getBestTorrents opts = do
|
getBestTorrents opts = do
|
||||||
queryWith
|
queryWith
|
||||||
( [sql|
|
( [sql|
|
||||||
WITH filtered_torrents AS (
|
WITH
|
||||||
|
artist_has_been_snatched AS (
|
||||||
|
SELECT DISTINCT artist_id
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
UNNEST(artist_ids) as artist_id,
|
||||||
|
t.torrent_file IS NOT NULL as has_torrent_file
|
||||||
|
FROM redacted.torrents t) as _
|
||||||
|
WHERE has_torrent_file
|
||||||
|
),
|
||||||
|
filtered_torrents AS (
|
||||||
SELECT DISTINCT ON (torrent_group)
|
SELECT DISTINCT ON (torrent_group)
|
||||||
id
|
id
|
||||||
FROM
|
FROM
|
||||||
redacted.torrents
|
redacted.torrents
|
||||||
|
JOIN LATERAL
|
||||||
|
-- filter everything that’s not a favourite if requested
|
||||||
|
(SELECT (
|
||||||
|
artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites)
|
||||||
|
OR artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched)
|
||||||
|
) as is_favourite) as _
|
||||||
|
ON (NOT ?::bool OR is_favourite)
|
||||||
WHERE
|
WHERE
|
||||||
-- filter by artist id
|
-- filter by artist id
|
||||||
(?::bool OR (to_jsonb(?::int) <@ (jsonb_path_query_array(full_json_result, '$.artists[*].id'))))
|
(?::bool OR (to_jsonb(?::int) <@ (jsonb_path_query_array(full_json_result, '$.artists[*].id'))))
|
||||||
|
|
@ -667,6 +684,7 @@ getBestTorrents opts = do
|
||||||
tg.full_json_result->'artists',
|
tg.full_json_result->'artists',
|
||||||
'[]'::jsonb
|
'[]'::jsonb
|
||||||
) as artists,
|
) as artists,
|
||||||
|
t.artist_ids || tg.artist_ids as artist_ids,
|
||||||
tg.full_json_result->>'groupName' AS group_name,
|
tg.full_json_result->>'groupName' AS group_name,
|
||||||
tg.full_json_result->>'groupYear' AS group_year,
|
tg.full_json_result->>'groupYear' AS group_year,
|
||||||
t.torrent_file IS NOT NULL AS has_torrent_file,
|
t.torrent_file IS NOT NULL AS has_torrent_file,
|
||||||
|
|
@ -677,16 +695,6 @@ getBestTorrents opts = do
|
||||||
JOIN redacted.torrent_groups tg ON tg.id = t.torrent_group
|
JOIN redacted.torrent_groups tg ON tg.id = t.torrent_group
|
||||||
WHERE
|
WHERE
|
||||||
tg.full_json_result->>'releaseType' <> ALL (?::text[])
|
tg.full_json_result->>'releaseType' <> ALL (?::text[])
|
||||||
),
|
|
||||||
prepare2 AS MATERIALIZED (
|
|
||||||
-- extract the json artist ids field into an array of ints
|
|
||||||
SELECT *, array(select id from jsonb_to_recordset(artists) as (id int)) as artist_ids
|
|
||||||
FROM prepare1
|
|
||||||
),
|
|
||||||
artist_has_been_snatched AS MATERIALIZED (
|
|
||||||
SELECT DISTINCT artist_id
|
|
||||||
FROM (SELECT UNNEST(artist_ids) as artist_id, has_torrent_file from prepare2) as _
|
|
||||||
WHERE has_torrent_file
|
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
group_id,
|
group_id,
|
||||||
|
|
@ -699,14 +707,7 @@ getBestTorrents opts = do
|
||||||
has_torrent_file,
|
has_torrent_file,
|
||||||
transmission_torrent_hash,
|
transmission_torrent_hash,
|
||||||
torrent_format
|
torrent_format
|
||||||
FROM prepare2
|
FROM prepare1
|
||||||
JOIN LATERAL
|
|
||||||
(SELECT (
|
|
||||||
artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched)
|
|
||||||
OR artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites)
|
|
||||||
) as is_favourite) as _
|
|
||||||
-- filter everything that’s not a favourite if requested
|
|
||||||
ON (NOT ?::bool OR is_favourite)
|
|
||||||
|]
|
|]
|
||||||
<> case opts.ordering of
|
<> case opts.ordering of
|
||||||
BySeedingWeight -> [fmt|ORDER BY seeding_weight DESC|] <> "\n"
|
BySeedingWeight -> [fmt|ORDER BY seeding_weight DESC|] <> "\n"
|
||||||
|
|
@ -722,12 +723,12 @@ getBestTorrents opts = do
|
||||||
let (onlyTheseTorrentsB, onlyTheseTorrents) = case opts.onlyTheseTorrents of
|
let (onlyTheseTorrentsB, onlyTheseTorrents) = case opts.onlyTheseTorrents of
|
||||||
Nothing -> (True, PGArray [])
|
Nothing -> (True, PGArray [])
|
||||||
Just a -> (False, a <&> (.torrentId) & PGArray)
|
Just a -> (False, a <&> (.torrentId) & PGArray)
|
||||||
( onlyArtistB :: Bool,
|
( opts.onlyFavourites :: Bool,
|
||||||
|
onlyArtistB :: Bool,
|
||||||
onlyArtistId :: Int,
|
onlyArtistId :: Int,
|
||||||
onlyTheseTorrentsB :: Bool,
|
onlyTheseTorrentsB :: Bool,
|
||||||
onlyTheseTorrents,
|
onlyTheseTorrents,
|
||||||
(opts.disallowedReleaseTypes & concatMap (\rt -> [rt.stringKey, rt.intKey & buildText intDecimalT]) & PGArray :: PGArray Text),
|
(opts.disallowedReleaseTypes & concatMap (\rt -> [rt.stringKey, rt.intKey & buildText intDecimalT]) & PGArray :: PGArray Text),
|
||||||
opts.onlyFavourites :: Bool,
|
|
||||||
opts.limitResults <&> naturalToInteger :: Maybe Integer
|
opts.limitResults <&> naturalToInteger :: Maybe Integer
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@ -772,19 +773,8 @@ getArtistNameById :: (MonadPostgres m, HasField "artistId" r Int) => r -> Transa
|
||||||
getArtistNameById dat = do
|
getArtistNameById dat = do
|
||||||
queryFirstRowWithMaybe
|
queryFirstRowWithMaybe
|
||||||
[sql|
|
[sql|
|
||||||
explain analyze WITH mapping as (
|
SELECT artist_name FROM redacted.artist_names
|
||||||
SELECT x.id, x.name FROM
|
WHERE artist_id = ?::int
|
||||||
redacted.torrents t
|
|
||||||
join LATERAL
|
|
||||||
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true
|
|
||||||
UNION
|
|
||||||
SELECT x.id, x.name FROM
|
|
||||||
redacted.torrent_groups tg
|
|
||||||
join LATERAL
|
|
||||||
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true
|
|
||||||
)
|
|
||||||
SELECT name FROM mapping
|
|
||||||
WHERE id = ?::int
|
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
|]
|
|]
|
||||||
(getLabel @"artistId" dat)
|
(getLabel @"artistId" dat)
|
||||||
|
|
|
||||||
|
|
@ -1107,7 +1107,6 @@ migrate = inSpan "Database Migration" $ do
|
||||||
ALTER TABLE redacted.torrents_json
|
ALTER TABLE redacted.torrents_json
|
||||||
ADD COLUMN IF NOT EXISTS transmission_torrent_hash text NULL;
|
ADD COLUMN IF NOT EXISTS transmission_torrent_hash text NULL;
|
||||||
|
|
||||||
|
|
||||||
-- the seeding weight is used to find the best torrent in a group.
|
-- the seeding weight is used to find the best torrent in a group.
|
||||||
CREATE OR REPLACE FUNCTION calc_seeding_weight(full_json_result jsonb) RETURNS int AS $$
|
CREATE OR REPLACE FUNCTION calc_seeding_weight(full_json_result jsonb) RETURNS int AS $$
|
||||||
BEGIN
|
BEGIN
|
||||||
|
|
@ -1147,6 +1146,19 @@ migrate = inSpan "Database Migration" $ do
|
||||||
ALTER TABLE redacted.torrents_json
|
ALTER TABLE redacted.torrents_json
|
||||||
ADD COLUMN IF NOT EXISTS seeding_weight int NOT NULL GENERATED ALWAYS AS (calc_seeding_weight(full_json_result)) STORED;
|
ADD COLUMN IF NOT EXISTS seeding_weight int NOT NULL GENERATED ALWAYS AS (calc_seeding_weight(full_json_result)) STORED;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION artist_record_to_id(artists jsonb) RETURNS int[]
|
||||||
|
as $$
|
||||||
|
SELECT array_agg(x::int) from jsonb_path_query(artists, '$[*].id') j(x);
|
||||||
|
$$ LANGUAGE sql IMMUTABLE;
|
||||||
|
|
||||||
|
ALTER TABLE redacted.torrents_json
|
||||||
|
ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED;
|
||||||
|
ALTER TABLE redacted.torrent_groups
|
||||||
|
ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS torrents_json_artist_ids ON redacted.torrents_json USING GIN (artist_ids);
|
||||||
|
CREATE INDEX IF NOT EXISTS torrent_groups_artist_ids ON redacted.torrent_groups USING GIN (artist_ids);
|
||||||
|
|
||||||
-- inflect out values of the full json
|
-- inflect out values of the full json
|
||||||
CREATE OR REPLACE VIEW redacted.torrents AS
|
CREATE OR REPLACE VIEW redacted.torrents AS
|
||||||
SELECT
|
SELECT
|
||||||
|
|
@ -1157,10 +1169,10 @@ migrate = inSpan "Database Migration" $ do
|
||||||
t.seeding_weight,
|
t.seeding_weight,
|
||||||
t.full_json_result,
|
t.full_json_result,
|
||||||
t.torrent_file,
|
t.torrent_file,
|
||||||
t.transmission_torrent_hash
|
t.transmission_torrent_hash,
|
||||||
|
t.artist_ids
|
||||||
FROM redacted.torrents_json t;
|
FROM redacted.torrents_json t;
|
||||||
|
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS torrents_json_seeding ON redacted.torrents_json(((full_json_result->'seeding')::integer));
|
CREATE INDEX IF NOT EXISTS torrents_json_seeding ON redacted.torrents_json(((full_json_result->'seeding')::integer));
|
||||||
CREATE INDEX IF NOT EXISTS torrents_json_snatches ON redacted.torrents_json(((full_json_result->'snatches')::integer));
|
CREATE INDEX IF NOT EXISTS torrents_json_snatches ON redacted.torrents_json(((full_json_result->'snatches')::integer));
|
||||||
|
|
||||||
|
|
@ -1169,6 +1181,27 @@ migrate = inSpan "Database Migration" $ do
|
||||||
artist_id INTEGER NOT NULL,
|
artist_id INTEGER NOT NULL,
|
||||||
UNIQUE(artist_id)
|
UNIQUE(artist_id)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- for easier query lookup, a mapping from artist ids to names
|
||||||
|
CREATE OR REPLACE VIEW redacted.artist_names AS
|
||||||
|
SELECT
|
||||||
|
t.artist_id, x.name as artist_name
|
||||||
|
FROM
|
||||||
|
(SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrents t) as t
|
||||||
|
join LATERAL
|
||||||
|
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text)
|
||||||
|
ON x.id = t.artist_id
|
||||||
|
WHERE x.id = t.artist_id
|
||||||
|
UNION ALL
|
||||||
|
SELECT
|
||||||
|
t.artist_id, x.name as artist_name
|
||||||
|
FROM
|
||||||
|
(SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrent_groups t) as t
|
||||||
|
join LATERAL
|
||||||
|
jsonb_to_recordset(full_json_result->'artists') as x(id int, name text)
|
||||||
|
ON x.id = t.artist_id
|
||||||
|
WHERE x.id = t.artist_id;
|
||||||
|
|
||||||
|]
|
|]
|
||||||
()
|
()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue