fix(users/Profpatsch/whatcd-resolver): speed up artist_ids lookups
We need `artist_ids` in most of the queries, so let’s make it a generated column and put an index on it. This reduces query times for `getArtistNameById` from ~300ms to ~20ms and for the main “latest” table from ~250ms to ~60ms. The `artist_has_been_snatched` logic moves into the torrent filter, so we don’t have to work on such large records further down.

Change-Id: I5a1929bd9c81ea0031e512d01897c0e460ded077
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13240
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
This commit is contained in:
		
							parent
							
								
									5efe6cefe8
								
							
						
					
					
						commit
						cb5a745233
					
				
					 2 changed files with 60 additions and 37 deletions
				
			
		|  | @ -638,11 +638,28 @@ getBestTorrents :: | |||
| getBestTorrents opts = do | ||||
|   queryWith | ||||
|     ( [sql| | ||||
|       WITH filtered_torrents AS ( | ||||
|       WITH | ||||
|       artist_has_been_snatched AS ( | ||||
|         SELECT DISTINCT artist_id | ||||
|         FROM ( | ||||
|           SELECT | ||||
|             UNNEST(artist_ids) as artist_id, | ||||
|             t.torrent_file IS NOT NULL as has_torrent_file | ||||
|           FROM redacted.torrents t) as _ | ||||
|         WHERE has_torrent_file | ||||
|       ), | ||||
|       filtered_torrents AS ( | ||||
|         SELECT DISTINCT ON (torrent_group) | ||||
|           id | ||||
|         FROM | ||||
|           redacted.torrents | ||||
|         JOIN LATERAL | ||||
|           -- filter everything that’s not a favourite if requested | ||||
|           (SELECT ( | ||||
|             artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites) | ||||
|             OR artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched) | ||||
|           ) as is_favourite) as _ | ||||
|           ON (NOT ?::bool OR is_favourite) | ||||
|         WHERE | ||||
|           -- filter by artist id | ||||
|           (?::bool OR (to_jsonb(?::int) <@ (jsonb_path_query_array(full_json_result, '$.artists[*].id')))) | ||||
|  | @ -667,6 +684,7 @@ getBestTorrents opts = do | |||
|             tg.full_json_result->'artists', | ||||
|             '[]'::jsonb | ||||
|           ) as artists, | ||||
|           t.artist_ids || tg.artist_ids as artist_ids, | ||||
|           tg.full_json_result->>'groupName' AS group_name, | ||||
|           tg.full_json_result->>'groupYear' AS group_year, | ||||
|           t.torrent_file IS NOT NULL AS has_torrent_file, | ||||
|  | @ -677,16 +695,6 @@ getBestTorrents opts = do | |||
|         JOIN redacted.torrent_groups tg ON tg.id = t.torrent_group | ||||
|         WHERE | ||||
|           tg.full_json_result->>'releaseType' <> ALL (?::text[]) | ||||
|       ), | ||||
|       prepare2 AS MATERIALIZED ( | ||||
|         -- extract the json artist ids field into an array of ints | ||||
|         SELECT *, array(select id from jsonb_to_recordset(artists) as (id int)) as artist_ids | ||||
|         FROM prepare1 | ||||
|       ), | ||||
|       artist_has_been_snatched AS MATERIALIZED ( | ||||
|         SELECT DISTINCT artist_id | ||||
|         FROM (SELECT UNNEST(artist_ids) as artist_id, has_torrent_file from prepare2) as _ | ||||
|         WHERE has_torrent_file | ||||
|       ) | ||||
|       SELECT | ||||
|         group_id, | ||||
|  | @ -699,14 +707,7 @@ getBestTorrents opts = do | |||
|         has_torrent_file, | ||||
|         transmission_torrent_hash, | ||||
|         torrent_format | ||||
|       FROM prepare2 | ||||
|       JOIN LATERAL | ||||
|         (SELECT ( | ||||
|           artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched) | ||||
|           OR artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites) | ||||
|         ) as is_favourite) as _ | ||||
|         -- filter everything that’s not a favourite if requested | ||||
|         ON (NOT ?::bool OR is_favourite) | ||||
|       FROM prepare1 | ||||
|     |] | ||||
|         <> case opts.ordering of | ||||
|           BySeedingWeight -> [fmt|ORDER BY seeding_weight DESC|] <> "\n" | ||||
|  | @ -722,12 +723,12 @@ getBestTorrents opts = do | |||
|         let (onlyTheseTorrentsB, onlyTheseTorrents) = case opts.onlyTheseTorrents of | ||||
|               Nothing -> (True, PGArray []) | ||||
|               Just a -> (False, a <&> (.torrentId) & PGArray) | ||||
|         ( onlyArtistB :: Bool, | ||||
|         ( opts.onlyFavourites :: Bool, | ||||
|           onlyArtistB :: Bool, | ||||
|           onlyArtistId :: Int, | ||||
|           onlyTheseTorrentsB :: Bool, | ||||
|           onlyTheseTorrents, | ||||
|           (opts.disallowedReleaseTypes & concatMap (\rt -> [rt.stringKey, rt.intKey & buildText intDecimalT]) & PGArray :: PGArray Text), | ||||
|           opts.onlyFavourites :: Bool, | ||||
|           opts.limitResults <&> naturalToInteger :: Maybe Integer | ||||
|           ) | ||||
|     ) | ||||
|  | @ -772,19 +773,8 @@ getArtistNameById :: (MonadPostgres m, HasField "artistId" r Int) => r -> Transa | |||
| getArtistNameById dat = do | ||||
|   queryFirstRowWithMaybe | ||||
|     [sql| | ||||
|     explain analyze WITH mapping as ( | ||||
|         SELECT x.id, x.name FROM | ||||
|           redacted.torrents t | ||||
|           join LATERAL | ||||
|           jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true | ||||
|         UNION | ||||
|         SELECT x.id, x.name FROM | ||||
|           redacted.torrent_groups tg | ||||
|           join LATERAL | ||||
|           jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) on true | ||||
|         ) | ||||
|         SELECT name FROM mapping | ||||
|         WHERE id = ?::int | ||||
|         SELECT artist_name FROM redacted.artist_names | ||||
|         WHERE artist_id = ?::int | ||||
|         LIMIT 1 | ||||
|   |] | ||||
|     (getLabel @"artistId" dat) | ||||
|  |  | |||
|  | @ -1107,7 +1107,6 @@ migrate = inSpan "Database Migration" $ do | |||
|     ALTER TABLE redacted.torrents_json | ||||
|     ADD COLUMN IF NOT EXISTS transmission_torrent_hash text NULL; | ||||
| 
 | ||||
| 
 | ||||
|     -- the seeding weight is used to find the best torrent in a group. | ||||
|     CREATE OR REPLACE FUNCTION calc_seeding_weight(full_json_result jsonb) RETURNS int AS $$ | ||||
|     BEGIN | ||||
|  | @ -1147,6 +1146,19 @@ migrate = inSpan "Database Migration" $ do | |||
|     ALTER TABLE redacted.torrents_json | ||||
|     ADD COLUMN IF NOT EXISTS seeding_weight int NOT NULL GENERATED ALWAYS AS (calc_seeding_weight(full_json_result)) STORED; | ||||
| 
 | ||||
|     CREATE OR REPLACE FUNCTION artist_record_to_id(artists jsonb) RETURNS int[] | ||||
|     as $$ | ||||
|       SELECT array_agg(x::int) from jsonb_path_query(artists, '$[*].id') j(x); | ||||
|     $$ LANGUAGE sql IMMUTABLE; | ||||
| 
 | ||||
|     ALTER TABLE redacted.torrents_json | ||||
|     ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED; | ||||
|     ALTER TABLE redacted.torrent_groups | ||||
|     ADD COLUMN IF NOT EXISTS artist_ids int[] NOT NULL GENERATED ALWAYS AS (COALESCE(artist_record_to_id(full_json_result->'artists'), ARRAY[]::int[])) STORED; | ||||
| 
 | ||||
|     CREATE INDEX IF NOT EXISTS torrents_json_artist_ids ON redacted.torrents_json USING GIN (artist_ids); | ||||
|     CREATE INDEX IF NOT EXISTS torrent_groups_artist_ids ON redacted.torrent_groups USING GIN (artist_ids); | ||||
| 
 | ||||
|     -- inflect out values of the full json | ||||
|     CREATE OR REPLACE VIEW redacted.torrents AS | ||||
|     SELECT | ||||
|  | @ -1157,10 +1169,10 @@ migrate = inSpan "Database Migration" $ do | |||
|       t.seeding_weight, | ||||
|       t.full_json_result, | ||||
|       t.torrent_file, | ||||
|       t.transmission_torrent_hash | ||||
|       t.transmission_torrent_hash, | ||||
|       t.artist_ids | ||||
|     FROM redacted.torrents_json t; | ||||
| 
 | ||||
| 
 | ||||
|     CREATE INDEX IF NOT EXISTS torrents_json_seeding ON redacted.torrents_json(((full_json_result->'seeding')::integer)); | ||||
|     CREATE INDEX IF NOT EXISTS torrents_json_snatches ON redacted.torrents_json(((full_json_result->'snatches')::integer)); | ||||
| 
 | ||||
|  | @ -1169,6 +1181,27 @@ migrate = inSpan "Database Migration" $ do | |||
|       artist_id INTEGER NOT NULL, | ||||
|       UNIQUE(artist_id) | ||||
|     ); | ||||
| 
 | ||||
|     -- for easier query lookup, a mapping from artist ids to names | ||||
|     CREATE OR REPLACE VIEW redacted.artist_names AS | ||||
|     SELECT | ||||
|       t.artist_id, x.name as artist_name | ||||
|     FROM | ||||
|       (SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrents t) as t | ||||
|       join LATERAL | ||||
|       jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) | ||||
|       ON x.id = t.artist_id | ||||
|       WHERE x.id = t.artist_id | ||||
|     UNION ALL | ||||
|     SELECT | ||||
|       t.artist_id, x.name as artist_name | ||||
|     FROM | ||||
|       (SELECT unnest(artist_ids) as artist_id, * FROM redacted.torrent_groups t) as t | ||||
|       join LATERAL | ||||
|       jsonb_to_recordset(full_json_result->'artists') as x(id int, name text) | ||||
|       ON x.id = t.artist_id | ||||
|       WHERE x.id = t.artist_id; | ||||
| 
 | ||||
|   |] | ||||
|     () | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue