snix/users/Profpatsch/whatcd-resolver/src/Redacted.hs
Profpatsch 42da189180 feat(users/Profpatsch/whatcd-resolver): conduit shenanigans
Some experiments in speeding up search efficiency.

This is more of a “is this possible” thing, and it looks like it’s
possible, but does not really improve anything regarding the rate
limiting.

The idea is that we can start everything at the same time as async,
but use a semaphore to have only 5 things run at once. That also means
that as soon as something is done, we immediately start the next task.
The asyncs are guaranteed to be cleaned up by the `ResourceT`
wrapper (eventually).

It’s pretty cool how Conduit makes writing these side-effecting things
pretty possible.

Change-Id: Ibadead7db293373b415840960602fa71920fc653
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13246
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
2025-03-11 19:45:40 +00:00

1143 lines
38 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE QuasiQuotes #-}
module Redacted where
import AppT
import Arg
import Bencode
import Builder
import Comparison
import Conduit (ConduitT)
import Conduit qualified as Cond
import Control.Monad.Logger.CallStack
import Control.Monad.Reader
import Control.Monad.Trans.Resource (resourceForkWith)
import Data.Aeson qualified as Json
import Data.Aeson.BetterErrors qualified as Json
import Data.Aeson.Key qualified as Key
import Data.Aeson.KeyMap qualified as KeyMap
import Data.BEncode (BEncode)
import Data.Conduit ((.|))
import Data.Error.Tree
import Data.List qualified as List
import Data.List.NonEmpty qualified as NonEmpty
import Data.Maybe (catMaybes)
import Data.Text.IO qualified as Text.IO
import Data.Time (NominalDiffTime, UTCTime)
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
import Database.PostgreSQL.Simple (Binary (Binary), Only (..))
import Database.PostgreSQL.Simple.Types (PGArray (PGArray))
import FieldParser (FieldParser)
import FieldParser qualified as Field
import Http qualified
import Json qualified
import Label
import MyLabel
import MyPrelude
import Network.HTTP.Types
import Network.Wai.Parse qualified as Wai
import OpenTelemetry.Context.ThreadLocal qualified as Otel
import OpenTelemetry.Trace qualified as Otel hiding (getTracer, inSpan, inSpan')
import Optional
import Parse (Parse, mapLookup, mapLookupMay, runParse)
import Parse qualified
import Postgres.Decoder qualified as Dec
import Postgres.MonadPostgres
import Pretty
import RevList (RevList)
import RevList qualified
import UnliftIO (MonadUnliftIO, askRunInIO, async, newQSem, withQSem)
import UnliftIO.Async (Async)
import UnliftIO.Async qualified as Async
import UnliftIO.Concurrent (threadDelay)
import Prelude hiding (length, span)
class MonadRedacted m where
getRedactedApiKey :: m ByteString
instance (MonadIO m) => MonadRedacted (AppT m) where
getRedactedApiKey = AppT (asks (.redactedApiKey))
redactedSearch ::
( MonadThrow m,
MonadOtel m,
MonadRedacted m,
HasField "actionArgs" extraArguments [(ByteString, ByteString)],
HasField "page" dat (Maybe Natural)
) =>
extraArguments ->
dat ->
Json.Parse ErrorTree a ->
m a
redactedSearch extraArguments dat parser =
inSpan' "Redacted API Search" $ \span ->
redactedPagedRequest
span
( t3
#action
"browse"
#actionArgs
extraArguments.actionArgs
#page
dat.page
)
parser
redactedGetArtist ::
( MonadOtel m,
MonadThrow m,
MonadRedacted m,
HasField "artistId" r Int,
HasField "page" r (Maybe Natural)
) =>
r ->
Json.Parse ErrorTree a ->
m a
redactedGetArtist dat parser =
inSpan' "Redacted Get Artist" $ \span -> do
redactedPagedRequest
span
( t3
#action
"artist"
#actionArgs
[("id", buildBytes intDecimalB dat.artistId)]
#page
(dat.page)
)
parser
redactedPagedRequest ::
( MonadThrow m,
MonadOtel m,
MonadRedacted m,
HasField "action" dat ByteString,
HasField "actionArgs" dat [(ByteString, ByteString)],
HasField "page" dat (Maybe Natural)
) =>
Otel.Span ->
dat ->
Json.Parse ErrorTree a ->
m a
redactedPagedRequest span dat parser =
redactedApiRequestJson
span
( t2
#action
dat.action
#actionArgs
( (dat.actionArgs <&> second Just)
<> ( dat.page
& ifExists
(\page -> ("page", Just $ buildBytes naturalDecimalB page))
)
)
)
parser
redactedGetTorrentFile ::
( MonadLogger m,
MonadThrow m,
HasField "torrentId" dat Int,
HasField "useFreeleechTokens" dat Bool,
MonadOtel m,
MonadRedacted m
) =>
dat ->
m ByteString
redactedGetTorrentFile dat = inSpan' "Redacted Get Torrent File" $ \span -> do
let actionArgs =
[ ("id", Just (buildBytes intDecimalB dat.torrentId))
]
-- try using tokens as long as we have them (TODO: what if theres no tokens left?
-- ANSWER: it breaks:
-- responseBody = "{\"status\":\"failure\",\"error\":\"You do not have any freeleech tokens left. Please use the regular DL link.\"}",
<> (if dat.useFreeleechTokens then [("usetoken", Just "1")] else [])
let reqDat =
( T2
(label @"action" "download")
( label @"actionArgs" $ actionArgs
)
)
addAttribute span "redacted.request" (toOtelJsonAttr reqDat)
req <- mkRedactedApiRequest reqDat
httpTorrent span req
mkRedactedTorrentLink :: Arg "torrentGroupId" Int -> Text
mkRedactedTorrentLink torrentId = [fmt|https://redacted.sh/torrents.php?id={torrentId.unArg}|]
exampleSearch :: (MonadThrow m, MonadLogger m, MonadPostgres m, MonadOtel m, MonadRedacted m) => Transaction m ()
exampleSearch = do
_x1 <-
redactedSearchAndInsert
[ ("searchstr", "cherish"),
("artistname", "kirinji"),
-- ("year", "1982"),
-- ("format", "MP3"),
-- ("releasetype", "album"),
("order_by", "year")
]
_x3 <-
redactedSearchAndInsert
[ ("searchstr", "mouss et hakim"),
("artistname", "mouss et hakim"),
-- ("year", "1982"),
-- ("format", "MP3"),
-- ("releasetype", "album"),
("order_by", "year")
]
_x2 <-
redactedSearchAndInsert
[ ("searchstr", "thriller"),
("artistname", "michael jackson"),
-- ("year", "1982"),
-- ("format", "MP3"),
-- ("releasetype", "album"),
("order_by", "year")
]
pure ()
redactedRefreshArtist ::
( MonadLogger m,
MonadPostgres m,
MonadThrow m,
MonadOtel m,
MonadRedacted m,
HasField "artistId" dat Int
) =>
dat ->
Transaction m (Label "newTorrents" [Label "torrentId" Int])
redactedRefreshArtist dat = do
redactedPagedSearchAndInsert
( Json.key "torrentgroup" $
parseTourGroups
( t2
#torrentFieldName
"torrent"
#torrentIdName
"id"
)
)
( \page ->
redactedGetArtist
( T2
(getLabel @"artistId" dat)
page
)
)
-- | Do the search, return a transaction that inserts all results from all pages of the search.
redactedSearchAndInsert ::
(MonadLogger m, MonadPostgres m, MonadThrow m, MonadOtel m, MonadRedacted m) =>
[(ByteString, ByteString)] ->
Transaction m (Label "newTorrents" [Label "torrentId" Int])
redactedSearchAndInsert extraArguments =
redactedPagedSearchAndInsert
(Json.key "results" $ parseTourGroups (T2 (label @"torrentFieldName" "torrents") (label @"torrentIdName" "torrentId")))
( redactedSearch
(label @"actionArgs" extraArguments)
)
-- | Parse the standard Redacted reply object, @{ status: "success", response: ... }@ or throw an error.
--
-- The response might contain a `pages` field, if not well return 1.
parseRedactedReplyStatus ::
(Monad m) =>
Json.ParseT ErrorTree m b ->
Json.ParseT ErrorTree m (T2 "pages" Natural "response" b)
parseRedactedReplyStatus inner = do
status <- Json.key "status" Json.asText
when (status /= "success") $ do
Json.throwCustomError ([fmt|Status was not "success", but {status}|] :: ErrorTree)
Json.key "response" $ do
pages <-
Json.keyMay
"pages"
( Field.toJsonParser
( Field.mapError singleError $
Field.jsonNumber >>> Field.boundedScientificIntegral @Int "not an Integer" >>> Field.integralToNatural
)
)
-- in case the field is missing, lets assume there is only one page
<&> fromMaybe 1
res <- inner
pure $
T2
(label @"pages" pages)
(label @"response" res)
type TourGroups =
( Label
"tourGroups"
[ T2
"tourGroup"
TourGroup
"torrents"
[T2 "torrentId" Int "fullJsonResult" Json.Value]
]
)
data TourGroup = TourGroup
{ groupId :: Int,
groupName :: Text,
fullJsonResult :: Json.Value,
-- | Needed for sm0rt request recursion
groupArtists :: [Label "artistId" Int]
}
parseTourGroups ::
( Monad m,
HasField "torrentFieldName" opts Text,
HasField "torrentIdName" opts Text
) =>
opts ->
Json.ParseT err m TourGroups
parseTourGroups opts =
do
label @"tourGroups"
<$> ( catMaybes
<$> ( Json.eachInArray $ do
Json.keyMay opts.torrentFieldName (pure ()) >>= \case
-- not a torrent group, maybe some files or something (e.g. guitar tabs see Dream Theater Systematic Chaos)
Nothing -> pure Nothing
Just () -> do
groupId <- Json.key "groupId" (Json.asIntegral @_ @Int)
groupName <- Json.key "groupName" Json.asText
groupArtists <-
Json.keyMayMempty "artists" $
Json.eachInArray $
lbl #artistId <$> Json.key "id" (Json.asIntegral @_ @Int)
fullJsonResult <-
Json.asObject
-- remove torrents cause they are inserted separately below
<&> KeyMap.filterWithKey (\k _ -> k /= (opts.torrentFieldName & Key.fromText))
<&> Json.Object
let tourGroup = TourGroup {..}
torrents <- Json.keyLabel @"torrents" opts.torrentFieldName $
Json.eachInArray $ do
torrentId <- Json.keyLabel @"torrentId" opts.torrentIdName (Json.asIntegral @_ @Int)
fullJsonResultT <-
label @"fullJsonResult"
<$> ( Json.asObject
<&> KeyMap.mapKeyVal
( \k ->
if
-- some torrent objects use “snatched” instead of “snatches”
| k == "snatched" -> "snatches"
-- normalize the torrent id field
| k == (opts.torrentIdName & Key.fromText) -> "torrentId"
| otherwise -> k
)
id
<&> Json.Object
)
pure $ T2 torrentId fullJsonResultT
pure $ Just (T2 (label @"tourGroup" tourGroup) torrents)
)
)
testChunkBetween :: IO [NonEmpty Integer]
testChunkBetween = do
Cond.runConduit $
( do
Cond.yield [0]
Cond.yield [1, 2]
Cond.yield [3, 4]
Cond.yield []
Cond.yield [5, 6]
Cond.yield [7]
)
.| filterEmpty
.| chunkBetween (t2 #low 2 #high 4)
.| Cond.sinkList
filterEmpty :: (Monad m) => ConduitT [a] (NonEmpty a) m ()
filterEmpty = Cond.awaitForever yieldIfNonEmpty
-- | Chunk the given stream of lists into chunks that are between @low@ and @high@ (both incl).
-- The last chunk might be shorter than low.
chunkBetween ::
( HasField "low" p Natural,
HasField "high" p Natural,
Monad m
) =>
p ->
ConduitT (NonEmpty a) (NonEmpty a) m ()
chunkBetween opts = do
go mempty
where
low = min opts.low opts.high
high = max opts.low opts.high
high' = assertField (boundedNatural @Int) high
l = lengthNatural
go !acc = do
if l acc >= low
then do
let (c, rest) = List.splitAt high' acc
yieldIfNonEmpty c
go rest
else do
xs <- Cond.await
case xs of
Nothing -> yieldIfNonEmpty acc
Just xs' -> go (acc <> NonEmpty.toList xs')
yieldIfNonEmpty :: (Monad m) => [a] -> ConduitT i (NonEmpty a) m ()
yieldIfNonEmpty = \case
IsNonEmpty xs -> Cond.yield xs
IsEmpty -> pure ()
redactedPagedSearchAndInsert ::
forall m.
( MonadLogger m,
MonadPostgres m,
MonadOtel m
) =>
Json.Parse ErrorTree TourGroups ->
-- | A redacted request that returns a paged result
( forall a.
Label "page" (Maybe Natural) ->
Json.Parse ErrorTree a ->
m a
) ->
Transaction m (Label "newTorrents" [Label "torrentId" Int])
redactedPagedSearchAndInsert innerParser pagedRequest = do
-- The first search returns the amount of pages, so we use that to query all results piece by piece.
firstPage <- go Nothing
let remainingPages = firstPage.pages - 1
logInfo [fmt|Got the first page, found {remainingPages} more pages|]
let otherPagesNum = [(2 :: Natural) .. remainingPages]
Cond.runConduit @(Transaction m) $
( do
Cond.yield (singleton firstPage)
case otherPagesNum of
IsNonEmpty o -> runConcurrentlyBunched' (lbl #batchSize 5) (go . Just <$> o)
IsEmpty -> pure ()
)
.| chunkBetween (t2 #low 5 #high 10)
.| Cond.mapMC
( \block ->
block
& concatMap (.response.tourGroups)
& \case
IsNonEmpty tgs -> do
tgs & insertTourGroupsAndTorrents
pure $ tgs & concatMap (\tg -> tg.torrents <&> getLabel @"torrentId")
IsEmpty -> pure []
)
.| Cond.concatC
.| Cond.sinkList
<&> label @"newTorrents"
where
go mpage =
lift @Transaction $
pagedRequest
(label @"page" mpage)
( parseRedactedReplyStatus $ innerParser
)
insertTourGroupsAndTorrents ::
NonEmpty
( T2
"tourGroup"
TourGroup
"torrents"
[T2 "torrentId" Int "fullJsonResult" Json.Value]
) ->
Transaction m ()
insertTourGroupsAndTorrents dat = inSpan' "Insert Tour Groups & Torrents" $ \span -> do
addAttribute span "tour_group.length" (dat & lengthNatural, naturalDecimalT)
let tourGroups = dat <&> (.tourGroup)
let torrents = dat <&> (.torrents)
insertTourGroups tourGroups
>>= ( \res ->
insertTorrents $
zipT2 $
T2
(label @"torrentGroupIdPg" $ res <&> (.tourGroupIdPg))
(label @"torrents" (torrents & toList))
)
insertTourGroups ::
NonEmpty TourGroup ->
Transaction m [Label "tourGroupIdPg" Int]
insertTourGroups dats = do
let groupNames =
dats <&> \dat -> [fmt|{dat.groupId}: {dat.groupName}|]
logInfo [fmt|Inserting tour groups for {showPretty groupNames}|]
_ <-
execute
[fmt|
DELETE FROM redacted.torrent_groups
WHERE group_id = ANY (?::integer[])
|]
(Only $ (dats <&> (.groupId) & toList & PGArray :: PGArray Int))
executeManyReturningWith
[fmt|
INSERT INTO redacted.torrent_groups (
group_id, group_name, full_json_result
) VALUES
( ?, ? , ? )
ON CONFLICT (group_id) DO UPDATE SET
group_id = excluded.group_id,
group_name = excluded.group_name,
full_json_result = excluded.full_json_result
RETURNING (id)
|]
( dats
-- make sure we dont have the same conflict target twice
& NonEmpty.nubBy (\a b -> a.groupId == b.groupId)
<&> ( \dat ->
( dat.groupId,
dat.groupName,
dat.fullJsonResult
)
)
)
(label @"tourGroupIdPg" <$> Dec.fromField @Int)
insertTorrents ::
[ T2
"torrentGroupIdPg"
Int
"torrents"
[T2 "torrentId" Int "fullJsonResult" Json.Value]
] ->
Transaction m ()
insertTorrents dats = do
_ <-
execute
[sql|
DELETE FROM redacted.torrents_json
WHERE torrent_id = ANY (?::integer[])
|]
( Only $
PGArray
[ torrent.torrentId
| dat <- dats,
torrent <- dat.torrents
]
)
execute
[sql|
INSERT INTO redacted.torrents_json
( torrent_group
, torrent_id
, full_json_result)
SELECT *
FROM UNNEST(
?::integer[]
, ?::integer[]
, ?::jsonb[]
) AS inputs(
torrent_group
, torrent_id
, full_json_result)
|]
( [ T3
(getLabel @"torrentGroupIdPg" dat)
(getLabel @"torrentId" group)
(getLabel @"fullJsonResult" group)
| dat <- dats,
group <- dat.torrents
]
& List.nubBy (\a b -> a.torrentId == b.torrentId)
& unzip3PGArray
@"torrentGroupIdPg"
@Int
@"torrentId"
@Int
@"fullJsonResult"
@Json.Value
)
pure ()
-- | Traverse over the given function in parallel, but only allow a certain amount of concurrent requests.
-- Will start new threads as soon as a resource becomes available, but always return results in input ordering.
runConcurrentlyBunched' ::
forall m opts a.
( MonadUnliftIO m,
HasField "batchSize" opts Natural
) =>
opts ->
-- | list of actions to run
NonEmpty (m a) ->
ConduitT () (NonEmpty a) m ()
runConcurrentlyBunched' opts acts = do
let batchSize = assertField (boundedNatural @Int) opts.batchSize
runInIO <- lift askRunInIO
-- NB: make sure none of the asyncs escape from here
Cond.transPipe (Cond.runResourceT @m) $ do
-- This use of resourceForkWith looks a little off, but its the only way to return an `Async a`, I hope it brackets correctly lol
ctx <- Otel.getContext
let spawn :: m a -> Cond.ResourceT m (Async a)
spawn f = resourceForkWith (\io -> async (io >> Otel.attachContext ctx >> runInIO f)) (pure ())
qsem <- newQSem batchSize
-- spawn all asyncs here, but limit how many get run consecutively by threading through a semaphore
spawned <- for acts $ \act ->
lift $ spawn $ withQSem qsem $ act
Cond.yieldMany (spawned & NonEmpty.toList)
.| awaitAllReadyAsyncs
-- | Consume as many asyncs as are ready and return their results.
--
-- Make sure they are already running (if you use 'Cond.yieldM' they are only started when awaited by the conduit).
--
-- If any async throws an exception, the exception will be thrown in the conduit.
-- Already running asyncs will not be cancelled. (TODO: can we somehow make that a thing?)
awaitAllReadyAsyncs :: forall m a. (MonadIO m) => ConduitT (Async a) (NonEmpty a) m ()
awaitAllReadyAsyncs = go
where
-- wait for the next async and then consume as many as are already done
go :: ConduitT (Async a) (NonEmpty a) m ()
go = do
Cond.await >>= \case
Nothing -> pure ()
Just nextAsync -> do
res <- Async.wait nextAsync
-- consume as many asyncs as are already done
goAllReady (RevList.singleton res)
goAllReady :: RevList a -> ConduitT (Async a) (NonEmpty a) m ()
goAllReady !acc = do
next <- Cond.await
case next of
Nothing -> yieldIfNonEmptyRev acc
Just a -> do
thereAlready <- Async.poll a
case thereAlready of
Nothing -> do
-- consumed everything that was available, yield and wait for the next block
yieldIfNonEmptyRev acc
Cond.leftover a
go
Just _ -> do
-- will not block
res <- Async.wait a
goAllReady (acc <> RevList.singleton res)
yieldIfNonEmptyRev :: RevList a -> ConduitT (Async a) (NonEmpty a) m ()
yieldIfNonEmptyRev r = do
case r & RevList.revListToList of
IsNonEmpty e -> Cond.yield e
IsEmpty -> pure ()
testAwaitAllReadyAsyncs :: IO [[Char]]
testAwaitAllReadyAsyncs =
Cond.runConduit $
( do
running <-
lift $
sequence
[ async (print "foo" >> pure 'a'),
async (pure 'b'),
async (threadDelay 5000 >> pure 'c'),
async (print "bar" >> pure '5'),
async (threadDelay 1_000_000 >> print "lol" >> pure 'd'),
async (error "no"),
async (print "baz" >> pure 'f')
]
Cond.yieldMany running
)
.| awaitAllReadyAsyncs
.| Cond.mapC toList
.| Cond.sinkList
-- | Run the field parser and throw an uncatchable assertion error if it fails.
assertField :: (HasCallStack) => FieldParser from to -> from -> to
assertField parser from = Field.runFieldParser parser from & unwrapError
boundedNatural :: forall i. (Integral i, Bounded i) => FieldParser Natural i
boundedNatural = lmap naturalToInteger (Field.bounded @i "boundedNatural")
redactedGetTorrentFileAndInsert ::
( HasField "torrentId" r Int,
HasField "useFreeleechTokens" r Bool,
MonadPostgres m,
MonadThrow m,
MonadLogger m,
MonadOtel m,
MonadRedacted m
) =>
r ->
Transaction m (Label "torrentFile" ByteString)
redactedGetTorrentFileAndInsert dat = inSpan' "Redacted Get Torrent File and Insert" $ \span -> do
bytes <- lift $ redactedGetTorrentFile dat
execute
[sql|
UPDATE redacted.torrents_json
SET torrent_file = ?::bytea
WHERE torrent_id = ?::integer
|]
( (Binary bytes :: Binary ByteString),
dat.torrentId
)
>>= assertOneUpdated span "redactedGetTorrentFileAndInsert"
>>= \() -> pure (label @"torrentFile" bytes)
getTorrentFileById ::
( MonadPostgres m,
HasField "torrentId" r Int,
MonadThrow m
) =>
r ->
Transaction m (Maybe (Label "torrentFile" ByteString))
getTorrentFileById dat = do
queryWith
[sql|
SELECT torrent_file
FROM redacted.torrents
WHERE torrent_id = ?::integer
|]
(Only $ (dat.torrentId :: Int))
(fmap @Maybe (label @"torrentFile") <$> Dec.byteaMay)
>>= ensureSingleRow
updateTransmissionTorrentHashById ::
( MonadPostgres m,
HasField "torrentId" r Int,
HasField "torrentHash" r Text
) =>
r ->
Transaction m (Label "numberOfRowsAffected" Natural)
updateTransmissionTorrentHashById dat = do
execute
[sql|
UPDATE redacted.torrents_json
SET transmission_torrent_hash = ?::text
WHERE torrent_id = ?::integer
|]
( dat.torrentHash :: Text,
dat.torrentId :: Int
)
assertOneUpdated ::
(HasField "numberOfRowsAffected" r Natural, MonadThrow m, MonadIO m) =>
Otel.Span ->
Text ->
r ->
m ()
assertOneUpdated span name x = case x.numberOfRowsAffected of
1 -> pure ()
n -> appThrow span ([fmt|{name :: Text}: Expected to update exactly one row, but updated {n :: Natural} row(s)|])
data TorrentData transmissionInfo = TorrentData
{ groupId :: Int,
torrentId :: Int,
releaseType :: ReleaseType,
seedingWeight :: Int,
artists :: [T2 "artistId" Int "artistName" Text],
torrentGroupJson :: TorrentGroupJson,
torrentStatus :: TorrentStatus transmissionInfo,
torrentFormat :: Text
}
-- | https://redacted.sh/wiki.php?action=article&id=455#_1804298149
data ReleaseType = ReleaseType {intKey :: Int, stringKey :: Text}
deriving stock (Eq, Show)
releaseTypeFromTextOrIntKey :: Text -> ReleaseType
releaseTypeFromTextOrIntKey t =
allReleaseTypesSorted
& List.find
( \rt -> do
rt.stringKey == t || buildText intDecimalT rt.intKey == t
)
& fromMaybe (ReleaseType {intKey = (-1), stringKey = t})
releaseTypeComparison :: Comparison ReleaseType
releaseTypeComparison = listIndexComparison allReleaseTypesSorted
allReleaseTypesSorted :: [ReleaseType]
allReleaseTypesSorted =
[ releaseTypeAlbum,
releaseTypeLiveAlbum,
releaseTypeAnthology,
releaseTypeSoundtrack,
releaseTypeEP,
releaseTypeCompilation,
releaseTypeSingle,
releaseTypeRemix,
releaseTypeBootleg,
releaseTypeInterview,
releaseTypeMixtape,
releaseTypeDemo,
releaseTypeConcertRecording,
releaseTypeDJMix,
releaseTypeUnknown,
releaseTypeProducedBy,
releaseTypeComposition,
releaseTypeRemixedBy,
releaseTypeGuestAppearance
]
releaseTypeAlbum, releaseTypeSoundtrack, releaseTypeEP, releaseTypeAnthology, releaseTypeCompilation, releaseTypeSingle, releaseTypeLiveAlbum, releaseTypeRemix, releaseTypeBootleg, releaseTypeInterview, releaseTypeMixtape, releaseTypeDemo, releaseTypeConcertRecording, releaseTypeDJMix, releaseTypeUnknown, releaseTypeProducedBy, releaseTypeComposition, releaseTypeRemixedBy, releaseTypeGuestAppearance :: ReleaseType
releaseTypeAlbum = ReleaseType 1 "Album"
releaseTypeSoundtrack = ReleaseType 3 "Soundtrack"
releaseTypeEP = ReleaseType 5 "EP"
releaseTypeAnthology = ReleaseType 6 "Anthology"
releaseTypeCompilation = ReleaseType 7 "Compilation"
releaseTypeSingle = ReleaseType 9 "Single"
releaseTypeLiveAlbum = ReleaseType 11 "Live album"
releaseTypeRemix = ReleaseType 13 "Remix"
releaseTypeBootleg = ReleaseType 14 "Bootleg"
releaseTypeInterview = ReleaseType 15 "Interview"
releaseTypeMixtape = ReleaseType 16 "Mixtape"
releaseTypeDemo = ReleaseType 17 "Demo"
releaseTypeConcertRecording = ReleaseType 18 "Concert Recording"
releaseTypeDJMix = ReleaseType 19 "DJ Mix"
releaseTypeUnknown = ReleaseType 21 "Unknown"
releaseTypeProducedBy = ReleaseType 1021 "Produced By"
releaseTypeComposition = ReleaseType 1022 "Composition"
releaseTypeRemixedBy = ReleaseType 1023 "Remixed By"
releaseTypeGuestAppearance = ReleaseType 1024 "Guest Appearance"
data TorrentGroupJson = TorrentGroupJson
{ groupName :: Text,
groupYear :: Natural
}
data TorrentStatus transmissionInfo
= NoTorrentFileYet
| NotInTransmissionYet
| InTransmission (T2 "torrentHash" Text "transmissionInfo" transmissionInfo)
getTorrentById :: (MonadPostgres m, HasField "torrentId" r Int, MonadThrow m) => r -> Transaction m Json.Value
getTorrentById dat = do
queryWith
[sql|
SELECT full_json_result FROM redacted.torrents
WHERE torrent_id = ?::integer
|]
(getLabel @"torrentId" dat)
(Dec.json Json.asValue)
>>= ensureSingleRow
data GetBestTorrentsFilter = GetBestTorrentsFilter
{ onlyArtist :: Maybe (Label "artistRedactedId" Int),
onlyTheseTorrents :: Maybe ([Label "torrentId" Int]),
disallowedReleaseTypes :: [ReleaseType],
limitResults :: Maybe Natural,
ordering :: BestTorrentsOrdering,
onlyFavourites :: Bool
}
data BestTorrentsOrdering = BySeedingWeight | ByLastReleases
-- | Find the best torrent for each torrent group (based on the seeding_weight)
getBestTorrents ::
(MonadPostgres m) =>
GetBestTorrentsFilter ->
Transaction m [TorrentData ()]
getBestTorrents opts = do
queryWith
( [sql|
WITH
artist_has_been_snatched AS (
SELECT DISTINCT artist_id
FROM (
SELECT
UNNEST(artist_ids) as artist_id,
t.torrent_file IS NOT NULL as has_torrent_file
FROM redacted.torrents t) as _
WHERE has_torrent_file
),
filtered_torrents AS (
SELECT DISTINCT ON (torrent_group)
id
FROM
redacted.torrents
JOIN LATERAL
-- filter everything thats not a favourite if requested
(SELECT (
artist_ids && ARRAY(SELECT artist_id FROM redacted.artist_favourites)
OR artist_ids && ARRAY(SELECT artist_id FROM artist_has_been_snatched)
) as is_favourite) as _
ON (NOT ?::bool OR is_favourite)
WHERE
-- filter by artist id
(?::bool OR (?::int = any (artist_ids)))
-- filter by torrent ids
AND
(?::bool OR torrent_id = ANY (?::int[]))
ORDER BY
torrent_group,
-- prefer torrents which we already downloaded
torrent_file,
seeding_weight DESC
),
prepare1 AS (
SELECT
tg.group_id,
t.torrent_id,
t.seeding_weight,
tg.full_json_result->>'releaseType' AS release_type,
-- TODO: different endpoints handle this differently (e.g. action=search and action=artist), we should unify this while parsing
COALESCE(
t.full_json_result->'artists',
tg.full_json_result->'artists',
'[]'::jsonb
) as artists,
t.artist_ids || tg.artist_ids as artist_ids,
tg.full_json_result->>'groupName' AS group_name,
tg.full_json_result->>'groupYear' AS group_year,
t.torrent_file IS NOT NULL AS has_torrent_file,
t.transmission_torrent_hash,
t.full_json_result->>'encoding' AS torrent_format
FROM filtered_torrents f
JOIN redacted.torrents t ON t.id = f.id
JOIN redacted.torrent_groups tg ON tg.id = t.torrent_group
WHERE
tg.full_json_result->>'releaseType' <> ALL (?::text[])
)
SELECT
group_id,
torrent_id,
seeding_weight,
release_type,
artists,
group_name,
group_year,
has_torrent_file,
transmission_torrent_hash,
torrent_format
FROM prepare1
|]
<> case opts.ordering of
BySeedingWeight -> [fmt|ORDER BY seeding_weight DESC|] <> "\n"
ByLastReleases -> [fmt|ORDER BY group_id DESC|] <> "\n"
<> [sql|
LIMIT ?::int
|]
)
( do
let (onlyArtistB, onlyArtistId) = case opts.onlyArtist of
Nothing -> (True, 0)
Just a -> (False, a.artistRedactedId)
let (onlyTheseTorrentsB, onlyTheseTorrents) = case opts.onlyTheseTorrents of
Nothing -> (True, PGArray [])
Just a -> (False, a <&> (.torrentId) & PGArray)
( opts.onlyFavourites :: Bool,
onlyArtistB :: Bool,
onlyArtistId :: Int,
onlyTheseTorrentsB :: Bool,
onlyTheseTorrents,
(opts.disallowedReleaseTypes & concatMap (\rt -> [rt.stringKey, rt.intKey & buildText intDecimalT]) & PGArray :: PGArray Text),
opts.limitResults <&> naturalToInteger :: Maybe Integer
)
)
( do
groupId <- Dec.fromField @Int
torrentId <- Dec.fromField @Int
seedingWeight <- Dec.fromField @Int
releaseType <- releaseTypeFromTextOrIntKey <$> Dec.text
artists <- Dec.json $
Json.eachInArray $ do
id_ <- Json.keyLabel @"artistId" "id" (Json.asIntegral @_ @Int)
name <- Json.keyLabel @"artistName" "name" Json.asText
pure $ T2 id_ name
torrentGroupJson <- do
groupName <- Dec.text
groupYear <- Dec.textParse Field.decimalNatural
pure $ TorrentGroupJson {..}
hasTorrentFile <- Dec.fromField @Bool
transmissionTorrentHash <- Dec.fromField @(Maybe Text)
torrentFormat <- Dec.text
pure $
TorrentData
{ torrentStatus =
if
| not hasTorrentFile -> NoTorrentFileYet
| Nothing <- transmissionTorrentHash -> NotInTransmissionYet
| Just hash <- transmissionTorrentHash ->
InTransmission $
T2 (label @"torrentHash" hash) (label @"transmissionInfo" ()),
torrentFormat = case torrentFormat of
"Lossless" -> "flac"
"V0 (VBR)" -> "V0"
"V2 (VBR)" -> "V2"
"320" -> "320"
"256" -> "256"
o -> o,
..
}
)
getArtistNameById :: (MonadPostgres m, HasField "artistId" r Int) => r -> Transaction m (Maybe Text)
getArtistNameById dat = do
queryFirstRowWithMaybe
[sql|
SELECT artist_name FROM redacted.artist_names
WHERE artist_id = ?::int
LIMIT 1
|]
(getLabel @"artistId" dat)
(Dec.fromField @Text)
-- | Do a request to the redacted API. If you know what that is, you know how to find the API docs.
mkRedactedApiRequest ::
( MonadThrow m,
HasField "action" p ByteString,
HasField "actionArgs" p [(ByteString, Maybe ByteString)],
MonadRedacted m
) =>
p ->
m Http.Request
mkRedactedApiRequest dat = do
authKey <- getRedactedApiKey
pure $
[fmt|https://redacted.sh/ajax.php|]
& Http.setRequestMethod "GET"
& Http.setQueryString (("action", Just dat.action) : dat.actionArgs)
& Http.setRequestHeader "Authorization" [authKey]
httpTorrent ::
( MonadIO m,
MonadThrow m
) =>
Otel.Span ->
Http.Request ->
m ByteString
httpTorrent span req =
Http.httpBS req
>>= assertM
span
( \resp -> do
let statusCode = resp & Http.getResponseStatus & (.statusCode)
contentType =
resp
& Http.getResponseHeaders
& List.lookup "content-type"
<&> Wai.parseContentType
<&> (\(ct, _mimeAttributes) -> ct)
if
| statusCode == 200,
Just "application/x-bittorrent" <- contentType ->
Right $ (resp & Http.getResponseBody)
| statusCode == 200,
Just otherType <- contentType ->
Left [fmt|Redacted returned a non-torrent body, with content-type "{otherType}"|]
| statusCode == 200,
Nothing <- contentType ->
Left [fmt|Redacted returned a body with unspecified content type|]
| code <- statusCode -> Left $ AppExceptionPretty [[fmt|Redacted returned an non-200 error code, code {code}|], pretty resp]
)
redactedApiRequestJson ::
( MonadThrow m,
HasField "action" p ByteString,
HasField "actionArgs" p [(ByteString, Maybe ByteString)],
MonadOtel m,
MonadRedacted m
) =>
Otel.Span ->
p ->
Json.Parse ErrorTree a ->
m a
redactedApiRequestJson span dat parser = do
addAttribute span "redacted.request" (toOtelJsonAttr (T2 (getLabel @"action" dat) (getLabel @"actionArgs" dat)))
mkRedactedApiRequest dat
>>= Http.httpJson defaults parser
test :: (MonadThrow m, MonadRedacted m, MonadOtel m) => m ()
test =
inSpan' "test" $ \span -> do
redactedApiRequestJson
span
(T2 (label @"action" "artist") (label @"actionArgs" [("id", Just "2785")]))
(Json.asValue)
<&> Pretty.showPrettyJsonColored
>>= liftIO . putStderrLn
readTorrentFile :: (MonadIO m, MonadPostgres m) => m ()
readTorrentFile = runTransaction $ do
torrentBytes <-
queryWith
[sql|
SELECT torrent_file from redacted.torrents where torrent_file is not null limit 10 |]
()
Dec.bytea
liftIO $ for_ torrentBytes $ \b -> case testBencode b of
Left e -> do
Text.IO.putStrLn $ prettyErrorTree e
Right a -> printPretty a
liftIO $ print $ lengthNatural torrentBytes
testBencode :: ByteString -> (Either ErrorTree TorrentFile)
testBencode bs = Parse.runParse "cannot parse bencode" (parseBencode >>> bencodeTorrentParser) bs
-- | A torrent file
--
-- from wikipedia:
--
-- * announce—the URL of the high tracker
-- * info—this maps to a dictionary whose keys are very dependent on whether one or more files are being shared:
-- - files—a list of dictionaries each corresponding to a file (only when multiple files are being shared). Each dictionary has the following keys:
-- * length—size of the file in bytes.
-- * path—a list of strings corresponding to subdirectory names, the last of which is the actual file name
-- - length—size of the file in bytes (only when one file is being shared though)
-- - name—suggested filename where the file is to be saved (if one file)/suggested directory name where the files are to be saved (if multiple files)
-- - piece length—number of bytes per piece. This is commonly 28 KiB = 256 KiB = 262,144 B.
-- - pieces—a hash list, i.e., a concatenation of each piece's SHA-1 hash. As SHA-1 returns a 160-bit hash, pieces will be a string whose length is a multiple of 20 bytes. If the torrent contains multiple files, the pieces are formed by concatenating the files in the order they appear in the files dictionary (i.e., all pieces in the torrent are the full piece length except for the last piece, which may be shorter).
data TorrentFile = TorrentFile
{ announce :: Text,
comment :: Maybe Text,
createdBy :: Maybe Text,
creationDate :: Maybe UTCTime,
encoding :: Maybe Text,
info :: Info
}
deriving stock (Eq, Show)
data Info = Info
{ name :: Text,
files :: [File],
pieceLength :: Natural,
pieces :: ByteString,
private :: Maybe Bool,
source :: Maybe Text
}
deriving stock (Eq, Show)
data File = File
{ length_ :: Natural,
path :: [Text]
}
deriving stock (Eq, Show)
bencodeTorrentParser :: Parse BEncode TorrentFile
bencodeTorrentParser =
bencodeDict >>> do
announce <- mapLookup "announce" bencodeTextLenient
comment <- mapLookupMay "comment" bencodeTextLenient
createdBy <- mapLookupMay "created by" bencodeTextLenient
creationDate <- mapLookupMay "creation date" (bencodeInteger <&> posixSecondsToUTCTime . fromInteger @NominalDiffTime)
encoding <- mapLookupMay "encoding" bencodeTextLenient
info <-
mapLookup "info" $
bencodeDict >>> do
name <- mapLookup "name" bencodeTextLenient
files <-
mapLookup "files" $
bencodeList
>>> ( Parse.multiple $
bencodeDict >>> do
length_ <- mapLookup "length" bencodeNatural
path <- mapLookup "path" $ bencodeList >>> Parse.multiple bencodeTextLenient
pure $ File {..}
)
pieceLength <- mapLookup "piece length" bencodeNatural
pieces <- mapLookup "pieces" bencodeBytes
private <-
mapLookupMay "private" bencodeInteger
<&> fmap
( \case
0 -> False
_ -> True
)
source <- mapLookupMay "source" bencodeTextLenient
pure Info {..}
pure TorrentFile {..}