From 17f5b550667b52948a8f3a3385a4404b9380347d Mon Sep 17 00:00:00 2001
From: Profpatsch
Date: Fri, 7 Mar 2025 15:06:55 +0100
Subject: [PATCH] feat(users/Profpatsch/whatcd-resolver): locally serve
 resources

Proxy the resources through our server instead of always requesting
them from the CDN, which leaks lots of usage data.

This prefetches the resources at server startup (instead of e.g. adding
them to the build environment statically), which is a nice compromise I
think. It also keeps the integrity verification in place, but that is
still done in the client.

Adds `rel=preload` as well, so we start fetching asap.

No caching (yet). We could easily add a hash over the content though,
so the client never has to re-request the resources.

Change-Id: I9aac80cfb1ded09e578ba2a70dcf982bf5322ff6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13215
Reviewed-by: Profpatsch
Tested-by: BuildkiteCI
---
 users/Profpatsch/my-prelude/src/MyPrelude.hs |   6 +
 users/Profpatsch/my-prelude/src/Parse.hs     |  17 +
 users/Profpatsch/whatcd-resolver/src/Http.hs |  47 +-
 .../whatcd-resolver/src/WhatcdResolver.hs    | 570 ++++++++++--------
 4 files changed, 400 insertions(+), 240 deletions(-)

diff --git a/users/Profpatsch/my-prelude/src/MyPrelude.hs b/users/Profpatsch/my-prelude/src/MyPrelude.hs
index d535902f2..231650b5e 100644
--- a/users/Profpatsch/my-prelude/src/MyPrelude.hs
+++ b/users/Profpatsch/my-prelude/src/MyPrelude.hs
@@ -173,6 +173,7 @@ module MyPrelude
     HasCallStack,
     module Data.Error,
     symbolText,
+    unwrapErrorTree,
   )
 where
 
@@ -199,6 +200,7 @@ import Data.Char qualified
 import Data.Coerce (Coercible, coerce)
 import Data.Data (Proxy (Proxy))
 import Data.Error
+import Data.Error.Tree (ErrorTree, prettyErrorTree)
 import Data.Foldable (Foldable (foldMap', toList), fold, foldl', for_, sequenceA_, traverse_)
 import Data.Foldable qualified as Foldable
 import Data.Function ((&))
@@ -833,3 +835,7 @@ symbolText :: forall sym. (KnownSymbol sym) => Text
 symbolText = do
   symbolVal (Proxy :: Proxy sym)
     & stringToText
+
+-- | Like 'unwrapError', but for 'ErrorTree'. Will crash with an uncatchable exception if 'Left'.
+unwrapErrorTree :: (HasCallStack) => Either ErrorTree a -> a
+unwrapErrorTree e = e & first (newError . prettyErrorTree) & unwrapError
diff --git a/users/Profpatsch/my-prelude/src/Parse.hs b/users/Profpatsch/my-prelude/src/Parse.hs
index 2c14e6998..758507d45 100644
--- a/users/Profpatsch/my-prelude/src/Parse.hs
+++ b/users/Profpatsch/my-prelude/src/Parse.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE TemplateHaskellQuotes #-}
 
 module Parse where
 
@@ -14,6 +15,7 @@ import Data.Semigroupoid qualified as Semigroupoid
 import Data.Text qualified as Text
 import FieldParser (FieldParser)
 import FieldParser qualified as Field
+import Language.Haskell.TH.Syntax qualified as TH
 import PossehlAnalyticsPrelude
 import Validation (partitionValidations)
 import Prelude hiding (init, maybe)
@@ -191,3 +193,18 @@ zipNonEmpty (x :| xs) (y :| ys) = (x, y) :| zip xs ys
 
 zipIndex :: NonEmpty b -> NonEmpty (Natural, b)
 zipIndex = zipNonEmpty (1 :| [2 :: Natural ..])
+
+-- | Parse a literal value at compile time. This is used with Template Haskell, like so:
+--
+-- > $$("2023-07-27" & literal hyphenatedDay) :: Time.Day
+--
+-- You need the double @$$@!
+--
+-- ATTN: This needs an instance of the 'TH.Lift' class for the output type.
+-- Many library types don’t yet implement this class, so we have to provide the instances ourselves.
+-- See NOTE: Lift for library types
+literal :: forall from to. (TH.Lift to) => Parse from to -> from -> TH.Code TH.Q to
+literal parser s = do
+  case runParse "Literal parse failed" parser s of
+    Right a -> [||a||]
+    Left err -> TH.liftCode (err & prettyErrorTree & textToString & fail)
diff --git a/users/Profpatsch/whatcd-resolver/src/Http.hs b/users/Profpatsch/whatcd-resolver/src/Http.hs
index f710af92e..e13da3e2c 100644
--- a/users/Profpatsch/whatcd-resolver/src/Http.hs
+++ b/users/Profpatsch/whatcd-resolver/src/Http.hs
@@ -1,7 +1,10 @@
 {-# LANGUAGE QuasiQuotes #-}
+{-# OPTIONS_GHC -Wno-orphans #-}
 
 module Http
-  ( doRequestJson,
+  ( textToURI,
+    uriToHttpClientRequest,
+    doRequestJson,
     RequestOptions (..),
     mkRequestOptions,
     httpJson,
@@ -19,6 +22,8 @@ where
 
 import AppT
+import Control.Exception (Exception (..), SomeException)
+import Control.Monad.Catch.Pure (runCatch)
 import Data.Aeson qualified as Json
 import Data.Aeson.BetterErrors qualified as Json
 import Data.CaseInsensitive (CI (original))
@@ -29,6 +34,7 @@ import Data.List.NonEmpty qualified as NonEmpty
 import Data.Ord (clamp)
 import Data.Text qualified as Text
 import Data.Text.Punycode qualified as Punycode
+import FieldParser (FieldParser' (..))
 import FieldParser qualified as Field
 import Json qualified
 import Json.Enc qualified as Enc
@@ -38,12 +44,51 @@ import Network.HTTP.Client
 import Network.HTTP.Client qualified as Http
 import Network.HTTP.Simple qualified as Http
 import Network.HTTP.Types.Status (Status (..))
+import Network.URI (URI, parseURI)
 import Network.Wai.Parse qualified as Wai
 import Optional
+import Parse (Parse)
+import Parse qualified
 import Pretty
 import UnliftIO.Concurrent (threadDelay)
 import Prelude hiding (span)
 
+-- | Make sure we can parse the given Text into an URI.
+textToURI :: Parse Text URI
+textToURI =
+  Parse.fieldParser
+    ( FieldParser $ \text ->
+        text
+          & textToString
+          & Network.URI.parseURI
+          & annotate [fmt|Cannot parse this as a URL: "{text}"|]
+    )
+
+-- | Make sure we can parse the given URI into a Request.
+--
+-- This tries to work around the horrible, horrible interface in Http.Client.
+uriToHttpClientRequest :: Parse URI Http.Request
+uriToHttpClientRequest =
+  Parse.mkParseNoContext
+    ( \(ctx, url) ->
+        (url & Http.requestFromURI)
+          & runCatch
+          & first (checkException @Http.HttpException)
+          & \case
+            Left (Right (Http.InvalidUrlException urlText reason)) ->
+              Left [fmt|Unable to set the url "{urlText}" as request URL, reason: {reason}, at {Parse.showContext ctx}|]
+            Left (Right exc@(Http.HttpExceptionRequest _ _)) ->
+              Left [fmt|Weird! Should not get a HttpExceptionRequest when parsing an URL (bad library design), was {exc & displayException}, at {Parse.showContext ctx}|]
+            Left (Left someExc) ->
+              Left [fmt|Weird! Should not get anything but a HttpException when parsing an URL (bad library design), was {someExc & displayException}, at {Parse.showContext ctx}|]
+            Right req -> pure req
+    )
+  where
+    checkException :: (Exception b) => SomeException -> Either SomeException b
+    checkException some = case fromException some of
+      Nothing -> Left some
+      Just e -> Right e
+
 data RequestOptions = RequestOptions
   { method :: ByteString,
     host :: Text,
diff --git a/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs b/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs
index dfa3af641..fc51b7fab 100644
--- a/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs
+++ b/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs
@@ -10,12 +10,12 @@ import Comparison
 import Conduit (ConduitT)
 import Conduit qualified
 import Control.Category qualified as Cat
-import Control.Monad.Catch.Pure (runCatch)
 import Control.Monad.Logger.CallStack
 import Control.Monad.Reader
 import Data.Aeson qualified as Json
 import Data.Aeson.BetterErrors qualified as Json
 import Data.Aeson.KeyMap qualified as KeyMap
+import Data.ByteString qualified as ByteString
 import Data.CaseInsensitive (CI)
 import Data.Conduit ((.|))
 import Data.Error.Tree
@@ -28,9 +28,10 @@ import Data.Text qualified as Text
 import Database.PostgreSQL.Simple qualified as Postgres
 import Database.PostgreSQL.Simple.Types (PGArray (PGArray))
 import Database.Postgres.Temp qualified as TmpPg
-import FieldParser (FieldParser, FieldParser' (..))
+import FieldParser (FieldParser)
 import FieldParser qualified as Field
 import Html qualified
+import Http
 import IHP.HSX.QQ (hsx)
 import IHP.HSX.ToHtml (ToHtml)
 import Json qualified
@@ -45,11 +46,10 @@ import Network.HTTP.Client.Conduit qualified as Http
 import Network.HTTP.Simple qualified as Http
 import Network.HTTP.Types
 import Network.HTTP.Types qualified as Http
-import Network.URI (URI)
-import Network.URI qualified
 import Network.Wai (ResponseReceived)
 import Network.Wai qualified as Wai
 import Network.Wai.Handler.Warp qualified as Warp
+import Network.Wai.Parse (parseContentType)
 import OpenTelemetry.Attributes qualified as Otel
 import OpenTelemetry.Context.ThreadLocal qualified as Otel
 import OpenTelemetry.Trace qualified as Otel hiding (getTracer, inSpan, inSpan')
@@ -67,7 +67,9 @@ import System.Environment qualified as Env
 import System.FilePath ((</>))
 import Text.Blaze.Html (Html)
 import Text.Blaze.Html.Renderer.Utf8 qualified as Html
+import Text.Blaze.Html5 ((!))
 import Text.Blaze.Html5 qualified as Html
+import Text.Blaze.Html5.Attributes qualified as HtmlA
 import Tool (readTool, readTools)
 import Transmission
 import UnliftIO hiding (Handler)
@@ -98,6 +100,8 @@ htmlUi = do
           ()
           (Dec.fromField @Text)
 
+  ourHtmlIntegrities <- prefetchHtmlIntegrities
+
   (counterHtmlM, counterHandler, _counterAsync) <- testCounter (label @"endpoint" "counter")
 
   withRunInIO $ \runInIO -> Warp.run 9093 $ \req respondOrig -> do
@@ -129,199 +133,201 @@ htmlUi = do
         let handlers :: Handlers (AppT IO)
             handlers =
-              Map.fromList
-                [ ( "",
-                    Html $ \span -> do
-                      counterHtml <- counterHtmlM
-                      mainHtml counterHtml uniqueRunId span
-                  ),
-                  ( "redacted-search",
-                    HtmlWithQueryArgs (label @"searchstr" <$> singleQueryArgument "searchstr" Cat.id) $
-                      \dat _span -> do
-                        t <- redactedSearchAndInsert [("searchstr", dat.queryArgs.searchstr)]
-                        runTransaction $ do
-                          res <- t
-                          (table, settings) <-
-                            concurrentlyTraced
-                              ( getBestTorrentsTable
-                                  (label @"groupByReleaseType" True)
-                                  ( Just
-                                      ( E21
-                                          (label @"onlyTheseTorrents" res.newTorrents)
-                                      ) ::
-                                      Maybe
-                                        ( E2
"onlyTheseTorrents" - [Label "torrentId" Int] - "artistRedactedId" - Natural - ) - ) - ) - (getSettings) - pure $ - mainHtml' - ( MainHtml - { pageTitle = [fmt|whatcd-resolver – Search – {dat.queryArgs.searchstr & bytesToTextUtf8Lenient}|], - returnUrl = dat.returnUrl, - counterHtml = "", - mainContent = - [hsx|

Search results for
{dat.queryArgs.searchstr}

{table}|], - uniqueRunId, - searchFieldContent = dat.queryArgs.searchstr & bytesToTextUtf8Lenient, - settings - } - ) - ), - ( "snips/redacted/torrentDataJson", - Html $ \span -> do - dat <- torrentIdMp span - Html.mkVal <$> (runTransaction $ getTorrentById dat) - ), - ( "snips/redacted/getTorrentFile", - HtmlOrReferer $ \span -> do - dat <- torrentIdMp span - runTransaction $ do - settings <- getSettings - inserted <- redactedGetTorrentFileAndInsert (T2 dat (getLabel @"useFreeleechTokens" settings)) - running <- - lift @Transaction $ - doTransmissionRequest' (transmissionRequestAddTorrent inserted) - updateTransmissionTorrentHashById - ( T2 - (getLabel @"torrentHash" running) - (getLabel @"torrentId" dat) - ) - pure $ - everySecond - "snips/transmission/getTorrentState" - (Enc.object [("torrent-hash", Enc.text running.torrentHash)]) - "Starting" - ), - -- TODO: this is bad duplication?? - ( "snips/redacted/startTorrentFile", - Html $ \span -> do - dat <- torrentIdMp span - runTransaction $ do - file <- - getTorrentFileById dat - <&> annotate [fmt|No torrent file for torrentId "{dat.torrentId}"|] - >>= orAppThrow span + Map.fromList $ + ourHtmlIntegrities.handlers + <> [ ( "", + Html $ \span -> do + counterHtml <- counterHtmlM + mainHtml ourHtmlIntegrities counterHtml uniqueRunId span + ), + ( "redacted-search", + HtmlWithQueryArgs (label @"searchstr" <$> singleQueryArgument "searchstr" Cat.id) $ + \dat _span -> do + t <- redactedSearchAndInsert [("searchstr", dat.queryArgs.searchstr)] + runTransaction $ do + res <- t + (table, settings) <- + concurrentlyTraced + ( getBestTorrentsTable + (label @"groupByReleaseType" True) + ( Just + ( E21 + (label @"onlyTheseTorrents" res.newTorrents) + ) :: + Maybe + ( E2 + "onlyTheseTorrents" + [Label "torrentId" Int] + "artistRedactedId" + Natural + ) + ) + ) + (getSettings) + pure $ + mainHtml' + ourHtmlIntegrities + ( MainHtml + { pageTitle = [fmt|whatcd-resolver – Search – {dat.queryArgs.searchstr & bytesToTextUtf8Lenient}|], + returnUrl = dat.returnUrl, + counterHtml = "", + mainContent = + [hsx|

Search results for
{dat.queryArgs.searchstr}

{table}|], + uniqueRunId, + searchFieldContent = dat.queryArgs.searchstr & bytesToTextUtf8Lenient, + settings + } + ) + ), + ( "snips/redacted/torrentDataJson", + Html $ \span -> do + dat <- torrentIdMp span + Html.mkVal <$> (runTransaction $ getTorrentById dat) + ), + ( "snips/redacted/getTorrentFile", + HtmlOrReferer $ \span -> do + dat <- torrentIdMp span + runTransaction $ do + settings <- getSettings + inserted <- redactedGetTorrentFileAndInsert (T2 dat (getLabel @"useFreeleechTokens" settings)) + running <- + lift @Transaction $ + doTransmissionRequest' (transmissionRequestAddTorrent inserted) + updateTransmissionTorrentHashById + ( T2 + (getLabel @"torrentHash" running) + (getLabel @"torrentId" dat) + ) + pure $ + everySecond + "snips/transmission/getTorrentState" + (Enc.object [("torrent-hash", Enc.text running.torrentHash)]) + "Starting" + ), + -- TODO: this is bad duplication?? + ( "snips/redacted/startTorrentFile", + Html $ \span -> do + dat <- torrentIdMp span + runTransaction $ do + file <- + getTorrentFileById dat + <&> annotate [fmt|No torrent file for torrentId "{dat.torrentId}"|] + >>= orAppThrow span - running <- - lift @Transaction $ - doTransmissionRequest' (transmissionRequestAddTorrent file) - updateTransmissionTorrentHashById - ( T2 - (getLabel @"torrentHash" running) - (getLabel @"torrentId" dat) - ) - pure $ - everySecond - "snips/transmission/getTorrentState" - (Enc.object [("torrent-hash", Enc.text running.torrentHash)]) - "Starting" - ), - ( "snips/transmission/getTorrentState", - Html $ \span -> do - dat <- parseMultipartOrThrow span req $ label @"torrentHash" <$> Multipart.field "torrent-hash" Field.utf8 - status <- - doTransmissionRequest' - ( transmissionRequestListOnlyTorrents - ( T2 - (label @"ids" [label @"torrentHash" dat.torrentHash]) - (label @"fields" ["hashString"]) - ) - (Json.keyLabel @"torrentHash" "hashString" Json.asText) - ) - <&> List.find (\torrent -> torrent.torrentHash == dat.torrentHash) + running <- + lift @Transaction $ + doTransmissionRequest' (transmissionRequestAddTorrent file) + updateTransmissionTorrentHashById + ( T2 + (getLabel @"torrentHash" running) + (getLabel @"torrentId" dat) + ) + pure $ + everySecond + "snips/transmission/getTorrentState" + (Enc.object [("torrent-hash", Enc.text running.torrentHash)]) + "Starting" + ), + ( "snips/transmission/getTorrentState", + Html $ \span -> do + dat <- parseMultipartOrThrow span req $ label @"torrentHash" <$> Multipart.field "torrent-hash" Field.utf8 + status <- + doTransmissionRequest' + ( transmissionRequestListOnlyTorrents + ( T2 + (label @"ids" [label @"torrentHash" dat.torrentHash]) + (label @"fields" ["hashString"]) + ) + (Json.keyLabel @"torrentHash" "hashString" Json.asText) + ) + <&> List.find (\torrent -> torrent.torrentHash == dat.torrentHash) - pure $ - case status of - Nothing -> [hsx|ERROR unknown|] - Just _torrent -> [hsx|Running|] - ), - ( "snips/jsonld/render", - do - HtmlWithQueryArgs - ( label @"target" - <$> ( (singleQueryArgument "target" Field.utf8 >>> textToURI) - & Parse.andParse uriToHttpClientRequest - ) - ) - ( \dat _span -> do - jsonld <- httpGetJsonLd (dat.queryArgs.target) - pure $ renderJsonld jsonld - ) - ), - ("counter", counterHandler), - ( "settings", - PostAndRedirect - ( do - settings <- runTransaction getSettings - pure $ do - returnTo <- Multipart.fieldLabel @"returnTo" "returnTo" Field.utf8 - parsed <- label @"settings" <$> settingsMultipartParser settings - pure $ T2 returnTo parsed - ) - $ \_span (s :: T2 "returnTo" Text "settings" Settings) -> 
do - let Settings {useFreeleechTokens} = s.settings - runTransaction $ do - _ <- - writeSettings - [ T2 - (label @"key" "useFreeleechTokens") - (label @"val" $ Json.Bool useFreeleechTokens) - ] - pure $ label @"redirectTo" (s.returnTo & textToBytesUtf8) - ), - ( "artist", - do - HtmlWithQueryArgs - ( label @"artistRedactedId" - <$> (singleQueryArgument "redacted_id" (Field.utf8 >>> Field.decimalNatural)) - ) - $ \dat _span -> do - artistPage (T2 dat.queryArgs (label @"uniqueRunId" uniqueRunId)) - ), - ( "artist/refresh", - HtmlOrRedirect $ - \span -> do - dat <- - parseMultipartOrThrow - span - req - (label @"artistId" <$> Multipart.field "artist-id" Field.utf8) - t <- redactedRefreshArtist dat - runTransaction $ do - t - pure $ E22 (label @"redirectTo" [fmt|/artist?redacted_id={dat.artistId}|]) - ), - ( "autorefresh", - Plain $ do - qry <- - parseQueryArgsNewSpan - "Autorefresh Query Parse" - ( label @"hasItBeenRestarted" - <$> singleQueryArgument "hasItBeenRestarted" Field.utf8 - ) - pure $ - Wai.responseLBS - Http.ok200 - ( [("Content-Type", "text/html")] - <> if uniqueRunId /= qry.hasItBeenRestarted - then -- cause the client side to refresh - [("HX-Refresh", "true")] - else [] - ) - "" - ) - ] + pure $ + case status of + Nothing -> [hsx|ERROR unknown|] + Just _torrent -> [hsx|Running|] + ), + ( "snips/jsonld/render", + do + HtmlWithQueryArgs + ( label @"target" + <$> ( (singleQueryArgument "target" Field.utf8 >>> textToURI) + & Parse.andParse uriToHttpClientRequest + ) + ) + ( \dat _span -> do + jsonld <- httpGetJsonLd (dat.queryArgs.target) + pure $ renderJsonld jsonld + ) + ), + ("counter", counterHandler), + ( "settings", + PostAndRedirect + ( do + settings <- runTransaction getSettings + pure $ do + returnTo <- Multipart.fieldLabel @"returnTo" "returnTo" Field.utf8 + parsed <- label @"settings" <$> settingsMultipartParser settings + pure $ T2 returnTo parsed + ) + $ \_span (s :: T2 "returnTo" Text "settings" Settings) -> do + let Settings {useFreeleechTokens} = s.settings + runTransaction $ do + _ <- + writeSettings + [ T2 + (label @"key" "useFreeleechTokens") + (label @"val" $ Json.Bool useFreeleechTokens) + ] + pure $ label @"redirectTo" (s.returnTo & textToBytesUtf8) + ), + ( "artist", + do + HtmlWithQueryArgs + ( label @"artistRedactedId" + <$> (singleQueryArgument "redacted_id" (Field.utf8 >>> Field.decimalNatural)) + ) + $ \dat _span -> do + artistPage ourHtmlIntegrities (T2 dat.queryArgs (label @"uniqueRunId" uniqueRunId)) + ), + ( "artist/refresh", + HtmlOrRedirect $ + \span -> do + dat <- + parseMultipartOrThrow + span + req + (label @"artistId" <$> Multipart.field "artist-id" Field.utf8) + t <- redactedRefreshArtist dat + runTransaction $ do + t + pure $ E22 (label @"redirectTo" [fmt|/artist?redacted_id={dat.artistId}|]) + ), + ( "autorefresh", + Plain $ do + qry <- + parseQueryArgsNewSpan + "Autorefresh Query Parse" + ( label @"hasItBeenRestarted" + <$> singleQueryArgument "hasItBeenRestarted" Field.utf8 + ) + pure $ + Wai.responseLBS + Http.ok200 + ( [("Content-Type", "text/html")] + <> if uniqueRunId /= qry.hasItBeenRestarted + then -- cause the client side to refresh + [("HX-Refresh", "true")] + else [] + ) + "" + ) + ] runInIO $ runHandlers ( Html $ \span -> do counterHtml <- counterHtmlM - mainHtml counterHtml uniqueRunId span + mainHtml ourHtmlIntegrities counterHtml uniqueRunId span ) handlers req @@ -330,8 +336,8 @@ htmlUi = do everySecond :: Text -> Enc -> Html -> Html everySecond call extraData innerHtml = [hsx|
{innerHtml}
|] - mainHtml :: Html -> Text -> Otel.Span -> AppT IO Html - mainHtml counterHtml uniqueRunId _span = runTransaction $ do + mainHtml :: OurHtmlIntegrities (AppT IO) -> Html -> Text -> Otel.Span -> AppT IO Html + mainHtml ourHtmlIntegrities counterHtml uniqueRunId _span = runTransaction $ do -- jsonld <- -- httpGetJsonLd -- ( URI.parseURI "https://musicbrainz.org/work/92000fd4-d304-406d-aeb4-6bdbeed318ec" & annotate "not an URI" & unwrapError, @@ -345,6 +351,7 @@ htmlUi = do -- transmissionTorrentsTable <- lift @Transaction getTransmissionTorrentsTable pure $ mainHtml' + ourHtmlIntegrities ( MainHtml { pageTitle = "whatcd-resolver", returnUrl = "/", @@ -366,10 +373,11 @@ data MainHtml = MainHtml settings :: Settings } -mainHtml' :: MainHtml -> Html -mainHtml' dat = do +mainHtml' :: OurHtmlIntegrities m -> MainHtml -> Html +mainHtml' integrities dat = do -- transmissionTorrentsTable <- lift @Transaction getTransmissionTorrentsTable htmlPageChrome + integrities dat.pageTitle ( [hsx| {dat.counterHtml} @@ -443,8 +451,8 @@ redirectOrFallback target responseFn req = do Just _ -> responseFn Http.ok200 ("Hx-Redirect", target) Nothing -> responseFn Http.seeOther303 ("Location", target) -htmlPageChrome :: (ToHtml a) => Text -> a -> Html -htmlPageChrome title body = +htmlPageChrome :: (ToHtml a) => OurHtmlIntegrities m -> Text -> a -> Html +htmlPageChrome integrities title body = Html.docTypeHtml $ [hsx| @@ -458,9 +466,7 @@ htmlPageChrome title body = TODO: create favicon --> - - - + {integrities.html}
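The commit message above only sketches the mechanism in prose: fetch each third-party resource once at server startup, remember an SRI-style integrity value for it, and answer requests for it from our own origin so the browser never contacts the CDN. The patch's actual prefetchHtmlIntegrities/OurHtmlIntegrities machinery is not fully visible in this excerpt, so the following is only a rough, self-contained sketch of that idea; the names Prefetched, prefetchResource and serveResource are invented here, and it assumes the http-conduit, cryptohash-sha256, base64-bytestring and wai packages.

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}

module PrefetchSketch where

import Crypto.Hash.SHA256 qualified as SHA256 -- cryptohash-sha256
import Data.ByteString (ByteString)
import Data.ByteString.Base64 qualified as Base64 -- base64-bytestring
import Data.ByteString.Lazy qualified as Lazy
import Data.Text (Text)
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TextEnc
import Network.HTTP.Simple qualified as Http -- http-conduit
import Network.HTTP.Types (status200)
import Network.Wai qualified as Wai

-- | A resource fetched once at startup, plus its subresource-integrity value.
data Prefetched = Prefetched
  { -- | the raw bytes of the resource
    prefetchedBody :: ByteString,
    -- | SRI value of the form @sha256-<base64 digest>@, usable for the
    -- @integrity@ attribute and as a content-addressed cache key
    prefetchedIntegrity :: Text
  }

-- | Fetch the resource from its CDN URL (once, at server startup) and
-- compute the sha256 SRI value of the bytes we actually received.
prefetchResource :: Text -> IO Prefetched
prefetchResource url = do
  req <- Http.parseRequest (Text.unpack url)
  resp <- Http.httpBS req
  let bytes = Http.getResponseBody resp
  pure
    Prefetched
      { prefetchedBody = bytes,
        prefetchedIntegrity =
          "sha256-" <> TextEnc.decodeUtf8 (Base64.encode (SHA256.hash bytes))
      }

-- | Serve the prefetched bytes from our own origin, so the client never
-- talks to the CDN after startup.
serveResource :: Prefetched -> Wai.Application
serveResource res _req respond =
  respond $
    Wai.responseLBS
      status200
      [("Content-Type", "text/javascript")]
      (Lazy.fromStrict (prefetchedBody res))

The same sha256 value can back the integrity= attribute on the script tag, the rel=preload hint, and later a content-addressed cache header, so the client never has to re-request an unchanged resource.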