feat(users/Profpatsch/whatcd-resolver): add a simple torrent parser
The `bencode` library is anything but production-grade, but it’s enough to parse the torrent files generated by Gazelle lol. This should help with … I haven’t really figured out yet what it helps with I guess. But it was fun. I like the `Parse` abstraction very much. It can parse XML and it can parse Bencode. Good. Change-Id: If1331de423eab3e91ce9db6e2a7eb84da51b18a2 Reviewed-on: https://cl.tvl.fyi/c/depot/+/13211 Tested-by: BuildkiteCI Reviewed-by: Profpatsch <mail@profpatsch.de>
This commit is contained in:
parent
d64c53d051
commit
dde78515f6
8 changed files with 245 additions and 8 deletions
|
|
@ -11,6 +11,7 @@ import Data.Aeson.BetterErrors qualified as Json
|
|||
import Data.Aeson.Key qualified as Key
|
||||
import Data.Aeson.KeyMap qualified as KeyMap
|
||||
import Data.Aeson.Types qualified
|
||||
import Data.ByteString.Base64 qualified as Base64
|
||||
import Data.Error.Tree
|
||||
import Data.Map.Strict qualified as Map
|
||||
import Data.Maybe (catMaybes)
|
||||
|
|
@ -333,6 +334,10 @@ instance ToJSON EmptyObject where
|
|||
mkJsonArray :: [Value] -> Value
|
||||
mkJsonArray xs = xs & Vector.fromList & Array
|
||||
|
||||
-- | Encode a 'ByteString' as a base64-encoded json string
|
||||
mkBase64Bytes :: ByteString -> Value
|
||||
mkBase64Bytes = String . bytesToTextUtf8Unsafe . Base64.encode
|
||||
|
||||
data RestrictJsonOpts = RestrictJsonOpts
|
||||
{ maxDepth :: Natural,
|
||||
maxSizeObject :: Natural,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import Control.Selective (Selective)
|
|||
import Data.Error.Tree
|
||||
import Data.Functor.Compose
|
||||
import Data.List qualified as List
|
||||
import Data.Map.Strict qualified as Map
|
||||
import Data.Monoid (First (..))
|
||||
import Data.Semigroup.Traversable
|
||||
import Data.Semigroupoid qualified as Semigroupoid
|
||||
|
|
@ -72,8 +73,8 @@ mkParsePushContext toPush f = Parse $ \(ctx, from) -> case f (ctx, from) of
|
|||
Right to -> Success (addContext toPush ctx, to)
|
||||
Left err -> Failure $ singleton err
|
||||
|
||||
mkParseNoContext :: (from -> Either ErrorTree to) -> Parse from to
|
||||
mkParseNoContext f = Parse $ \(ctx, from) -> case f from of
|
||||
mkParseNoContext :: ((Context, from) -> Either ErrorTree to) -> Parse from to
|
||||
mkParseNoContext f = Parse $ \(ctx, from) -> case f (ctx, from) of
|
||||
Right to -> Success (ctx, to)
|
||||
Left err -> Failure $ singleton err
|
||||
|
||||
|
|
@ -161,6 +162,24 @@ findAll inner = Parse $ \(ctx, xs) ->
|
|||
Success (ctx, [])
|
||||
(_miss, (hitCtx, hit) : hits) -> Success (hitCtx, hit : (hits <&> snd))
|
||||
|
||||
-- | Find the given element in the map, and parse it with the given parser.
|
||||
mapLookup :: (Coercible Text key, Ord key) => key -> Parse from to -> Parse (Map key from) to
|
||||
mapLookup key inner = do
|
||||
let keyT :: Text = coerce key
|
||||
Parse $ \(ctx, m) -> case Map.lookup (coerce key) m of
|
||||
Nothing -> Failure $ singleton [fmt|Key "{keyT}" not found in map at {showContext ctx}|]
|
||||
Just a -> runParse' inner (addContext keyT ctx, a)
|
||||
|
||||
-- | Find the given element in the map, and parse it with the given parser.
|
||||
--
|
||||
-- Use this instead of `rmap` to add the map key to the error context.
|
||||
mapLookupMay :: (Coercible Text key, Ord key) => key -> Parse from to -> Parse (Map key from) (Maybe to)
|
||||
mapLookupMay key inner = do
|
||||
let keyT :: Text = coerce key
|
||||
Parse $ \(ctx, m) -> case Map.lookup (coerce key) m of
|
||||
Nothing -> Success (addContext keyT ctx, Nothing)
|
||||
Just a -> runParse' (Just <$> inner) (addContext keyT ctx, a)
|
||||
|
||||
-- | convert a 'FieldParser' into a 'Parse'.
|
||||
fieldParser :: FieldParser from to -> Parse from to
|
||||
fieldParser fp = Parse $ \(ctx, from) -> case Field.runFieldParser fp from of
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ pkgs.mkShell {
|
|||
h.async
|
||||
h.aeson-better-errors
|
||||
h.blaze-html
|
||||
h.bencode
|
||||
h.conduit-extra
|
||||
h.error
|
||||
h.monad-logger
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ let
|
|||
./Main.hs
|
||||
./src/WhatcdResolver.hs
|
||||
./src/AppT.hs
|
||||
./src/Bencode.hs
|
||||
./src/JsonLd.hs
|
||||
./src/Optional.hs
|
||||
./src/Html.hs
|
||||
|
|
@ -29,6 +30,7 @@ let
|
|||
pkgs.haskellPackages.pa-field-parser
|
||||
pkgs.haskellPackages.pa-run-command
|
||||
pkgs.haskellPackages.aeson-better-errors
|
||||
pkgs.haskellPackages.bencode
|
||||
pkgs.haskellPackages.blaze-html
|
||||
pkgs.haskellPackages.hs-opentelemetry-sdk
|
||||
pkgs.haskellPackages.http-conduit
|
||||
|
|
|
|||
109
users/Profpatsch/whatcd-resolver/src/Bencode.hs
Normal file
109
users/Profpatsch/whatcd-resolver/src/Bencode.hs
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
{-# LANGUAGE QuasiQuotes #-}
|
||||
|
||||
module Bencode where
|
||||
|
||||
import Aeson (jsonArray)
|
||||
import Data.Aeson qualified as Json
|
||||
import Data.Aeson.Key qualified as Key
|
||||
import Data.Aeson.KeyMap qualified as KeyMap
|
||||
import Data.BEncode (BEncode)
|
||||
import Data.BEncode qualified as Bencode
|
||||
import Data.ByteString.Lazy (LazyByteString)
|
||||
import Data.ByteString.Lazy.Char8 qualified as Char8.Lazy
|
||||
import Data.Char qualified as Char
|
||||
import Data.List qualified as List
|
||||
import Data.Map.Strict qualified as Map
|
||||
import FieldParser qualified as Field
|
||||
import Json (RestrictJsonOpts (RestrictJsonOpts), mkJsonArray)
|
||||
import Json qualified
|
||||
import MyPrelude
|
||||
import Parse (Parse, fieldParser, mkParseNoContext, showContext)
|
||||
import Pretty
|
||||
import Text.Printf (printf)
|
||||
import Prelude hiding (span)
|
||||
|
||||
bencodeBytes :: Parse BEncode ByteString
|
||||
bencodeBytes = Parse.mkParseNoContext $ \(ctx, bencode) -> case bencode of
|
||||
Bencode.BString bs -> Right (toStrictBytes bs)
|
||||
_ -> Left $ [fmt|Expected a bencode byte-string, but got {bencode & prettyBencodeRestricted}, at {showContext ctx}|]
|
||||
|
||||
bencodeInteger :: Parse BEncode Integer
|
||||
bencodeInteger = Parse.mkParseNoContext $ \(ctx, bencode) -> case bencode of
|
||||
Bencode.BInt i -> Right i
|
||||
_ -> Left $ [fmt|Expected a bencode integer, but got {bencode & prettyBencodeRestricted}, at {showContext ctx}|]
|
||||
|
||||
bencodeNatural :: Parse BEncode Natural
|
||||
bencodeNatural = bencodeInteger >>> Parse.fieldParser Field.integralToNatural
|
||||
|
||||
bencodeDict :: Parse BEncode (Map Text BEncode)
|
||||
bencodeDict = Parse.mkParseNoContext $ \(ctx, bencode) -> case bencode of
|
||||
Bencode.BDict d -> Right $ Map.mapKeys stringToText d
|
||||
_ -> Left $ [fmt|Expected a bencode dict, but got {bencode & prettyBencodeRestricted}, at {showContext ctx}|]
|
||||
|
||||
bencodeList :: Parse BEncode [BEncode]
|
||||
bencodeList = Parse.mkParseNoContext $ \(ctx, bencode) -> case bencode of
|
||||
Bencode.BList l -> Right $ l
|
||||
_ -> Left $ [fmt|Expected a bencode list, but got {bencode & prettyBencodeRestricted}, at {showContext ctx}|]
|
||||
|
||||
parseBencode :: Parse ByteString BEncode
|
||||
parseBencode = Parse.mkParseNoContext $ \(ctx, bs) -> do
|
||||
let lazy = toLazyBytes bs
|
||||
case Bencode.bRead lazy of
|
||||
Nothing -> Left $ [fmt|Failed to parse bencode: {Bencode.BString lazy & prettyBencodeRestricted}, at {showContext ctx}|]
|
||||
Just a -> Right a
|
||||
|
||||
bencodeTextLenient :: Parse BEncode Text
|
||||
bencodeTextLenient = Parse.mkParseNoContext $ \(ctx, bencode) -> do
|
||||
case bencode of
|
||||
Bencode.BString bs -> Right (bs & toStrictBytes & bytesToTextUtf8Lenient)
|
||||
_ -> Left $ [fmt|Expected a bencode string, but got {bencode & prettyBencodeRestricted}, at {showContext ctx}|]
|
||||
|
||||
prettyBencodeRestricted :: BEncode -> Text
|
||||
prettyBencodeRestricted =
|
||||
showPrettyJson . Json.restrictJson restriction . bencodeToJsonValue
|
||||
where
|
||||
restriction =
|
||||
RestrictJsonOpts
|
||||
{ maxDepth = 3,
|
||||
maxSizeObject = 10,
|
||||
maxSizeArray = 10,
|
||||
maxStringLength = 100
|
||||
}
|
||||
|
||||
bencodeToJsonValue :: BEncode -> Json.Value
|
||||
bencodeToJsonValue = \case
|
||||
Bencode.BString bs -> case bs & bytesToTextUtf8Lazy of
|
||||
-- If it’s not valid utf-8, let’s at least display a hexdump
|
||||
Left _ -> mkJsonArray $ "hexdump of bytes:" : (hexdump 0 bs <&> Json.String)
|
||||
Right a -> Json.String $ a & toStrict
|
||||
Bencode.BInt i -> Json.Number (fromIntegral @Integer @Scientific i)
|
||||
Bencode.BDict m -> Json.Object $ m & Map.toList <&> bimap Key.fromString bencodeToJsonValue & KeyMap.fromList
|
||||
Bencode.BList l -> jsonArray $ l <&> bencodeToJsonValue
|
||||
|
||||
-- | Unfold using f until the predicate becomes true.
|
||||
unfoldUntil :: (b -> Bool) -> (b -> (a, b)) -> b -> [a]
|
||||
unfoldUntil p f = List.unfoldr (\x -> guard (not (p x)) >> pure (f x))
|
||||
|
||||
-- | Return hex characters for the byte value.
|
||||
bytehex :: Int -> String
|
||||
bytehex n = printf "%02x" n
|
||||
|
||||
-- | Return a printable character, or a dot.
|
||||
prChar :: Char -> Char
|
||||
prChar ch
|
||||
| Char.ord ch >= 32 && Char.ord ch < 128 = ch
|
||||
| otherwise = '.'
|
||||
|
||||
-- | Return a string containing a pretty hexdump of xs using addresses
|
||||
-- starting at n.
|
||||
hexdump :: Int -> LazyByteString -> [Text]
|
||||
hexdump n xs = zipWith hexLine addrs dlines
|
||||
where
|
||||
addrs = [n, n + 16 ..]
|
||||
dlines = unfoldUntil null (splitAt 16) (Char8.Lazy.unpack xs)
|
||||
hexLine :: Int -> String -> Text
|
||||
hexLine addr xs' = stringToText $ printf "%08x |%-23s %-23s| %s" addr h1 h2 s
|
||||
where
|
||||
h1 = unwords $ map (bytehex . Char.ord) $ take 8 xs'
|
||||
h2 = unwords $ map (bytehex . Char.ord) $ drop 8 xs'
|
||||
s = map prChar xs'
|
||||
|
|
@ -4,6 +4,7 @@ module Redacted where
|
|||
|
||||
import AppT
|
||||
import Arg
|
||||
import Bencode
|
||||
import Builder
|
||||
import Comparison
|
||||
import Control.Monad.Logger.CallStack
|
||||
|
|
@ -12,10 +13,14 @@ import Data.Aeson qualified as Json
|
|||
import Data.Aeson.BetterErrors qualified as Json
|
||||
import Data.Aeson.Key qualified as Key
|
||||
import Data.Aeson.KeyMap qualified as KeyMap
|
||||
import Data.BEncode (BEncode)
|
||||
import Data.Error.Tree
|
||||
import Data.List qualified as List
|
||||
import Data.List.NonEmpty qualified as NonEmpty
|
||||
import Data.Maybe (catMaybes)
|
||||
import Data.Text.IO qualified as Text.IO
|
||||
import Data.Time (NominalDiffTime, UTCTime)
|
||||
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
|
||||
import Database.PostgreSQL.Simple (Binary (Binary), Only (..))
|
||||
import Database.PostgreSQL.Simple.Types (PGArray (PGArray))
|
||||
import FieldParser qualified as Field
|
||||
|
|
@ -27,10 +32,12 @@ import Network.HTTP.Types
|
|||
import Network.Wai.Parse qualified as Wai
|
||||
import OpenTelemetry.Trace qualified as Otel hiding (getTracer, inSpan, inSpan')
|
||||
import Optional
|
||||
import Parse (Parse, mapLookup, mapLookupMay, runParse)
|
||||
import Parse qualified
|
||||
import Postgres.Decoder qualified as Dec
|
||||
import Postgres.MonadPostgres
|
||||
import Pretty
|
||||
import Prelude hiding (span)
|
||||
import Prelude hiding (length, span)
|
||||
|
||||
class MonadRedacted m where
|
||||
getRedactedApiKey :: m ByteString
|
||||
|
|
@ -781,3 +788,93 @@ test =
|
|||
(Json.asValue)
|
||||
<&> Pretty.showPrettyJsonColored
|
||||
>>= liftIO . putStderrLn
|
||||
|
||||
readTorrentFile :: (MonadIO m, MonadPostgres m) => m ()
|
||||
readTorrentFile = runTransaction $ do
|
||||
torrentBytes <-
|
||||
queryWith
|
||||
[sql|
|
||||
SELECT torrent_file from redacted.torrents where torrent_file is not null limit 10 |]
|
||||
()
|
||||
Dec.bytea
|
||||
liftIO $ for_ torrentBytes $ \b -> case testBencode b of
|
||||
Left e -> do
|
||||
Text.IO.putStrLn $ prettyErrorTree e
|
||||
Right a -> printPretty a
|
||||
liftIO $ print $ lengthNatural torrentBytes
|
||||
|
||||
testBencode :: ByteString -> (Either ErrorTree TorrentFile)
|
||||
testBencode bs = Parse.runParse "cannot parse bencode" (parseBencode >>> bencodeTorrentParser) bs
|
||||
|
||||
-- | A torrent file
|
||||
--
|
||||
-- from wikipedia:
|
||||
--
|
||||
-- * announce—the URL of the high tracker
|
||||
-- * info—this maps to a dictionary whose keys are very dependent on whether one or more files are being shared:
|
||||
-- - files—a list of dictionaries each corresponding to a file (only when multiple files are being shared). Each dictionary has the following keys:
|
||||
-- * length—size of the file in bytes.
|
||||
-- * path—a list of strings corresponding to subdirectory names, the last of which is the actual file name
|
||||
-- - length—size of the file in bytes (only when one file is being shared though)
|
||||
-- - name—suggested filename where the file is to be saved (if one file)/suggested directory name where the files are to be saved (if multiple files)
|
||||
-- - piece length—number of bytes per piece. This is commonly 28 KiB = 256 KiB = 262,144 B.
|
||||
-- - pieces—a hash list, i.e., a concatenation of each piece's SHA-1 hash. As SHA-1 returns a 160-bit hash, pieces will be a string whose length is a multiple of 20 bytes. If the torrent contains multiple files, the pieces are formed by concatenating the files in the order they appear in the files dictionary (i.e., all pieces in the torrent are the full piece length except for the last piece, which may be shorter).
|
||||
data TorrentFile = TorrentFile
|
||||
{ announce :: Text,
|
||||
comment :: Maybe Text,
|
||||
createdBy :: Maybe Text,
|
||||
creationDate :: Maybe UTCTime,
|
||||
encoding :: Maybe Text,
|
||||
info :: Info
|
||||
}
|
||||
deriving stock (Eq, Show)
|
||||
|
||||
data Info = Info
|
||||
{ name :: Text,
|
||||
files :: [File],
|
||||
pieceLength :: Natural,
|
||||
pieces :: ByteString,
|
||||
private :: Maybe Bool,
|
||||
source :: Maybe Text
|
||||
}
|
||||
deriving stock (Eq, Show)
|
||||
|
||||
data File = File
|
||||
{ length_ :: Natural,
|
||||
path :: [Text]
|
||||
}
|
||||
deriving stock (Eq, Show)
|
||||
|
||||
bencodeTorrentParser :: Parse BEncode TorrentFile
|
||||
bencodeTorrentParser =
|
||||
bencodeDict >>> do
|
||||
announce <- mapLookup "announce" bencodeTextLenient
|
||||
comment <- mapLookupMay "comment" bencodeTextLenient
|
||||
createdBy <- mapLookupMay "created by" bencodeTextLenient
|
||||
creationDate <- mapLookupMay "creation date" (bencodeInteger <&> posixSecondsToUTCTime . fromInteger @NominalDiffTime)
|
||||
encoding <- mapLookupMay "encoding" bencodeTextLenient
|
||||
info <-
|
||||
mapLookup "info" $
|
||||
bencodeDict >>> do
|
||||
name <- mapLookup "name" bencodeTextLenient
|
||||
files <-
|
||||
mapLookup "files" $
|
||||
bencodeList
|
||||
>>> ( Parse.multiple $
|
||||
bencodeDict >>> do
|
||||
length_ <- mapLookup "length" bencodeNatural
|
||||
path <- mapLookup "path" $ bencodeList >>> Parse.multiple bencodeTextLenient
|
||||
pure $ File {..}
|
||||
)
|
||||
pieceLength <- mapLookup "piece length" bencodeNatural
|
||||
pieces <- mapLookup "pieces" bencodeBytes
|
||||
private <-
|
||||
mapLookupMay "private" bencodeInteger
|
||||
<&> fmap
|
||||
( \case
|
||||
0 -> False
|
||||
_ -> True
|
||||
)
|
||||
source <- mapLookupMay "source" bencodeTextLenient
|
||||
pure Info {..}
|
||||
pure TorrentFile {..}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ import OpenTelemetry.Attributes qualified as Otel
|
|||
import OpenTelemetry.Context.ThreadLocal qualified as Otel
|
||||
import OpenTelemetry.Trace qualified as Otel hiding (getTracer, inSpan, inSpan')
|
||||
import OpenTelemetry.Trace.Monad qualified as Otel
|
||||
import Parse (Parse)
|
||||
import Parse (Parse, showContext)
|
||||
import Parse qualified
|
||||
import Postgres.Decoder qualified as Dec
|
||||
import Postgres.MonadPostgres
|
||||
|
|
@ -650,17 +650,17 @@ textToURI =
|
|||
uriToHttpClientRequest :: Parse URI Http.Request
|
||||
uriToHttpClientRequest =
|
||||
Parse.mkParseNoContext
|
||||
( \url ->
|
||||
( \(ctx, url) ->
|
||||
(url & Http.requestFromURI)
|
||||
& runCatch
|
||||
& first (checkException @Http.HttpException)
|
||||
& \case
|
||||
Left (Right (Http.InvalidUrlException urlText reason)) ->
|
||||
Left [fmt|Unable to set the url "{urlText}" as request URL, reason: {reason}|]
|
||||
Left [fmt|Unable to set the url "{urlText}" as request URL, reason: {reason}, at {Parse.showContext ctx}|]
|
||||
Left (Right exc@(Http.HttpExceptionRequest _ _)) ->
|
||||
Left [fmt|Weird! Should not get a HttpExceptionRequest when parsing an URL (bad library design), was {exc & displayException}|]
|
||||
Left [fmt|Weird! Should not get a HttpExceptionRequest when parsing an URL (bad library design), was {exc & displayException}, at {Parse.showContext ctx}|]
|
||||
Left (Left someExc) ->
|
||||
Left [fmt|Weird! Should not get anyhting but a HttpException when parsing an URL (bad library design), was {someExc & displayException}|]
|
||||
Left [fmt|Weird! Should not get anyhting but a HttpException when parsing an URL (bad library design), was {someExc & displayException}, at {Parse.showContext ctx}|]
|
||||
Right req -> pure req
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ library
|
|||
exposed-modules:
|
||||
WhatcdResolver
|
||||
AppT
|
||||
Bencode
|
||||
JsonLd
|
||||
Optional
|
||||
Http
|
||||
|
|
@ -84,6 +85,7 @@ library
|
|||
pa-run-command,
|
||||
aeson-better-errors,
|
||||
aeson,
|
||||
bencode,
|
||||
blaze-html,
|
||||
bytestring,
|
||||
case-insensitive,
|
||||
|
|
@ -108,7 +110,9 @@ library
|
|||
postgresql-simple,
|
||||
punycode,
|
||||
tmp-postgres,
|
||||
time,
|
||||
unliftio,
|
||||
selective,
|
||||
wai-extra,
|
||||
wai,
|
||||
warp,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue