feat(users/Profpatsch): init jbovlaste sqlite
This is intended to convert the XML dump from https://jbovlaste.lojban.org/ to an sqlite database at one point. So far only XML parsing and some pretty printing Change-Id: I48c989a3109c8d513c812703fa7a8f2689a157ee Reviewed-on: https://cl.tvl.fyi/c/depot/+/8687 Tested-by: BuildkiteCI Reviewed-by: Profpatsch <mail@profpatsch.de> Autosubmit: Profpatsch <mail@profpatsch.de>
This commit is contained in:
		
							parent
							
								
									8c4730c433
								
							
						
					
					
						commit
						c2baefbecc
					
				
					 3 changed files with 213 additions and 0 deletions
				
			
		
							
								
								
									
										110
									
								
								users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,110 @@
 | 
				
			||||||
 | 
					{-# LANGUAGE QuasiQuotes #-}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module Main where
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import Data.ByteString.Internal qualified as Bytes
 | 
				
			||||||
 | 
					import Data.Error.Tree
 | 
				
			||||||
 | 
					import Data.List qualified as List
 | 
				
			||||||
 | 
					import Data.Map.Strict qualified as Map
 | 
				
			||||||
 | 
					import Data.Text qualified as Text
 | 
				
			||||||
 | 
					import Data.Text.IO qualified as Text
 | 
				
			||||||
 | 
					import Label
 | 
				
			||||||
 | 
					import PossehlAnalyticsPrelude
 | 
				
			||||||
 | 
					import Text.XML (def)
 | 
				
			||||||
 | 
					import Text.XML qualified as Xml
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					main :: IO ()
 | 
				
			||||||
 | 
					main = do
 | 
				
			||||||
 | 
					  f <- file
 | 
				
			||||||
 | 
					  f.documentRoot
 | 
				
			||||||
 | 
					    & filterElementsRec noUsers
 | 
				
			||||||
 | 
					    & downTo (T2 (label @"maxdepth" 5) (label @"maxlistitems" 20))
 | 
				
			||||||
 | 
					    & toTree
 | 
				
			||||||
 | 
					    & prettyErrorTree
 | 
				
			||||||
 | 
					    & Text.putStrLn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					file :: IO Xml.Document
 | 
				
			||||||
 | 
					file = Xml.readFile def "./jbovlaste-en.xml"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- | Filter XML elements recursively based on the given predicate
 | 
				
			||||||
 | 
					filterElementsRec :: (Xml.Element -> Bool) -> Xml.Element -> Xml.Element
 | 
				
			||||||
 | 
					filterElementsRec f el =
 | 
				
			||||||
 | 
					  el
 | 
				
			||||||
 | 
					    { Xml.elementNodes =
 | 
				
			||||||
 | 
					        mapMaybe
 | 
				
			||||||
 | 
					          ( \case
 | 
				
			||||||
 | 
					              Xml.NodeElement el' ->
 | 
				
			||||||
 | 
					                if f el'
 | 
				
			||||||
 | 
					                  then Just $ Xml.NodeElement $ filterElementsRec f el'
 | 
				
			||||||
 | 
					                  else Nothing
 | 
				
			||||||
 | 
					              other -> Just other
 | 
				
			||||||
 | 
					          )
 | 
				
			||||||
 | 
					          el.elementNodes
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- | no <user> allowed
 | 
				
			||||||
 | 
					noUsers :: Xml.Element -> Bool
 | 
				
			||||||
 | 
					noUsers el = el.elementName.nameLocalName /= "user"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					downTo :: (T2 "maxdepth" Int "maxlistitems" Int) -> Xml.Element -> Xml.Element
 | 
				
			||||||
 | 
					downTo n el =
 | 
				
			||||||
 | 
					  if n.maxdepth > 0
 | 
				
			||||||
 | 
					    then
 | 
				
			||||||
 | 
					      el
 | 
				
			||||||
 | 
					        { Xml.elementNodes =
 | 
				
			||||||
 | 
					            ( do
 | 
				
			||||||
 | 
					                let eleven = take (n.maxlistitems + 1) $ map down el.elementNodes
 | 
				
			||||||
 | 
					                if List.length eleven == (n.maxlistitems + 1)
 | 
				
			||||||
 | 
					                  then eleven <> [Xml.NodeComment "snip!"]
 | 
				
			||||||
 | 
					                  else eleven
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    else el {Xml.elementNodes = [Xml.NodeComment "snip!"]}
 | 
				
			||||||
 | 
					  where
 | 
				
			||||||
 | 
					    down =
 | 
				
			||||||
 | 
					      \case
 | 
				
			||||||
 | 
					        Xml.NodeElement el' ->
 | 
				
			||||||
 | 
					          Xml.NodeElement $
 | 
				
			||||||
 | 
					            downTo
 | 
				
			||||||
 | 
					              ( T2
 | 
				
			||||||
 | 
					                  (label @"maxdepth" $ n.maxdepth - 1)
 | 
				
			||||||
 | 
					                  (label @"maxlistitems" n.maxlistitems)
 | 
				
			||||||
 | 
					              )
 | 
				
			||||||
 | 
					              el'
 | 
				
			||||||
 | 
					        more -> more
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					toTree :: Xml.Element -> ErrorTree
 | 
				
			||||||
 | 
					toTree el = do
 | 
				
			||||||
 | 
					  let outer =
 | 
				
			||||||
 | 
					        if not $ null el.elementAttributes
 | 
				
			||||||
 | 
					          then [fmt|<{name el.elementName}: {attrs el.elementAttributes}>|]
 | 
				
			||||||
 | 
					          else [fmt|<{name el.elementName}>|]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  case el.elementNodes & filter (not . isEmptyContent) & nonEmpty of
 | 
				
			||||||
 | 
					    Nothing -> singleError (newError outer)
 | 
				
			||||||
 | 
					    Just (n :| []) | not $ isElementNode n -> singleError $ errorContext outer (nodeErrorNoElement n)
 | 
				
			||||||
 | 
					    Just nodes -> nestedMultiError (newError outer) (nodes <&> node)
 | 
				
			||||||
 | 
					  where
 | 
				
			||||||
 | 
					    isEmptyContent = \case
 | 
				
			||||||
 | 
					      Xml.NodeContent c -> c & Text.all Bytes.isSpaceChar8
 | 
				
			||||||
 | 
					      _ -> False
 | 
				
			||||||
 | 
					    isElementNode = \case
 | 
				
			||||||
 | 
					      Xml.NodeElement _ -> True
 | 
				
			||||||
 | 
					      _ -> False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    node :: Xml.Node -> ErrorTree
 | 
				
			||||||
 | 
					    node = \case
 | 
				
			||||||
 | 
					      Xml.NodeElement el' -> toTree el'
 | 
				
			||||||
 | 
					      other -> singleError $ nodeErrorNoElement other
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    nodeErrorNoElement :: Xml.Node -> Error
 | 
				
			||||||
 | 
					    nodeErrorNoElement = \case
 | 
				
			||||||
 | 
					      Xml.NodeInstruction i -> [fmt|Instruction: {i & show}|]
 | 
				
			||||||
 | 
					      Xml.NodeContent c -> [fmt|"{c & Text.replace "\"" "\\\""}"|]
 | 
				
			||||||
 | 
					      Xml.NodeComment c -> [fmt|<!-- {c} -->|]
 | 
				
			||||||
 | 
					      Xml.NodeElement _ -> error "NodeElement not allowed here"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    name :: Xml.Name -> Text
 | 
				
			||||||
 | 
					    name n = [fmt|{n.namePrefix & fromMaybe ""}{n.nameLocalName}|]
 | 
				
			||||||
 | 
					    attrs :: Map Xml.Name Text -> Text
 | 
				
			||||||
 | 
					    attrs a = a & Map.toList <&> (\(k, v) -> [fmt|{name k}={v}|]) & Text.intercalate ", " & \s -> [fmt|({s})|]
 | 
				
			||||||
							
								
								
									
										32
									
								
								users/Profpatsch/jbovlaste-sqlite/default.nix
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								users/Profpatsch/jbovlaste-sqlite/default.nix
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,32 @@
 | 
				
			||||||
 | 
					{ depot, pkgs, lib, ... }:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					let
 | 
				
			||||||
 | 
					  #   bins = depot.nix.getBins pkgs.sqlite ["sqlite3"];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  jbovlaste-sqlite = pkgs.haskellPackages.mkDerivation {
 | 
				
			||||||
 | 
					    pname = "jbovlaste-sqlite";
 | 
				
			||||||
 | 
					    version = "0.1.0";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    src = depot.users.Profpatsch.exactSource ./. [
 | 
				
			||||||
 | 
					      ./jbovlaste-sqlite.cabal
 | 
				
			||||||
 | 
					      ./JbovlasteSqlite.hs
 | 
				
			||||||
 | 
					    ];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    libraryHaskellDepends = [
 | 
				
			||||||
 | 
					      pkgs.haskellPackages.pa-prelude
 | 
				
			||||||
 | 
					      pkgs.haskellPackages.pa-label
 | 
				
			||||||
 | 
					      pkgs.haskellPackages.pa-error-tree
 | 
				
			||||||
 | 
					      pkgs.haskellPackages.sqlite-simple
 | 
				
			||||||
 | 
					      pkgs.haskellPackages.xml-conduit
 | 
				
			||||||
 | 
					      depot.users.Profpatsch.arglib.netencode.haskell
 | 
				
			||||||
 | 
					      depot.users.Profpatsch.netencode.netencode-hs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    isExecutable = true;
 | 
				
			||||||
 | 
					    isLibrary = false;
 | 
				
			||||||
 | 
					    license = lib.licenses.mit;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					in
 | 
				
			||||||
 | 
					jbovlaste-sqlite
 | 
				
			||||||
							
								
								
									
										71
									
								
								users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,71 @@
 | 
				
			||||||
 | 
					cabal-version:      3.0
 | 
				
			||||||
 | 
					name:               jbovlaste-sqlite
 | 
				
			||||||
 | 
					version:            0.1.0.0
 | 
				
			||||||
 | 
					author:             Profpatsch
 | 
				
			||||||
 | 
					maintainer:         mail@profpatsch.de
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					common common-options
 | 
				
			||||||
 | 
					  ghc-options:
 | 
				
			||||||
 | 
					      -Wall
 | 
				
			||||||
 | 
					      -Wno-type-defaults
 | 
				
			||||||
 | 
					      -Wunused-packages
 | 
				
			||||||
 | 
					      -Wredundant-constraints
 | 
				
			||||||
 | 
					      -fwarn-missing-deriving-strategies
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  -- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html
 | 
				
			||||||
 | 
					  -- for a description of all these extensions
 | 
				
			||||||
 | 
					  default-extensions:
 | 
				
			||||||
 | 
					      -- Infer Applicative instead of Monad where possible
 | 
				
			||||||
 | 
					    ApplicativeDo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Allow literal strings to be Text
 | 
				
			||||||
 | 
					    OverloadedStrings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Syntactic sugar improvements
 | 
				
			||||||
 | 
					    LambdaCase
 | 
				
			||||||
 | 
					    MultiWayIf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Makes the (deprecated) usage of * instead of Data.Kind.Type an error
 | 
				
			||||||
 | 
					    NoStarIsType
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Convenient and crucial to deal with ambiguous field names, commonly
 | 
				
			||||||
 | 
					    -- known as RecordDotSyntax
 | 
				
			||||||
 | 
					    OverloadedRecordDot
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- does not export record fields as functions, use OverloadedRecordDot to access instead
 | 
				
			||||||
 | 
					    NoFieldSelectors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Record punning
 | 
				
			||||||
 | 
					    RecordWildCards
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Improved Deriving
 | 
				
			||||||
 | 
					    DerivingStrategies
 | 
				
			||||||
 | 
					    DerivingVia
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- Type-level strings
 | 
				
			||||||
 | 
					    DataKinds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    -- to enable the `type` keyword in import lists (ormolu uses this automatically)
 | 
				
			||||||
 | 
					    ExplicitNamespaces
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  default-language: GHC2021
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					executable jbovlaste-sqlite
 | 
				
			||||||
 | 
					    import: common-options
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    main-is:          JbovlasteSqlite.hs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    build-depends:
 | 
				
			||||||
 | 
					        base >=4.15 && <5,
 | 
				
			||||||
 | 
					        pa-prelude,
 | 
				
			||||||
 | 
					        pa-label,
 | 
				
			||||||
 | 
					        pa-error-tree,
 | 
				
			||||||
 | 
					        my-prelude,
 | 
				
			||||||
 | 
					        containers,
 | 
				
			||||||
 | 
					        bytestring,
 | 
				
			||||||
 | 
					        arglib-netencode,
 | 
				
			||||||
 | 
					        netencode,
 | 
				
			||||||
 | 
					        text,
 | 
				
			||||||
 | 
					        sqlite-simple,
 | 
				
			||||||
 | 
					        xml-conduit,
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue