diff --git a/default.nix b/default.nix index d851cef..92c9481 100644 --- a/default.nix +++ b/default.nix @@ -1,5 +1,6 @@ -{ mkDerivation, aeson, amqp, base, bytestring, dhall, lens -, lens-aeson, lib, mtl, pipes, text, wreq +{ mkDerivation, acid-state, aeson, amqp, base, bytestring, dhall +, hedgehog, hspec, hspec-hedgehog, lens, lens-aeson, lib, mtl +, pipes, safecopy, text, wreq }: mkDerivation { pname = "reddit-pub"; @@ -8,10 +9,11 @@ mkDerivation { isLibrary = true; isExecutable = true; libraryHaskellDepends = [ - aeson amqp base bytestring dhall lens lens-aeson mtl pipes text - wreq + acid-state aeson amqp base bytestring dhall lens lens-aeson mtl + pipes safecopy text wreq ]; executableHaskellDepends = [ base ]; + testHaskellDepends = [ base hedgehog hspec hspec-hedgehog ]; license = "unknown"; hydraPlatforms = lib.platforms.none; } diff --git a/reddit-pub.cabal b/reddit-pub.cabal index 5036f21..8f46921 100644 --- a/reddit-pub.cabal +++ b/reddit-pub.cabal @@ -29,6 +29,7 @@ library Network.Reddit Data.SubReddit Publish + Data.Trie -- Modules included in this library but not exported. -- other-modules: @@ -46,6 +47,9 @@ library , dhall , wreq , pipes + , safecopy + , acid-state + , containers hs-source-dirs: src default-language: Haskell2010 @@ -63,3 +67,26 @@ executable reddit-pub hs-source-dirs: app default-language: Haskell2010 + +test-suite reddit-tests + main-is: Spec.hs + type: exitcode-stdio-1.0 + other-modules: Test.Data.Trie + + -- Modules included in this executable, other than Main. + -- other-modules: + + -- LANGUAGE extensions used by modules in this package. + -- other-extensions: + build-depends: + base ^>=4.14.1.0, + mtl, + containers, + bytestring, + reddit-pub, + hedgehog, + hspec, + hspec-hedgehog + + hs-source-dirs: test + default-language: Haskell2010 diff --git a/src/Data/Trie.hs b/src/Data/Trie.hs new file mode 100644 index 0000000..a2b4d14 --- /dev/null +++ b/src/Data/Trie.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE ScopedTypeVariables #-} +-- I know theres the bytestring-trie package, but it's been kind of +-- unmaintained lately, I don't have the time to start pinning and stuff. +module Data.Trie where + +import Data.ByteString (ByteString) +import Data.Map.Strict (Map) +import qualified Data.Foldable as F +import qualified Data.Map.Strict as M +import qualified Data.ByteString as B +import Data.Word (Word8) +import Data.Maybe (isJust) + +-- XXX: This is as lazy as it gets, strictify where it's needed +data TrieF a f + = Empty + | ElementNode (Maybe (ByteString, a)) (Map Word8 f) + deriving Functor + +newtype Fix f = Fix { getFix :: f (Fix f) } + +type Trie a = Fix (TrieF a) + +type TrieSet = Trie () + +cata :: Functor f => (f a -> a) -> Fix f -> a +cata f = c where c = f . fmap c . getFix + +ana :: Functor f => (a -> f a) -> a -> Fix f +ana f = c where c = Fix . fmap c . f + +fromList :: Semigroup a => [(ByteString, a)] -> Trie a +fromList = F.foldl' (\acc (k,v) -> insert k v acc) empty + +toList :: Trie a -> [(ByteString, a)] +toList = cata $ \case + Empty -> [] + ElementNode (Just a) children -> a : concat (M.elems children) + ElementNode Nothing children -> concat (M.elems children) + +insert :: Semigroup a => ByteString -> a -> Trie a -> Trie a +insert k v = union (singleton k v) + +singleton :: ByteString -> a -> Trie a +singleton bs a = ana go (B.uncons bs) + where + go Nothing = ElementNode (Just (bs, a)) M.empty + go (Just (w8,children)) = ElementNode Nothing (M.singleton w8 (B.uncons children)) + +union :: Semigroup a => Trie a -> Trie a -> Trie a +union a (Fix Empty) = a +union (Fix Empty) b = b +union (Fix (ElementNode ma' x1)) (Fix (ElementNode ma x0)) = Fix (ElementNode (ma' <> ma) (M.unionWith union x1 x0)) + +empty :: Trie a +empty = Fix Empty + +lookup :: forall a. ByteString -> Trie a -> Maybe a +lookup bs t = cata go t $ B.uncons bs + where + go :: TrieF a (Maybe (Word8, ByteString) -> Maybe a) -> Maybe (Word8, ByteString) -> Maybe a + go Empty _ = Nothing + go (ElementNode ma' _) Nothing = fmap snd ma' + go (ElementNode _ m) (Just (w8,xs)) = M.lookup w8 m >>= \next -> next (B.uncons xs) + +member :: ByteString -> Trie a -> Bool +member bs = isJust . Data.Trie.lookup bs diff --git a/test/Spec.hs b/test/Spec.hs new file mode 100644 index 0000000..a34ec2d --- /dev/null +++ b/test/Spec.hs @@ -0,0 +1,9 @@ +module Main where + +import Test.Hspec (hspec) + +import qualified Test.Data.Trie as Data.Trie + +main :: IO () +main = hspec $ + Data.Trie.spec diff --git a/test/Test/Data/Trie.hs b/test/Test/Data/Trie.hs new file mode 100644 index 0000000..c0589fd --- /dev/null +++ b/test/Test/Data/Trie.hs @@ -0,0 +1,32 @@ +{-# LANGUAGE NamedFieldPuns #-} +module Test.Data.Trie where + +import Test.Hspec +import Test.Hspec.Hedgehog +import Data.ByteString (ByteString) +import qualified Hedgehog.Gen as Gen +import qualified Hedgehog.Range as Range + +import qualified Data.Map.Strict as M +import Control.Monad.State (evalStateT, foldM, execStateT, StateT (runStateT)) +import qualified Data.Foldable as F +import qualified Data.Trie as Trie +import Data.Monoid + +genByteString :: Gen ByteString +genByteString = Gen.bytes (Range.linear 0 50) + +genModel :: Gen (M.Map ByteString (Sum Int)) +genModel = Gen.map (Range.linear 0 30) genT + where + genT = (,) <$> genByteString <*> (Sum <$> Gen.integral (Range.linear 0 1000)) + +propInsert :: PropertyT IO () +propInsert = do + model <- forAll genModel + let got = F.foldl' (\acc (k,v) -> Trie.insert k v acc) Trie.empty $ M.toList model + model === M.fromList (Trie.toList got) + +spec :: Spec +spec = describe "Data.Trie" $ + it "inserts as with Map" $ hedgehog propInsert