Simple trie implementation

This commit is contained in:
Mats Rauhala 2021-10-27 22:43:20 +03:00
parent af538b9aa2
commit d646fc9095
5 changed files with 144 additions and 4 deletions

View File

@ -1,5 +1,6 @@
{ mkDerivation, aeson, amqp, base, bytestring, dhall, lens { mkDerivation, acid-state, aeson, amqp, base, bytestring, dhall
, lens-aeson, lib, mtl, pipes, text, wreq , hedgehog, hspec, hspec-hedgehog, lens, lens-aeson, lib, mtl
, pipes, safecopy, text, wreq
}: }:
mkDerivation { mkDerivation {
pname = "reddit-pub"; pname = "reddit-pub";
@ -8,10 +9,11 @@ mkDerivation {
isLibrary = true; isLibrary = true;
isExecutable = true; isExecutable = true;
libraryHaskellDepends = [ libraryHaskellDepends = [
aeson amqp base bytestring dhall lens lens-aeson mtl pipes text acid-state aeson amqp base bytestring dhall lens lens-aeson mtl
wreq pipes safecopy text wreq
]; ];
executableHaskellDepends = [ base ]; executableHaskellDepends = [ base ];
testHaskellDepends = [ base hedgehog hspec hspec-hedgehog ];
license = "unknown"; license = "unknown";
hydraPlatforms = lib.platforms.none; hydraPlatforms = lib.platforms.none;
} }

View File

@ -29,6 +29,7 @@ library
Network.Reddit Network.Reddit
Data.SubReddit Data.SubReddit
Publish Publish
Data.Trie
-- Modules included in this library but not exported. -- Modules included in this library but not exported.
-- other-modules: -- other-modules:
@ -46,6 +47,9 @@ library
, dhall , dhall
, wreq , wreq
, pipes , pipes
, safecopy
, acid-state
, containers
hs-source-dirs: src hs-source-dirs: src
default-language: Haskell2010 default-language: Haskell2010
@ -63,3 +67,26 @@ executable reddit-pub
hs-source-dirs: app hs-source-dirs: app
default-language: Haskell2010 default-language: Haskell2010
test-suite reddit-tests
main-is: Spec.hs
type: exitcode-stdio-1.0
other-modules: Test.Data.Trie
-- Modules included in this executable, other than Main.
-- other-modules:
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends:
base ^>=4.14.1.0,
mtl,
containers,
bytestring,
reddit-pub,
hedgehog,
hspec,
hspec-hedgehog
hs-source-dirs: test
default-language: Haskell2010

70
src/Data/Trie.hs Normal file
View File

@ -0,0 +1,70 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- I know theres the bytestring-trie package, but it's been kind of
-- unmaintained lately, I don't have the time to start pinning and stuff.
module Data.Trie where
import Data.ByteString (ByteString)
import Data.Map.Strict (Map)
import qualified Data.Foldable as F
import qualified Data.Map.Strict as M
import qualified Data.ByteString as B
import Data.Word (Word8)
import Data.Maybe (isJust)
-- XXX: This is as lazy as it gets, strictify where it's needed
data TrieF a f
= Empty
| ElementNode (Maybe (ByteString, a)) (Map Word8 f)
deriving Functor
newtype Fix f = Fix { getFix :: f (Fix f) }
type Trie a = Fix (TrieF a)
type TrieSet = Trie ()
cata :: Functor f => (f a -> a) -> Fix f -> a
cata f = c where c = f . fmap c . getFix
ana :: Functor f => (a -> f a) -> a -> Fix f
ana f = c where c = Fix . fmap c . f
fromList :: Semigroup a => [(ByteString, a)] -> Trie a
fromList = F.foldl' (\acc (k,v) -> insert k v acc) empty
toList :: Trie a -> [(ByteString, a)]
toList = cata $ \case
Empty -> []
ElementNode (Just a) children -> a : concat (M.elems children)
ElementNode Nothing children -> concat (M.elems children)
insert :: Semigroup a => ByteString -> a -> Trie a -> Trie a
insert k v = union (singleton k v)
singleton :: ByteString -> a -> Trie a
singleton bs a = ana go (B.uncons bs)
where
go Nothing = ElementNode (Just (bs, a)) M.empty
go (Just (w8,children)) = ElementNode Nothing (M.singleton w8 (B.uncons children))
union :: Semigroup a => Trie a -> Trie a -> Trie a
union a (Fix Empty) = a
union (Fix Empty) b = b
union (Fix (ElementNode ma' x1)) (Fix (ElementNode ma x0)) = Fix (ElementNode (ma' <> ma) (M.unionWith union x1 x0))
empty :: Trie a
empty = Fix Empty
lookup :: forall a. ByteString -> Trie a -> Maybe a
lookup bs t = cata go t $ B.uncons bs
where
go :: TrieF a (Maybe (Word8, ByteString) -> Maybe a) -> Maybe (Word8, ByteString) -> Maybe a
go Empty _ = Nothing
go (ElementNode ma' _) Nothing = fmap snd ma'
go (ElementNode _ m) (Just (w8,xs)) = M.lookup w8 m >>= \next -> next (B.uncons xs)
member :: ByteString -> Trie a -> Bool
member bs = isJust . Data.Trie.lookup bs

9
test/Spec.hs Normal file
View File

@ -0,0 +1,9 @@
module Main where
import Test.Hspec (hspec)
import qualified Test.Data.Trie as Data.Trie
main :: IO ()
main = hspec $
Data.Trie.spec

32
test/Test/Data/Trie.hs Normal file
View File

@ -0,0 +1,32 @@
{-# LANGUAGE NamedFieldPuns #-}
module Test.Data.Trie where
import Test.Hspec
import Test.Hspec.Hedgehog
import Data.ByteString (ByteString)
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import qualified Data.Map.Strict as M
import Control.Monad.State (evalStateT, foldM, execStateT, StateT (runStateT))
import qualified Data.Foldable as F
import qualified Data.Trie as Trie
import Data.Monoid
genByteString :: Gen ByteString
genByteString = Gen.bytes (Range.linear 0 50)
genModel :: Gen (M.Map ByteString (Sum Int))
genModel = Gen.map (Range.linear 0 30) genT
where
genT = (,) <$> genByteString <*> (Sum <$> Gen.integral (Range.linear 0 1000))
propInsert :: PropertyT IO ()
propInsert = do
model <- forAll genModel
let got = F.foldl' (\acc (k,v) -> Trie.insert k v acc) Trie.empty $ M.toList model
model === M.fromList (Trie.toList got)
spec :: Spec
spec = describe "Data.Trie" $
it "inserts as with Map" $ hedgehog propInsert