Bring the addressbook back up to date
- bytestring-trie is not available - hls - modern shell.nix - base - Re-implement trie, with benchmarks - Realize my implementation of trie is slower than Data.Map, use that instead
This commit is contained in:
parent
a672fecbc9
commit
87f6eb00f6
@ -16,8 +16,13 @@ maintainer: mats.rauhala@iki.fi
|
||||
-- category:
|
||||
extra-source-files: CHANGELOG.md
|
||||
|
||||
common deps
|
||||
build-depends: base >=4.13.0.0 && <4.15
|
||||
|
||||
library
|
||||
import: deps
|
||||
exposed-modules: MyLib
|
||||
, Data.Trie
|
||||
, Data.Email.Header
|
||||
, Data.Email
|
||||
, Control.Addressbook.Streaming
|
||||
@ -26,15 +31,13 @@ library
|
||||
-- other-extensions:
|
||||
default-extensions: OverloadedStrings
|
||||
LambdaCase
|
||||
build-depends: base ^>=4.13.0.0
|
||||
, attoparsec
|
||||
build-depends: attoparsec
|
||||
, mtl
|
||||
, conduit
|
||||
, conduit-extra
|
||||
, bytestring
|
||||
, lens
|
||||
, text
|
||||
, bytestring-trie
|
||||
, vector
|
||||
, containers
|
||||
, filepath
|
||||
@ -43,27 +46,29 @@ library
|
||||
ghc-options: -Wall
|
||||
|
||||
executable addressbook
|
||||
import: deps
|
||||
main-is: Main.hs
|
||||
-- other-modules:
|
||||
-- other-extensions:
|
||||
build-depends: base ^>=4.13.0.0, addressbook
|
||||
build-depends: addressbook
|
||||
, optparse-applicative
|
||||
, text
|
||||
hs-source-dirs: app
|
||||
default-language: Haskell2010
|
||||
ghc-options: -Wall
|
||||
ghc-options: -Wall -threaded
|
||||
|
||||
test-suite addressbook-test
|
||||
import: deps
|
||||
default-language: Haskell2010
|
||||
default-extensions: OverloadedStrings
|
||||
LambdaCase
|
||||
other-modules: Test.Data.Email.Header
|
||||
, Test.Data.Email
|
||||
, Test.Data.Trie
|
||||
type: exitcode-stdio-1.0
|
||||
hs-source-dirs: test
|
||||
main-is: MyLibTest.hs
|
||||
build-depends: base ^>=4.13.0.0
|
||||
, addressbook
|
||||
build-depends: addressbook
|
||||
, tasty
|
||||
, tasty-hedgehog
|
||||
, tasty-hunit
|
||||
@ -75,4 +80,27 @@ test-suite addressbook-test
|
||||
, vector
|
||||
, conduit
|
||||
, conduit-extra
|
||||
, containers
|
||||
ghc-options: -Wall
|
||||
|
||||
|
||||
-- I know there's a benchmark type, but haskellPackages.shellFor doesn't seem
|
||||
-- to support it and I don't have the time to figure it out
|
||||
executable addressbook-bench
|
||||
import: deps
|
||||
default-language: Haskell2010
|
||||
default-extensions: OverloadedStrings
|
||||
LambdaCase
|
||||
-- other-modules: Test.Data.Email.Header
|
||||
-- , Test.Data.Email
|
||||
-- , Test.Data.Trie
|
||||
hs-source-dirs: bench
|
||||
main-is: Bench.hs
|
||||
build-depends: addressbook
|
||||
, criterion
|
||||
, hedgehog-corpus
|
||||
, bytestring
|
||||
, containers
|
||||
ghc-options: -Wall
|
||||
|
||||
|
||||
|
54
bench/Bench.hs
Normal file
54
bench/Bench.hs
Normal file
@ -0,0 +1,54 @@
|
||||
module Main where
|
||||
|
||||
import Criterion.Main (defaultMain)
|
||||
import Data.ByteString (ByteString)
|
||||
import Hedgehog.Corpus (muppets, simpsons, vegetables, viruses)
|
||||
import Criterion
|
||||
import qualified Data.Map.Strict as M
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import qualified Data.Trie as Trie
|
||||
|
||||
setupEnv :: IO ([(ByteString, ())], [(ByteString, ())])
|
||||
setupEnv = pure (small, big)
|
||||
where
|
||||
small =
|
||||
[ (firstname <> "." <> surname <> "@" <> domain <> "." <> tld, ())
|
||||
| firstname <- simpsons
|
||||
, surname <- muppets
|
||||
, domain <- vegetables
|
||||
, tld <- ["com", "fi", "co.uk", "info", "org"]
|
||||
]
|
||||
big =
|
||||
[ (firstname <> "." <> surname <> "@" <> domain1 <> "." <> domain2 <> "." <> tld, ())
|
||||
| firstname <- simpsons
|
||||
, surname <- muppets
|
||||
, domain1 <- vegetables
|
||||
, domain2 <- viruses
|
||||
, tld <- ["com", "fi", "co.uk", "info", "org"]
|
||||
]
|
||||
|
||||
main :: IO ()
|
||||
main = defaultMain
|
||||
[ env setupEnv $ \ ~(small,big) -> bgroup "main"
|
||||
[ bgroup "small"
|
||||
[ bgroup "Data.Map"
|
||||
[ bench "length . toList . fromList" $ whnf (length . M.toList . M.fromList) small
|
||||
, bench "length . filter (\"homer\" `isPrefixOf`) . keys . fromList" $ whnf (length . M.keys . M.filterWithKey (\k _ -> "homer" `B.isPrefixOf` k) . M.fromList) small
|
||||
]
|
||||
, bgroup "Data.Trie"
|
||||
[ bench "length . toList . fromList" $ whnf (length . Trie.toList . Trie.fromList) small
|
||||
, bench "length . keys . submap \"homer\" . fromList" $ whnf (length . Trie.keys . Trie.submap "homer" . Trie.fromList) small
|
||||
]
|
||||
]
|
||||
, bgroup "big"
|
||||
[ bgroup "Data.Map"
|
||||
[ bench "length . toList . fromList" $ whnf (length . M.toList . M.fromList) big
|
||||
, bench "length . filter (\"homer\" `isPrefixOf`) . keys . fromList" $ whnf (length . M.keys . M.filterWithKey (\k _ -> "homer" `B.isPrefixOf` k) . M.fromList) big
|
||||
]
|
||||
, bgroup "Data.Trie"
|
||||
[ bench "length . toList . fromList" $ whnf (length . Trie.toList . Trie.fromList) big
|
||||
, bench "length . keys . submap \"homer\" . fromList" $ whnf (length . Trie.keys . Trie.submap "homer" . Trie.fromList) big
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
20
default.nix
20
default.nix
@ -1,6 +1,6 @@
|
||||
{ mkDerivation, attoparsec, base, bytestring, bytestring-trie
|
||||
, conduit, conduit-extra, containers, filepath, hedgehog
|
||||
, hedgehog-corpus, HUnit, lens, mtl, optparse-applicative, stdenv
|
||||
{ mkDerivation, attoparsec, base, bytestring, conduit
|
||||
, conduit-extra, containers, criterion, filepath, hedgehog
|
||||
, hedgehog-corpus, HUnit, lens, lib, mtl, optparse-applicative
|
||||
, tasty, tasty-hedgehog, tasty-hunit, text, vector
|
||||
}:
|
||||
mkDerivation {
|
||||
@ -10,13 +10,15 @@ mkDerivation {
|
||||
isLibrary = true;
|
||||
isExecutable = true;
|
||||
libraryHaskellDepends = [
|
||||
attoparsec base bytestring bytestring-trie conduit conduit-extra
|
||||
containers filepath lens mtl text vector
|
||||
attoparsec base bytestring conduit conduit-extra containers
|
||||
filepath lens mtl text vector
|
||||
];
|
||||
executableHaskellDepends = [
|
||||
base criterion hedgehog-corpus optparse-applicative text
|
||||
];
|
||||
executableHaskellDepends = [ base optparse-applicative text ];
|
||||
testHaskellDepends = [
|
||||
base bytestring conduit conduit-extra hedgehog hedgehog-corpus
|
||||
HUnit tasty tasty-hedgehog tasty-hunit text vector
|
||||
base bytestring conduit conduit-extra containers hedgehog
|
||||
hedgehog-corpus HUnit tasty tasty-hedgehog tasty-hunit text vector
|
||||
];
|
||||
license = stdenv.lib.licenses.bsd3;
|
||||
license = lib.licenses.bsd3;
|
||||
}
|
||||
|
11
easy-hls-nix.json
Normal file
11
easy-hls-nix.json
Normal file
@ -0,0 +1,11 @@
|
||||
{
|
||||
"url": "https://github.com/ssbothwell/easy-hls-nix",
|
||||
"rev": "393ccab35104d5d49e0ff9eadf7b8654e87abffd",
|
||||
"date": "2021-09-16T11:13:40-07:00",
|
||||
"path": "/nix/store/dsfqcsiahsp9rkip4fsqzz32x0swa3d4-easy-hls-nix",
|
||||
"sha256": "0q1qxlkzjqx2hvf9k2cp5a98vlvsj13lap6hm7gl1kkqp88ai1dw",
|
||||
"fetchLFS": false,
|
||||
"fetchSubmodules": false,
|
||||
"deepClone": false,
|
||||
"leaveDotGit": false
|
||||
}
|
10
easy-hsl-nix.json
Normal file
10
easy-hsl-nix.json
Normal file
@ -0,0 +1,10 @@
|
||||
{
|
||||
"url": "https://github.com/jkachmar/easy-hls-nix",
|
||||
"rev": "291cf77f512a7121bb6801cde35ee1e8b7287f91",
|
||||
"date": "2021-04-13T16:12:58-04:00",
|
||||
"path": "/nix/store/wlmckg348q32ylfpg5dv3d5x88nkbi59-easy-hls-nix",
|
||||
"sha256": "1bvbcp9zwmh53sm16ycp8phhc6vzc72a71sf0bvyjgfbn6zp68bc",
|
||||
"fetchSubmodules": false,
|
||||
"deepClone": false,
|
||||
"leaveDotGit": false
|
||||
}
|
17
shell.nix
17
shell.nix
@ -1,15 +1,24 @@
|
||||
with (import <nixpkgs> {});
|
||||
|
||||
let addressbook = haskellPackages.callPackage ./. {};
|
||||
let
|
||||
hp = haskellPackages.extend (self: super: {
|
||||
addressbook = self.callPackage ./. {};
|
||||
});
|
||||
easy-hls = callPackage easy-hls-src { ghcVersions = [ hp.ghc.version ]; };
|
||||
easy-hls-src = fetchFromGitHub {
|
||||
owner = "ssbothwell";
|
||||
repo = "easy-hls-nix";
|
||||
inherit (builtins.fromJSON (builtins.readFile ./easy-hls-nix.json)) rev sha256;
|
||||
};
|
||||
|
||||
in
|
||||
|
||||
mkShell {
|
||||
name = "addressbook-shell";
|
||||
hp.shellFor {
|
||||
packages = h: [h.addressbook];
|
||||
buildInputs = [
|
||||
ghcid
|
||||
easy-hls
|
||||
stylish-haskell
|
||||
haskellPackages.cabal-install
|
||||
(haskellPackages.ghcWithHoogle (_: addressbook.buildInputs ++ addressbook.propagatedBuildInputs))
|
||||
];
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ import qualified Data.Conduit.Combinators as C
|
||||
import qualified Data.Conduit.List as CL
|
||||
import qualified Data.Conduit.Text as CT
|
||||
|
||||
import qualified Data.Trie as Trie
|
||||
import qualified Data.Map.Strict as Map
|
||||
|
||||
import System.IO
|
||||
(stdout)
|
||||
@ -26,22 +26,26 @@ import System.FilePath
|
||||
|
||||
import Control.Exception
|
||||
(catch)
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
|
||||
-- XXX: The current Data.Trie implementation is much slower than Data.Set
|
||||
query :: Text -> IO ()
|
||||
query prefix = do
|
||||
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
|
||||
state <- catch @IOError (runResourceT $ runConduit $ readState datDir) (\_ -> pure Trie.empty)
|
||||
state <- catch @IOError (runResourceT $ runConduit $ readState datDir) (\_ -> pure Map.empty)
|
||||
runConduit $ outputResults state
|
||||
where
|
||||
readState :: FilePath -> ConduitM () Void (ResourceT IO) (Trie.Trie [Text])
|
||||
readState :: FilePath -> ConduitM () Void (ResourceT IO) (Map.Map ByteString [Text])
|
||||
readState dir =
|
||||
CB.sourceFile (dir </> ".addressbook.dat")
|
||||
.| CT.decode CT.utf8
|
||||
.| CT.lines
|
||||
.| C.foldMap (\s -> Trie.singleton (TE.encodeUtf8 $ T.toLower s) [s])
|
||||
outputResults :: Trie.Trie [Text] -> ConduitM () Void IO ()
|
||||
.| C.foldMap (\s -> Map.singleton (TE.encodeUtf8 $ T.toLower s) [s])
|
||||
prefixB = TE.encodeUtf8 prefix
|
||||
outputResults :: Map.Map ByteString [Text] -> ConduitM () Void IO ()
|
||||
outputResults state =
|
||||
CL.sourceList (Trie.elems $ Trie.submap (TE.encodeUtf8 prefix) state)
|
||||
CL.sourceList (Map.elems $ Map.filterWithKey (\k _ -> prefixB `B.isPrefixOf` k) state)
|
||||
.| C.concat
|
||||
.| C.map (<> "\n")
|
||||
.| CT.encode CT.utf8
|
||||
|
87
src/Data/Trie.hs
Normal file
87
src/Data/Trie.hs
Normal file
@ -0,0 +1,87 @@
|
||||
-- The bytestring-trie is marked as broken. Trie is a simple(ish) datastructure, implement one here
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE DeriveFunctor #-}
|
||||
{-# LANGUAGE TypeApplications #-}
|
||||
{-# LANGUAGE DeriveFoldable #-}
|
||||
module Data.Trie where
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.Word (Word8)
|
||||
import Data.Foldable (foldl')
|
||||
import Data.Semigroup (Last(..))
|
||||
import qualified Data.ByteString as B
|
||||
import Data.Coerce (coerce)
|
||||
import Data.IntMap (IntMap)
|
||||
import qualified Data.IntMap.Strict as M
|
||||
|
||||
data Pair a b = Pair !a !b
|
||||
deriving (Functor, Foldable)
|
||||
|
||||
data Trie a
|
||||
= Empty
|
||||
| Branch !(Maybe (Pair ByteString a)) !(IntMap (Trie a))
|
||||
deriving (Functor, Foldable)
|
||||
|
||||
instance Semigroup a => Semigroup (Trie a) where
|
||||
(<>) = union
|
||||
|
||||
instance Monoid a => Monoid (Trie a) where
|
||||
mempty = Empty
|
||||
|
||||
instance Show a => Show (Trie a) where
|
||||
show = show . toList
|
||||
|
||||
keys :: Trie a -> [ByteString]
|
||||
keys Empty = []
|
||||
keys (Branch Nothing m) = concatMap keys (M.elems m)
|
||||
keys (Branch (Just (Pair b _)) m) = b : concatMap keys (M.elems m)
|
||||
|
||||
union :: Semigroup a => Trie a -> Trie a -> Trie a
|
||||
union Empty r = r
|
||||
union l Empty = l
|
||||
union (Branch m_pa cl) (Branch ma cr) = Branch (merge m_pa ma) (M.unionWith union cl cr)
|
||||
where
|
||||
merge (Just (Pair x a)) (Just (Pair _ b)) = Just $ Pair x (a <> b)
|
||||
merge Nothing r = r
|
||||
merge l Nothing = l
|
||||
|
||||
unionR :: forall a. Trie a -> Trie a -> Trie a
|
||||
unionR a b = coerce @(Trie (Last a)) $ union (coerce a) (coerce b)
|
||||
|
||||
singleton :: forall a. ByteString -> a -> Trie a
|
||||
singleton bs a = go (B.uncons bs)
|
||||
where
|
||||
go :: Maybe (Word8, ByteString) -> Trie a
|
||||
go Nothing =
|
||||
let x = Pair bs a
|
||||
in x `seq` Branch (Just (Pair bs a)) M.empty
|
||||
go (Just (w,c)) =
|
||||
let y = M.singleton (fromIntegral w) (go (B.uncons c))
|
||||
in y `seq` Branch Nothing y
|
||||
|
||||
empty :: Trie a
|
||||
empty = Empty
|
||||
|
||||
elems :: Trie a -> [a]
|
||||
elems = foldr (:) []
|
||||
|
||||
submap :: forall a. ByteString -> Trie a -> Trie a
|
||||
submap bs = go (B.uncons bs)
|
||||
where
|
||||
go :: Maybe (Word8, ByteString) -> Trie a -> Trie a
|
||||
go _ Empty = Empty
|
||||
go Nothing t = t
|
||||
go (Just (w,cs)) (Branch _ c) = maybe empty (go (B.uncons cs)) $ M.lookup (fromIntegral w) c
|
||||
|
||||
insert :: forall a. ByteString -> a -> Trie a -> Trie a
|
||||
insert bs a = (`unionR` singleton bs a)
|
||||
|
||||
fromList :: [(ByteString, a)] -> Trie a
|
||||
fromList = foldl' (\acc (k,v) -> insert k v acc) empty
|
||||
|
||||
toList :: forall a. Trie a -> [(ByteString, a)]
|
||||
toList = go
|
||||
where
|
||||
go :: Trie a -> [(ByteString, a)]
|
||||
go Empty = []
|
||||
go (Branch Nothing m) = concatMap (\(_, child) -> go child) (M.toList m)
|
||||
go (Branch (Just (Pair a b)) m) = (a, b) : concatMap (\(_, child) -> go child) (M.toList m)
|
@ -4,11 +4,13 @@ import Test.Tasty
|
||||
|
||||
import qualified Test.Data.Email as Data.Email
|
||||
import qualified Test.Data.Email.Header as Data.Email.Header
|
||||
import qualified Test.Data.Trie as Data.Trie
|
||||
|
||||
tests :: TestTree
|
||||
tests = testGroup "tests"
|
||||
[ Data.Email.Header.tests
|
||||
, Data.Email.tests
|
||||
, Data.Trie.tests
|
||||
]
|
||||
|
||||
main :: IO ()
|
||||
|
45
test/Test/Data/Trie.hs
Normal file
45
test/Test/Data/Trie.hs
Normal file
@ -0,0 +1,45 @@
|
||||
module Test.Data.Trie where
|
||||
|
||||
import Test.Tasty
|
||||
import Test.Tasty.Hedgehog (testProperty)
|
||||
import Hedgehog
|
||||
import Data.Monoid (Sum(..))
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.Map (Map)
|
||||
import qualified Hedgehog.Gen as Gen
|
||||
import qualified Hedgehog.Range as Range
|
||||
import qualified Data.Trie as T
|
||||
import qualified Data.Map.Strict as M
|
||||
|
||||
genMap :: Gen (Map ByteString (Sum Int))
|
||||
genMap = Gen.map (Range.linear 0 100) genT
|
||||
where
|
||||
genT = (,) <$> Gen.bytes (Range.linear 0 20) <*> fmap Sum (Gen.integral (Range.linear 0 10))
|
||||
|
||||
propIsomorphic :: Property
|
||||
propIsomorphic = property $ do
|
||||
m <- forAll genMap
|
||||
let wanted = m
|
||||
got = M.fromList . T.toList . T.fromList . M.toList $ wanted
|
||||
wanted === got
|
||||
|
||||
propElems :: Property
|
||||
propElems = property $ do
|
||||
m <- forAll genMap
|
||||
let wanted = M.elems m
|
||||
got = T.elems . T.fromList . M.toList $ m
|
||||
wanted === got
|
||||
|
||||
propKeys :: Property
|
||||
propKeys = property $ do
|
||||
m <- forAll genMap
|
||||
let wanted = M.keys m
|
||||
got = T.keys . T.fromList . M.toList $ m
|
||||
wanted === got
|
||||
|
||||
tests :: TestTree
|
||||
tests = testGroup "Data.Trie"
|
||||
[ testProperty "isomorphic to Map" $ propIsomorphic
|
||||
, testProperty "isomorphic elems" $ propElems
|
||||
, testProperty "isomorphic keys" $ propKeys
|
||||
]
|
Loading…
Reference in New Issue
Block a user