Compare commits

...

14 Commits

Author SHA1 Message Date
6f0d8d0372 Add flakes 2022-02-07 15:03:00 +02:00
5182c47a19 Fix bug in parser 2021-11-04 10:24:22 +02:00
a47d4903cb Increase chunk size back to 200 2021-11-04 10:11:37 +02:00
00e585281c Avoid unneeded writes 2021-11-04 10:10:35 +02:00
03860c370a Share old info 2021-10-29 22:59:04 +03:00
41c666fe93 Make the analysis parallel (#2)
Co-authored-by: Mats Rauhala <mats.rauhala@iki.fi>
Reviewed-on: #2
Co-authored-by: Mats Rauhala <masse@rauhala.info>
Co-committed-by: Mats Rauhala <masse@rauhala.info>
2021-10-29 22:41:46 +03:00
1c4e766f92 Redundant bracket 2021-10-29 17:43:35 +03:00
0c75d66122 Simplify querying 2021-10-29 17:34:41 +03:00
e189f87dd7 Use Data.Map on streaming side as well 2021-10-29 17:26:29 +03:00
87f6eb00f6 Bring the addressbook back up to date
- bytestring-trie is not available
- hls
- modern shell.nix
- base
- Re-implement trie, with benchmarks
- Realize my implementation of trie is slower than Data.Map, use that
  instead
2021-10-29 17:22:29 +03:00
a672fecbc9 Querying 2020-12-11 17:55:52 +02:00
91578bfb03 Save as trie 2020-12-11 17:55:35 +02:00
22b143aac7 Wall everything 2020-12-11 17:53:34 +02:00
7f6b318fcb More strict emails 2020-12-11 17:53:08 +02:00
18 changed files with 549 additions and 82 deletions

View File

@ -16,49 +16,61 @@ maintainer: mats.rauhala@iki.fi
-- category:
extra-source-files: CHANGELOG.md
common deps
build-depends: base >=4.13.0.0 && <4.15
library
import: deps
exposed-modules: MyLib
, Data.Trie
, Data.Email.Header
, Data.Email
, Control.Addressbook.Streaming
, Control.Addressbook.Query
-- other-modules:
-- other-extensions:
default-extensions: OverloadedStrings
LambdaCase
build-depends: base ^>=4.13.0.0
, attoparsec
build-depends: attoparsec
, mtl
, conduit
, conduit-extra
, bytestring
, lens
, text
, bytestring-trie
, vector
, containers
, filepath
, parallel
, unix
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall
executable addressbook
import: deps
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base ^>=4.13.0.0, addressbook
build-depends: addressbook
, optparse-applicative
, text
hs-source-dirs: app
default-language: Haskell2010
ghc-options: -Wall -threaded -eventlog
test-suite addressbook-test
import: deps
default-language: Haskell2010
default-extensions: OverloadedStrings
LambdaCase
other-modules: Test.Data.Email.Header
, Test.Data.Email
, Test.Data.Trie
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: MyLibTest.hs
build-depends: base ^>=4.13.0.0
, addressbook
build-depends: addressbook
, tasty
, tasty-hedgehog
, tasty-hunit
@ -70,3 +82,27 @@ test-suite addressbook-test
, vector
, conduit
, conduit-extra
, containers
ghc-options: -Wall
-- I know there's a benchmark type, but haskellPackages.shellFor doesn't seem
-- to support it and I don't have the time to figure it out
executable addressbook-bench
import: deps
default-language: Haskell2010
default-extensions: OverloadedStrings
LambdaCase
-- other-modules: Test.Data.Email.Header
-- , Test.Data.Email
-- , Test.Data.Trie
hs-source-dirs: bench
main-is: Bench.hs
build-depends: addressbook
, criterion
, hedgehog-corpus
, bytestring
, containers
ghc-options: -Wall

View File

@ -3,18 +3,28 @@ module Main where
import Options.Applicative
import Data.Text
(Text)
import qualified Data.Text as T
import qualified Control.Addressbook.Query as Query
import qualified Control.Addressbook.Streaming as Streaming
data CmdLine
= Stream
| Query Text
deriving Show
cmdline :: Parser CmdLine
cmdline = subparser (command "stream" (info (pure Stream) (progDesc "Record a stream of filenames")))
cmdline = subparser
( command "stream" (info (pure Stream) (progDesc "Record a stream of filenames"))
<> command "query" (info (Query . T.pack <$> argument str (metavar "QUERY")) (progDesc "Query email addresses"))
)
handler :: CmdLine -> IO ()
handler = \case
Stream -> Streaming.run
Query q -> Query.query q
main :: IO ()
main = execParser opts >>= handler

54
bench/Bench.hs Normal file
View File

@ -0,0 +1,54 @@
module Main where
import Criterion.Main (defaultMain)
import Data.ByteString (ByteString)
import Hedgehog.Corpus (muppets, simpsons, vegetables, viruses)
import Criterion
import qualified Data.Map.Strict as M
import qualified Data.ByteString.Char8 as B
import qualified Data.Trie as Trie
setupEnv :: IO ([(ByteString, ())], [(ByteString, ())])
setupEnv = pure (small, big)
where
small =
[ (firstname <> "." <> surname <> "@" <> domain <> "." <> tld, ())
| firstname <- simpsons
, surname <- muppets
, domain <- vegetables
, tld <- ["com", "fi", "co.uk", "info", "org"]
]
big =
[ (firstname <> "." <> surname <> "@" <> domain1 <> "." <> domain2 <> "." <> tld, ())
| firstname <- simpsons
, surname <- muppets
, domain1 <- vegetables
, domain2 <- viruses
, tld <- ["com", "fi", "co.uk", "info", "org"]
]
main :: IO ()
main = defaultMain
[ env setupEnv $ \ ~(small,big) -> bgroup "main"
[ bgroup "small"
[ bgroup "Data.Map"
[ bench "length . toList . fromList" $ whnf (length . M.toList . M.fromList) small
, bench "length . filter (\"homer\" `isPrefixOf`) . keys . fromList" $ whnf (length . M.keys . M.filterWithKey (\k _ -> "homer" `B.isPrefixOf` k) . M.fromList) small
]
, bgroup "Data.Trie"
[ bench "length . toList . fromList" $ whnf (length . Trie.toList . Trie.fromList) small
, bench "length . keys . submap \"homer\" . fromList" $ whnf (length . Trie.keys . Trie.submap "homer" . Trie.fromList) small
]
]
, bgroup "big"
[ bgroup "Data.Map"
[ bench "length . toList . fromList" $ whnf (length . M.toList . M.fromList) big
, bench "length . filter (\"homer\" `isPrefixOf`) . keys . fromList" $ whnf (length . M.keys . M.filterWithKey (\k _ -> "homer" `B.isPrefixOf` k) . M.fromList) big
]
, bgroup "Data.Trie"
[ bench "length . toList . fromList" $ whnf (length . Trie.toList . Trie.fromList) big
, bench "length . keys . submap \"homer\" . fromList" $ whnf (length . Trie.keys . Trie.submap "homer" . Trie.fromList) big
]
]
]
]

View File

@ -1,7 +1,7 @@
{ mkDerivation, attoparsec, base, bytestring, bytestring-trie
, conduit, conduit-extra, containers, hedgehog, hedgehog-corpus
, HUnit, lens, mtl, optparse-applicative, stdenv, tasty
, tasty-hedgehog, tasty-hunit, text, vector
{ mkDerivation, attoparsec, base, bytestring, conduit
, conduit-extra, containers, criterion, filepath, hedgehog
, hedgehog-corpus, HUnit, lens, lib, mtl, optparse-applicative
, parallel, tasty, tasty-hedgehog, tasty-hunit, text, unix, vector
}:
mkDerivation {
pname = "addressbook";
@ -10,13 +10,16 @@ mkDerivation {
isLibrary = true;
isExecutable = true;
libraryHaskellDepends = [
attoparsec base bytestring bytestring-trie conduit conduit-extra
containers lens mtl text vector
attoparsec base bytestring conduit conduit-extra containers
filepath lens mtl parallel text unix vector
];
executableHaskellDepends = [
base bytestring containers criterion hedgehog-corpus
optparse-applicative text
];
executableHaskellDepends = [ base optparse-applicative ];
testHaskellDepends = [
base bytestring conduit conduit-extra hedgehog hedgehog-corpus
HUnit tasty tasty-hedgehog tasty-hunit text vector
base bytestring conduit conduit-extra containers hedgehog
hedgehog-corpus HUnit tasty tasty-hedgehog tasty-hunit text vector
];
license = stdenv.lib.licenses.bsd3;
license = lib.licenses.bsd3;
}

11
easy-hls-nix.json Normal file
View File

@ -0,0 +1,11 @@
{
"url": "https://github.com/ssbothwell/easy-hls-nix",
"rev": "393ccab35104d5d49e0ff9eadf7b8654e87abffd",
"date": "2021-09-16T11:13:40-07:00",
"path": "/nix/store/dsfqcsiahsp9rkip4fsqzz32x0swa3d4-easy-hls-nix",
"sha256": "0q1qxlkzjqx2hvf9k2cp5a98vlvsj13lap6hm7gl1kkqp88ai1dw",
"fetchLFS": false,
"fetchSubmodules": false,
"deepClone": false,
"leaveDotGit": false
}

10
easy-hsl-nix.json Normal file
View File

@ -0,0 +1,10 @@
{
"url": "https://github.com/jkachmar/easy-hls-nix",
"rev": "291cf77f512a7121bb6801cde35ee1e8b7287f91",
"date": "2021-04-13T16:12:58-04:00",
"path": "/nix/store/wlmckg348q32ylfpg5dv3d5x88nkbi59-easy-hls-nix",
"sha256": "1bvbcp9zwmh53sm16ycp8phhc6vzc72a71sf0bvyjgfbn6zp68bc",
"fetchSubmodules": false,
"deepClone": false,
"leaveDotGit": false
}

62
flake.lock generated Normal file
View File

@ -0,0 +1,62 @@
{
"nodes": {
"easy-hls": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1637250802,
"narHash": "sha256-/crlHEVB148PGQLZCsHOR9L5qgvCAfRSocIoKgmMAhA=",
"owner": "jkachmar",
"repo": "easy-hls-nix",
"rev": "7c123399ef8a67dc0e505d9cf7f2c7f64f1cd847",
"type": "github"
},
"original": {
"owner": "jkachmar",
"repo": "easy-hls-nix",
"type": "github"
}
},
"flake-utils": {
"locked": {
"lastModified": 1637014545,
"narHash": "sha256-26IZAc5yzlD9FlDT54io1oqG/bBoyka+FJk5guaX4x4=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "bba5dcc8e0b20ab664967ad83d24d64cb64ec4f4",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1637312849,
"narHash": "sha256-OhVZopkyryEfLyPwcXk2IQsdi80lj6TY1YFoMNZ4hCQ=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "e4806bb4416f88c20f8be0b8ef9b5b09ff9022a6",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"root": {
"inputs": {
"easy-hls": "easy-hls",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

53
flake.nix Normal file
View File

@ -0,0 +1,53 @@
{
description = "addressbook";
inputs = {
easy-hls = {
url = "github:jkachmar/easy-hls-nix";
inputs.nixpkgs.follows = "nixpkgs";
};
flake-utils = {
url = "github:numtide/flake-utils";
};
};
outputs = { self, nixpkgs, flake-utils, easy-hls }:
{ overlay = final: prev: {
haskellPackages = prev.haskellPackages.override ( old: {
overrides = final.lib.composeExtensions (old.overrides or (_: _: {})) (f: p: {
addressbook = f.callPackage ./. {};
});
});
};
}
//
flake-utils.lib.eachSystem ["x86_64-linux" "x86_64-darwin"] ( system:
let
pkgs = import nixpkgs { inherit system; overlays = [ self.overlay ]; };
hp = pkgs.haskellPackages;
hls = (easy-hls.withGhcs [ hp.ghc ]).${system};
in
rec {
packages = { inherit (hp) addressbook; };
defaultPackage = packages.addressbook;
apps.addressbook = {
type = "app";
program = "${hp.addressbook}/bin/addressbook";
};
devShell = hp.shellFor {
packages = h: [h.addressbook];
withHoogle = true;
buildInputs = with pkgs; [
entr
cabal-install
hp.hlint
stylish-haskell
ghcid
hls
];
};
}
);
}

View File

@ -1,15 +1,24 @@
with (import <nixpkgs> {});
let addressbook = haskellPackages.callPackage ./. {};
let
hp = haskellPackages.extend (self: super: {
addressbook = self.callPackage ./. {};
});
easy-hls = callPackage easy-hls-src { ghcVersions = [ hp.ghc.version ]; };
easy-hls-src = fetchFromGitHub {
owner = "ssbothwell";
repo = "easy-hls-nix";
inherit (builtins.fromJSON (builtins.readFile ./easy-hls-nix.json)) rev sha256;
};
in
mkShell {
name = "addressbook-shell";
hp.shellFor {
packages = h: [h.addressbook];
buildInputs = [
ghcid
easy-hls
stylish-haskell
haskellPackages.cabal-install
(haskellPackages.ghcWithHoogle (_: addressbook.buildInputs ++ addressbook.propagatedBuildInputs))
];
}

View File

@ -0,0 +1,42 @@
{-# LANGUAGE TypeApplications #-}
module Control.Addressbook.Query where
import Data.Text
(Text)
import qualified Data.Text as T
import Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.Text as CT
import System.IO
(stdout)
import Data.Maybe
(fromMaybe)
import System.Environment
(lookupEnv)
import System.FilePath
((</>))
import Control.Exception
(catch)
-- XXX: The current Data.Trie implementation is much slower than Data.Set
query :: Text -> IO ()
query prefix = do
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
catch @IOError (runResourceT $ runConduit $ filterAddresses datDir) $ \e -> print e
where
prefixLC = T.toLower prefix
filterAddresses :: FilePath -> ConduitT () Void (ResourceT IO) ()
filterAddresses dir =
CB.sourceFile (dir </> ".addressbook.dat")
.| CT.decode CT.utf8
.| CT.lines
.| C.filter (\v -> prefixLC `T.isPrefixOf` T.toLower v)
.| CT.encode CT.utf8
.| C.map (<> "\n")
.| CB.sinkHandle stdout

View File

@ -1,41 +1,73 @@
module Control.Addressbook.Streaming where
import qualified Data.Text as T
import Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.Text as CT
import qualified Data.Conduit.List as CL
import Data.Email
import Data.Email.Header
(Header(..))
import System.IO
(stdin)
import qualified Data.Foldable as F
import qualified Data.Set as S
import Data.Maybe
(fromMaybe)
import System.Environment
(lookupEnv)
import System.FilePath
((</>))
import Data.Set (Set)
import Data.ByteString (ByteString)
import qualified Data.Set as Set
import qualified Data.ByteString.Lazy as LBS
import Data.Char (ord)
import qualified Data.ByteString.Lazy.Char8 as LBC
import System.IO.Unsafe (unsafeInterleaveIO)
import Control.Parallel.Strategies (rseq, parMap)
import qualified Data.List as L
import Control.Monad (unless)
import System.Posix (touchFile)
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
combine = await >>= \case
Nothing -> pure ()
Just path -> (CB.sourceFile path .| parseEmail) >> combine
chunks :: Int -> [a] -> [[a]]
chunks n = L.unfoldr $ \case
[] -> Nothing
xs -> Just (splitAt n xs)
run :: IO ()
run = do
x <- runResourceT $ runConduit stream
F.for_ x print
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
let datFile = datDir </> ".addressbook.dat"
touchFile datFile
original <- Set.fromList . map LBS.toStrict . lbsLines <$> LBS.readFile datFile
xs <- LBS.getContents >>= stream
let set = original `Set.union` F.fold (parMap rseq F.fold (chunks 200 xs))
unless (original == set) $
runResourceT $
runConduit $
CL.sourceList (Set.elems set)
.| C.map (<> "\n")
.| CB.sinkFileCautious datFile
where
separate = \case
From x -> [x]
To xs -> F.toList xs
-- A set of (locally) unique addresses. Composes with parMap
lbsLines = LBS.split (fromIntegral $ ord '\n')
stream :: LBS.ByteString -> IO [Set ByteString]
stream =
CB.sourceHandle stdin
.| CT.decode CT.utf8
.| CT.lines
.| C.map T.unpack
.| combine
.| C.concatMap separate
.| C.foldMap (S.singleton)
traverse (unsafeInterleaveIO . parse . LBC.unpack)
. filter (not . LBS.null)
. lbsLines
parse path =
runResourceT $
runConduit $
CB.sourceFile path .| parseEmail .| C.concatMap separate .| C.foldMap Set.singleton

View File

@ -5,12 +5,9 @@ import Data.Email.Header
import Conduit
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.Text as CT
import Data.ByteString
(ByteString)
parseEmail :: (MonadUnliftIO m, MonadThrow m, Monad m) => ConduitM ByteString Header m ()
parseEmail = catchC (CT.decode CT.utf8) err .| CT.lines .| C.concatMap decode
where
err e = liftIO (print @CT.TextException e) >> yield ""
parseEmail = C.linesUnboundedAscii .| C.concatMap decode

View File

@ -1,55 +1,63 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ApplicativeDo #-}
module Data.Email.Header where
import Data.Text
(Text)
import qualified Data.Text as T
import qualified Data.Foldable as F
import Data.Attoparsec.Text
import Data.Attoparsec.ByteString.Char8
import Data.Vector
(Vector)
import qualified Data.Vector as V
import Data.Char
(isSpace)
import Control.Applicative
((<|>))
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BC
data Header
= From !Text
| To !(Vector Text)
= From !ByteString
| To !(Vector ByteString)
deriving (Show, Eq)
decode :: Text -> Either String Header
decode :: ByteString -> Either String Header
decode = parseOnly parseHeader
where
parseHeader :: Parser Header
parseHeader = parseFrom <|> parseTo
parseFrom :: Parser Header
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
parseTo :: Parser Header
parseTo = To <$> (string "To:" *> emptySpace *> emails)
emptySpace = many' space
emails :: Parser (Vector Text)
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
bracketEmail :: Parser Text
bracketEmail = do
_ <- manyTill anyChar (char '<')
T.pack <$> manyTill anyChar (char '>')
email :: Parser Text
email = do
_ <- many' space
name <- T.pack <$> many' (notChar '@')
_ <- char '@'
rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ','))
_ <- many' (notChar ',')
pure (name <> "@" <> rest)
{-# INLINE decode #-}
parseHeader :: Parser Header
parseHeader = parseFrom <|> parseTo
{-# INLINE parseHeader #-}
parseFrom :: Parser Header
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
parseTo :: Parser Header
parseTo = To <$> (string "To:" *> emptySpace *> emails)
emptySpace :: Parser ()
emptySpace = () <$ many' space
emails :: Parser (Vector ByteString)
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
bracketEmail :: Parser ByteString
bracketEmail = do
_ <- manyTill anyChar (char '<')
email
{-# INLINE bracketEmail #-}
email :: Parser ByteString
email = do
_ <- many' space
name <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
_ <- char '@'
rest <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>' && c /= '<'))
_ <- many' (notChar ',')
pure (name <> "@" <> rest)
{-# INLINE email #-}
encode :: Header -> Text
encode :: Header -> ByteString
encode = \case
From addr -> "From: " <> addr
To addrs -> "To: " <> T.intercalate ", " (F.toList addrs)
To addrs -> "To: " <> BC.intercalate ", " (F.toList addrs)

87
src/Data/Trie.hs Normal file
View File

@ -0,0 +1,87 @@
-- The bytestring-trie is marked as broken. Trie is a simple(ish) datastructure, implement one here
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE DeriveFoldable #-}
module Data.Trie where
import Data.ByteString (ByteString)
import Data.Word (Word8)
import Data.Foldable (foldl')
import Data.Semigroup (Last(..))
import qualified Data.ByteString as B
import Data.Coerce (coerce)
import Data.IntMap (IntMap)
import qualified Data.IntMap.Strict as M
data Pair a b = Pair !a !b
deriving (Functor, Foldable)
data Trie a
= Empty
| Branch !(Maybe (Pair ByteString a)) !(IntMap (Trie a))
deriving (Functor, Foldable)
instance Semigroup a => Semigroup (Trie a) where
(<>) = union
instance Monoid a => Monoid (Trie a) where
mempty = Empty
instance Show a => Show (Trie a) where
show = show . toList
keys :: Trie a -> [ByteString]
keys Empty = []
keys (Branch Nothing m) = concatMap keys (M.elems m)
keys (Branch (Just (Pair b _)) m) = b : concatMap keys (M.elems m)
union :: Semigroup a => Trie a -> Trie a -> Trie a
union Empty r = r
union l Empty = l
union (Branch m_pa cl) (Branch ma cr) = Branch (merge m_pa ma) (M.unionWith union cl cr)
where
merge (Just (Pair x a)) (Just (Pair _ b)) = Just $ Pair x (a <> b)
merge Nothing r = r
merge l Nothing = l
unionR :: forall a. Trie a -> Trie a -> Trie a
unionR a b = coerce @(Trie (Last a)) $ union (coerce a) (coerce b)
singleton :: forall a. ByteString -> a -> Trie a
singleton bs a = go (B.uncons bs)
where
go :: Maybe (Word8, ByteString) -> Trie a
go Nothing =
let x = Pair bs a
in x `seq` Branch (Just (Pair bs a)) M.empty
go (Just (w,c)) =
let y = M.singleton (fromIntegral w) (go (B.uncons c))
in y `seq` Branch Nothing y
empty :: Trie a
empty = Empty
elems :: Trie a -> [a]
elems = foldr (:) []
submap :: forall a. ByteString -> Trie a -> Trie a
submap bs = go (B.uncons bs)
where
go :: Maybe (Word8, ByteString) -> Trie a -> Trie a
go _ Empty = Empty
go Nothing t = t
go (Just (w,cs)) (Branch _ c) = maybe empty (go (B.uncons cs)) $ M.lookup (fromIntegral w) c
insert :: forall a. ByteString -> a -> Trie a -> Trie a
insert bs a = (`unionR` singleton bs a)
fromList :: [(ByteString, a)] -> Trie a
fromList = foldl' (\acc (k,v) -> insert k v acc) empty
toList :: forall a. Trie a -> [(ByteString, a)]
toList = go
where
go :: Trie a -> [(ByteString, a)]
go Empty = []
go (Branch Nothing m) = concatMap (\(_, child) -> go child) (M.toList m)
go (Branch (Just (Pair a b)) m) = (a, b) : concatMap (\(_, child) -> go child) (M.toList m)

View File

@ -4,11 +4,13 @@ import Test.Tasty
import qualified Test.Data.Email as Data.Email
import qualified Test.Data.Email.Header as Data.Email.Header
import qualified Test.Data.Trie as Data.Trie
tests :: TestTree
tests = testGroup "tests"
[ Data.Email.Header.tests
, Data.Email.tests
, Data.Trie.tests
]
main :: IO ()

View File

@ -17,9 +17,11 @@ sample :: ByteString
sample =
"Subject: Hello worldddd\n\
\From: me@example.com\n\
\Dkim: asd\n\
\To: you <you@example.com>\n\
\ \n\n \
\foo"
\\n\n\
\From: foo bar <a mailto=\"me2@example.com\" />\n\
\asd\n"
parseToList :: ByteString -> IO [Header]
parseToList _ = runConduit (CB.sourceLbs sample .| parseEmail .| CL.consume)
@ -29,4 +31,7 @@ tests = testGroup "Data.Email"
[ testCase "Can parse a sample email" $ do
got <- parseToList sample
got @?= [ From "me@example.com", To ["you@example.com"]]
, testCase "Combined mailto" $ do
let got = decode "To: James Doe<james.doe@example.com<mailto:james.doe@example.com>>"
got @?= Right (To ["james.doe@example.com"])
]

View File

@ -10,11 +10,12 @@ import qualified Hedgehog.Corpus as Corpus
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import Data.Text
import qualified Data.Text as T
import Data.ByteString
import qualified Data.ByteString as T
import qualified Data.Vector as V
import Data.Email.Header
import qualified Data.ByteString.Char8 as BC
genHeader :: Gen Header
genHeader = Gen.choice
@ -22,17 +23,17 @@ genHeader = Gen.choice
, To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
]
genEmail :: Gen Text
genEmail :: Gen ByteString
genEmail = do
name <- Gen.element Corpus.simpsons
domain <- Gen.element Corpus.cooking
tld <- Gen.element ["com","fi","org"]
pure $ name <> "@" <> domain <> "." <> tld
wrapped :: Char -> Text -> Char -> Text
wrapped l x r = T.singleton l <> x <> T.singleton r
wrapped :: Char -> ByteString -> Char -> ByteString
wrapped l x r = BC.singleton l <> x <> BC.singleton r
genComment :: Gen Text
genComment :: Gen ByteString
genComment = do
x <- Gen.element Corpus.simpsons
Gen.element [x, wrapped '"' x '"']

45
test/Test/Data/Trie.hs Normal file
View File

@ -0,0 +1,45 @@
module Test.Data.Trie where
import Test.Tasty
import Test.Tasty.Hedgehog (testProperty)
import Hedgehog
import Data.Monoid (Sum(..))
import Data.ByteString (ByteString)
import Data.Map (Map)
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import qualified Data.Trie as T
import qualified Data.Map.Strict as M
genMap :: Gen (Map ByteString (Sum Int))
genMap = Gen.map (Range.linear 0 100) genT
where
genT = (,) <$> Gen.bytes (Range.linear 0 20) <*> fmap Sum (Gen.integral (Range.linear 0 10))
propIsomorphic :: Property
propIsomorphic = property $ do
m <- forAll genMap
let wanted = m
got = M.fromList . T.toList . T.fromList . M.toList $ wanted
wanted === got
propElems :: Property
propElems = property $ do
m <- forAll genMap
let wanted = M.elems m
got = T.elems . T.fromList . M.toList $ m
wanted === got
propKeys :: Property
propKeys = property $ do
m <- forAll genMap
let wanted = M.keys m
got = T.keys . T.fromList . M.toList $ m
wanted === got
tests :: TestTree
tests = testGroup "Data.Trie"
[ testProperty "isomorphic to Map" $ propIsomorphic
, testProperty "isomorphic elems" $ propElems
, testProperty "isomorphic keys" $ propKeys
]