Compare commits
14 Commits
a4da4f73da
...
master
Author | SHA1 | Date | |
---|---|---|---|
6f0d8d0372 | |||
5182c47a19 | |||
a47d4903cb | |||
00e585281c | |||
03860c370a | |||
41c666fe93 | |||
1c4e766f92 | |||
0c75d66122 | |||
e189f87dd7 | |||
87f6eb00f6 | |||
a672fecbc9 | |||
91578bfb03 | |||
22b143aac7 | |||
7f6b318fcb |
@ -16,49 +16,61 @@ maintainer: mats.rauhala@iki.fi
|
|||||||
-- category:
|
-- category:
|
||||||
extra-source-files: CHANGELOG.md
|
extra-source-files: CHANGELOG.md
|
||||||
|
|
||||||
|
common deps
|
||||||
|
build-depends: base >=4.13.0.0 && <4.15
|
||||||
|
|
||||||
library
|
library
|
||||||
|
import: deps
|
||||||
exposed-modules: MyLib
|
exposed-modules: MyLib
|
||||||
|
, Data.Trie
|
||||||
, Data.Email.Header
|
, Data.Email.Header
|
||||||
, Data.Email
|
, Data.Email
|
||||||
, Control.Addressbook.Streaming
|
, Control.Addressbook.Streaming
|
||||||
|
, Control.Addressbook.Query
|
||||||
-- other-modules:
|
-- other-modules:
|
||||||
-- other-extensions:
|
-- other-extensions:
|
||||||
default-extensions: OverloadedStrings
|
default-extensions: OverloadedStrings
|
||||||
LambdaCase
|
LambdaCase
|
||||||
build-depends: base ^>=4.13.0.0
|
build-depends: attoparsec
|
||||||
, attoparsec
|
|
||||||
, mtl
|
, mtl
|
||||||
, conduit
|
, conduit
|
||||||
, conduit-extra
|
, conduit-extra
|
||||||
, bytestring
|
, bytestring
|
||||||
, lens
|
, lens
|
||||||
, text
|
, text
|
||||||
, bytestring-trie
|
|
||||||
, vector
|
, vector
|
||||||
, containers
|
, containers
|
||||||
|
, filepath
|
||||||
|
, parallel
|
||||||
|
, unix
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
ghc-options: -Wall
|
||||||
|
|
||||||
executable addressbook
|
executable addressbook
|
||||||
|
import: deps
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
-- other-modules:
|
-- other-modules:
|
||||||
-- other-extensions:
|
-- other-extensions:
|
||||||
build-depends: base ^>=4.13.0.0, addressbook
|
build-depends: addressbook
|
||||||
, optparse-applicative
|
, optparse-applicative
|
||||||
|
, text
|
||||||
hs-source-dirs: app
|
hs-source-dirs: app
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
ghc-options: -Wall -threaded -eventlog
|
||||||
|
|
||||||
test-suite addressbook-test
|
test-suite addressbook-test
|
||||||
|
import: deps
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
default-extensions: OverloadedStrings
|
default-extensions: OverloadedStrings
|
||||||
LambdaCase
|
LambdaCase
|
||||||
other-modules: Test.Data.Email.Header
|
other-modules: Test.Data.Email.Header
|
||||||
, Test.Data.Email
|
, Test.Data.Email
|
||||||
|
, Test.Data.Trie
|
||||||
type: exitcode-stdio-1.0
|
type: exitcode-stdio-1.0
|
||||||
hs-source-dirs: test
|
hs-source-dirs: test
|
||||||
main-is: MyLibTest.hs
|
main-is: MyLibTest.hs
|
||||||
build-depends: base ^>=4.13.0.0
|
build-depends: addressbook
|
||||||
, addressbook
|
|
||||||
, tasty
|
, tasty
|
||||||
, tasty-hedgehog
|
, tasty-hedgehog
|
||||||
, tasty-hunit
|
, tasty-hunit
|
||||||
@ -70,3 +82,27 @@ test-suite addressbook-test
|
|||||||
, vector
|
, vector
|
||||||
, conduit
|
, conduit
|
||||||
, conduit-extra
|
, conduit-extra
|
||||||
|
, containers
|
||||||
|
ghc-options: -Wall
|
||||||
|
|
||||||
|
|
||||||
|
-- I know there's a benchmark type, but haskellPackages.shellFor doesn't seem
|
||||||
|
-- to support it and I don't have the time to figure it out
|
||||||
|
executable addressbook-bench
|
||||||
|
import: deps
|
||||||
|
default-language: Haskell2010
|
||||||
|
default-extensions: OverloadedStrings
|
||||||
|
LambdaCase
|
||||||
|
-- other-modules: Test.Data.Email.Header
|
||||||
|
-- , Test.Data.Email
|
||||||
|
-- , Test.Data.Trie
|
||||||
|
hs-source-dirs: bench
|
||||||
|
main-is: Bench.hs
|
||||||
|
build-depends: addressbook
|
||||||
|
, criterion
|
||||||
|
, hedgehog-corpus
|
||||||
|
, bytestring
|
||||||
|
, containers
|
||||||
|
ghc-options: -Wall
|
||||||
|
|
||||||
|
|
||||||
|
12
app/Main.hs
12
app/Main.hs
@ -3,18 +3,28 @@ module Main where
|
|||||||
|
|
||||||
import Options.Applicative
|
import Options.Applicative
|
||||||
|
|
||||||
|
import Data.Text
|
||||||
|
(Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
|
||||||
|
import qualified Control.Addressbook.Query as Query
|
||||||
import qualified Control.Addressbook.Streaming as Streaming
|
import qualified Control.Addressbook.Streaming as Streaming
|
||||||
|
|
||||||
data CmdLine
|
data CmdLine
|
||||||
= Stream
|
= Stream
|
||||||
|
| Query Text
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
cmdline :: Parser CmdLine
|
cmdline :: Parser CmdLine
|
||||||
cmdline = subparser (command "stream" (info (pure Stream) (progDesc "Record a stream of filenames")))
|
cmdline = subparser
|
||||||
|
( command "stream" (info (pure Stream) (progDesc "Record a stream of filenames"))
|
||||||
|
<> command "query" (info (Query . T.pack <$> argument str (metavar "QUERY")) (progDesc "Query email addresses"))
|
||||||
|
)
|
||||||
|
|
||||||
handler :: CmdLine -> IO ()
|
handler :: CmdLine -> IO ()
|
||||||
handler = \case
|
handler = \case
|
||||||
Stream -> Streaming.run
|
Stream -> Streaming.run
|
||||||
|
Query q -> Query.query q
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = execParser opts >>= handler
|
main = execParser opts >>= handler
|
||||||
|
54
bench/Bench.hs
Normal file
54
bench/Bench.hs
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
module Main where
|
||||||
|
|
||||||
|
import Criterion.Main (defaultMain)
|
||||||
|
import Data.ByteString (ByteString)
|
||||||
|
import Hedgehog.Corpus (muppets, simpsons, vegetables, viruses)
|
||||||
|
import Criterion
|
||||||
|
import qualified Data.Map.Strict as M
|
||||||
|
import qualified Data.ByteString.Char8 as B
|
||||||
|
import qualified Data.Trie as Trie
|
||||||
|
|
||||||
|
setupEnv :: IO ([(ByteString, ())], [(ByteString, ())])
|
||||||
|
setupEnv = pure (small, big)
|
||||||
|
where
|
||||||
|
small =
|
||||||
|
[ (firstname <> "." <> surname <> "@" <> domain <> "." <> tld, ())
|
||||||
|
| firstname <- simpsons
|
||||||
|
, surname <- muppets
|
||||||
|
, domain <- vegetables
|
||||||
|
, tld <- ["com", "fi", "co.uk", "info", "org"]
|
||||||
|
]
|
||||||
|
big =
|
||||||
|
[ (firstname <> "." <> surname <> "@" <> domain1 <> "." <> domain2 <> "." <> tld, ())
|
||||||
|
| firstname <- simpsons
|
||||||
|
, surname <- muppets
|
||||||
|
, domain1 <- vegetables
|
||||||
|
, domain2 <- viruses
|
||||||
|
, tld <- ["com", "fi", "co.uk", "info", "org"]
|
||||||
|
]
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = defaultMain
|
||||||
|
[ env setupEnv $ \ ~(small,big) -> bgroup "main"
|
||||||
|
[ bgroup "small"
|
||||||
|
[ bgroup "Data.Map"
|
||||||
|
[ bench "length . toList . fromList" $ whnf (length . M.toList . M.fromList) small
|
||||||
|
, bench "length . filter (\"homer\" `isPrefixOf`) . keys . fromList" $ whnf (length . M.keys . M.filterWithKey (\k _ -> "homer" `B.isPrefixOf` k) . M.fromList) small
|
||||||
|
]
|
||||||
|
, bgroup "Data.Trie"
|
||||||
|
[ bench "length . toList . fromList" $ whnf (length . Trie.toList . Trie.fromList) small
|
||||||
|
, bench "length . keys . submap \"homer\" . fromList" $ whnf (length . Trie.keys . Trie.submap "homer" . Trie.fromList) small
|
||||||
|
]
|
||||||
|
]
|
||||||
|
, bgroup "big"
|
||||||
|
[ bgroup "Data.Map"
|
||||||
|
[ bench "length . toList . fromList" $ whnf (length . M.toList . M.fromList) big
|
||||||
|
, bench "length . filter (\"homer\" `isPrefixOf`) . keys . fromList" $ whnf (length . M.keys . M.filterWithKey (\k _ -> "homer" `B.isPrefixOf` k) . M.fromList) big
|
||||||
|
]
|
||||||
|
, bgroup "Data.Trie"
|
||||||
|
[ bench "length . toList . fromList" $ whnf (length . Trie.toList . Trie.fromList) big
|
||||||
|
, bench "length . keys . submap \"homer\" . fromList" $ whnf (length . Trie.keys . Trie.submap "homer" . Trie.fromList) big
|
||||||
|
]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
]
|
23
default.nix
23
default.nix
@ -1,7 +1,7 @@
|
|||||||
{ mkDerivation, attoparsec, base, bytestring, bytestring-trie
|
{ mkDerivation, attoparsec, base, bytestring, conduit
|
||||||
, conduit, conduit-extra, containers, hedgehog, hedgehog-corpus
|
, conduit-extra, containers, criterion, filepath, hedgehog
|
||||||
, HUnit, lens, mtl, optparse-applicative, stdenv, tasty
|
, hedgehog-corpus, HUnit, lens, lib, mtl, optparse-applicative
|
||||||
, tasty-hedgehog, tasty-hunit, text, vector
|
, parallel, tasty, tasty-hedgehog, tasty-hunit, text, unix, vector
|
||||||
}:
|
}:
|
||||||
mkDerivation {
|
mkDerivation {
|
||||||
pname = "addressbook";
|
pname = "addressbook";
|
||||||
@ -10,13 +10,16 @@ mkDerivation {
|
|||||||
isLibrary = true;
|
isLibrary = true;
|
||||||
isExecutable = true;
|
isExecutable = true;
|
||||||
libraryHaskellDepends = [
|
libraryHaskellDepends = [
|
||||||
attoparsec base bytestring bytestring-trie conduit conduit-extra
|
attoparsec base bytestring conduit conduit-extra containers
|
||||||
containers lens mtl text vector
|
filepath lens mtl parallel text unix vector
|
||||||
|
];
|
||||||
|
executableHaskellDepends = [
|
||||||
|
base bytestring containers criterion hedgehog-corpus
|
||||||
|
optparse-applicative text
|
||||||
];
|
];
|
||||||
executableHaskellDepends = [ base optparse-applicative ];
|
|
||||||
testHaskellDepends = [
|
testHaskellDepends = [
|
||||||
base bytestring conduit conduit-extra hedgehog hedgehog-corpus
|
base bytestring conduit conduit-extra containers hedgehog
|
||||||
HUnit tasty tasty-hedgehog tasty-hunit text vector
|
hedgehog-corpus HUnit tasty tasty-hedgehog tasty-hunit text vector
|
||||||
];
|
];
|
||||||
license = stdenv.lib.licenses.bsd3;
|
license = lib.licenses.bsd3;
|
||||||
}
|
}
|
||||||
|
11
easy-hls-nix.json
Normal file
11
easy-hls-nix.json
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"url": "https://github.com/ssbothwell/easy-hls-nix",
|
||||||
|
"rev": "393ccab35104d5d49e0ff9eadf7b8654e87abffd",
|
||||||
|
"date": "2021-09-16T11:13:40-07:00",
|
||||||
|
"path": "/nix/store/dsfqcsiahsp9rkip4fsqzz32x0swa3d4-easy-hls-nix",
|
||||||
|
"sha256": "0q1qxlkzjqx2hvf9k2cp5a98vlvsj13lap6hm7gl1kkqp88ai1dw",
|
||||||
|
"fetchLFS": false,
|
||||||
|
"fetchSubmodules": false,
|
||||||
|
"deepClone": false,
|
||||||
|
"leaveDotGit": false
|
||||||
|
}
|
10
easy-hsl-nix.json
Normal file
10
easy-hsl-nix.json
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"url": "https://github.com/jkachmar/easy-hls-nix",
|
||||||
|
"rev": "291cf77f512a7121bb6801cde35ee1e8b7287f91",
|
||||||
|
"date": "2021-04-13T16:12:58-04:00",
|
||||||
|
"path": "/nix/store/wlmckg348q32ylfpg5dv3d5x88nkbi59-easy-hls-nix",
|
||||||
|
"sha256": "1bvbcp9zwmh53sm16ycp8phhc6vzc72a71sf0bvyjgfbn6zp68bc",
|
||||||
|
"fetchSubmodules": false,
|
||||||
|
"deepClone": false,
|
||||||
|
"leaveDotGit": false
|
||||||
|
}
|
62
flake.lock
generated
Normal file
62
flake.lock
generated
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"easy-hls": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": [
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1637250802,
|
||||||
|
"narHash": "sha256-/crlHEVB148PGQLZCsHOR9L5qgvCAfRSocIoKgmMAhA=",
|
||||||
|
"owner": "jkachmar",
|
||||||
|
"repo": "easy-hls-nix",
|
||||||
|
"rev": "7c123399ef8a67dc0e505d9cf7f2c7f64f1cd847",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "jkachmar",
|
||||||
|
"repo": "easy-hls-nix",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"flake-utils": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1637014545,
|
||||||
|
"narHash": "sha256-26IZAc5yzlD9FlDT54io1oqG/bBoyka+FJk5guaX4x4=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "bba5dcc8e0b20ab664967ad83d24d64cb64ec4f4",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1637312849,
|
||||||
|
"narHash": "sha256-OhVZopkyryEfLyPwcXk2IQsdi80lj6TY1YFoMNZ4hCQ=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "e4806bb4416f88c20f8be0b8ef9b5b09ff9022a6",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"id": "nixpkgs",
|
||||||
|
"type": "indirect"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"easy-hls": "easy-hls",
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
53
flake.nix
Normal file
53
flake.nix
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
{
|
||||||
|
description = "addressbook";
|
||||||
|
|
||||||
|
inputs = {
|
||||||
|
easy-hls = {
|
||||||
|
url = "github:jkachmar/easy-hls-nix";
|
||||||
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
};
|
||||||
|
flake-utils = {
|
||||||
|
url = "github:numtide/flake-utils";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
outputs = { self, nixpkgs, flake-utils, easy-hls }:
|
||||||
|
{ overlay = final: prev: {
|
||||||
|
haskellPackages = prev.haskellPackages.override ( old: {
|
||||||
|
overrides = final.lib.composeExtensions (old.overrides or (_: _: {})) (f: p: {
|
||||||
|
addressbook = f.callPackage ./. {};
|
||||||
|
});
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
//
|
||||||
|
flake-utils.lib.eachSystem ["x86_64-linux" "x86_64-darwin"] ( system:
|
||||||
|
let
|
||||||
|
pkgs = import nixpkgs { inherit system; overlays = [ self.overlay ]; };
|
||||||
|
hp = pkgs.haskellPackages;
|
||||||
|
hls = (easy-hls.withGhcs [ hp.ghc ]).${system};
|
||||||
|
in
|
||||||
|
rec {
|
||||||
|
|
||||||
|
packages = { inherit (hp) addressbook; };
|
||||||
|
|
||||||
|
defaultPackage = packages.addressbook;
|
||||||
|
apps.addressbook = {
|
||||||
|
type = "app";
|
||||||
|
program = "${hp.addressbook}/bin/addressbook";
|
||||||
|
};
|
||||||
|
devShell = hp.shellFor {
|
||||||
|
packages = h: [h.addressbook];
|
||||||
|
withHoogle = true;
|
||||||
|
buildInputs = with pkgs; [
|
||||||
|
entr
|
||||||
|
cabal-install
|
||||||
|
hp.hlint
|
||||||
|
stylish-haskell
|
||||||
|
ghcid
|
||||||
|
hls
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
17
shell.nix
17
shell.nix
@ -1,15 +1,24 @@
|
|||||||
with (import <nixpkgs> {});
|
with (import <nixpkgs> {});
|
||||||
|
|
||||||
let addressbook = haskellPackages.callPackage ./. {};
|
let
|
||||||
|
hp = haskellPackages.extend (self: super: {
|
||||||
|
addressbook = self.callPackage ./. {};
|
||||||
|
});
|
||||||
|
easy-hls = callPackage easy-hls-src { ghcVersions = [ hp.ghc.version ]; };
|
||||||
|
easy-hls-src = fetchFromGitHub {
|
||||||
|
owner = "ssbothwell";
|
||||||
|
repo = "easy-hls-nix";
|
||||||
|
inherit (builtins.fromJSON (builtins.readFile ./easy-hls-nix.json)) rev sha256;
|
||||||
|
};
|
||||||
|
|
||||||
in
|
in
|
||||||
|
|
||||||
mkShell {
|
hp.shellFor {
|
||||||
name = "addressbook-shell";
|
packages = h: [h.addressbook];
|
||||||
buildInputs = [
|
buildInputs = [
|
||||||
ghcid
|
ghcid
|
||||||
|
easy-hls
|
||||||
stylish-haskell
|
stylish-haskell
|
||||||
haskellPackages.cabal-install
|
haskellPackages.cabal-install
|
||||||
(haskellPackages.ghcWithHoogle (_: addressbook.buildInputs ++ addressbook.propagatedBuildInputs))
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
42
src/Control/Addressbook/Query.hs
Normal file
42
src/Control/Addressbook/Query.hs
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{-# LANGUAGE TypeApplications #-}
|
||||||
|
module Control.Addressbook.Query where
|
||||||
|
|
||||||
|
import Data.Text
|
||||||
|
(Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
|
||||||
|
import Conduit
|
||||||
|
import qualified Data.Conduit.Binary as CB
|
||||||
|
import qualified Data.Conduit.Combinators as C
|
||||||
|
import qualified Data.Conduit.Text as CT
|
||||||
|
|
||||||
|
|
||||||
|
import System.IO
|
||||||
|
(stdout)
|
||||||
|
|
||||||
|
import Data.Maybe
|
||||||
|
(fromMaybe)
|
||||||
|
import System.Environment
|
||||||
|
(lookupEnv)
|
||||||
|
import System.FilePath
|
||||||
|
((</>))
|
||||||
|
|
||||||
|
import Control.Exception
|
||||||
|
(catch)
|
||||||
|
|
||||||
|
-- XXX: The current Data.Trie implementation is much slower than Data.Set
|
||||||
|
query :: Text -> IO ()
|
||||||
|
query prefix = do
|
||||||
|
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
|
||||||
|
catch @IOError (runResourceT $ runConduit $ filterAddresses datDir) $ \e -> print e
|
||||||
|
where
|
||||||
|
prefixLC = T.toLower prefix
|
||||||
|
filterAddresses :: FilePath -> ConduitT () Void (ResourceT IO) ()
|
||||||
|
filterAddresses dir =
|
||||||
|
CB.sourceFile (dir </> ".addressbook.dat")
|
||||||
|
.| CT.decode CT.utf8
|
||||||
|
.| CT.lines
|
||||||
|
.| C.filter (\v -> prefixLC `T.isPrefixOf` T.toLower v)
|
||||||
|
.| CT.encode CT.utf8
|
||||||
|
.| C.map (<> "\n")
|
||||||
|
.| CB.sinkHandle stdout
|
@ -1,41 +1,73 @@
|
|||||||
module Control.Addressbook.Streaming where
|
module Control.Addressbook.Streaming where
|
||||||
|
|
||||||
import qualified Data.Text as T
|
|
||||||
|
|
||||||
import Conduit
|
import Conduit
|
||||||
import qualified Data.Conduit.Binary as CB
|
import qualified Data.Conduit.Binary as CB
|
||||||
import qualified Data.Conduit.Combinators as C
|
import qualified Data.Conduit.Combinators as C
|
||||||
import qualified Data.Conduit.Text as CT
|
import qualified Data.Conduit.List as CL
|
||||||
|
|
||||||
import Data.Email
|
import Data.Email
|
||||||
import Data.Email.Header
|
import Data.Email.Header
|
||||||
(Header(..))
|
(Header(..))
|
||||||
|
|
||||||
import System.IO
|
|
||||||
(stdin)
|
|
||||||
|
|
||||||
import qualified Data.Foldable as F
|
import qualified Data.Foldable as F
|
||||||
|
|
||||||
import qualified Data.Set as S
|
|
||||||
|
import Data.Maybe
|
||||||
|
(fromMaybe)
|
||||||
|
import System.Environment
|
||||||
|
(lookupEnv)
|
||||||
|
import System.FilePath
|
||||||
|
((</>))
|
||||||
|
import Data.Set (Set)
|
||||||
|
import Data.ByteString (ByteString)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import qualified Data.ByteString.Lazy as LBS
|
||||||
|
import Data.Char (ord)
|
||||||
|
import qualified Data.ByteString.Lazy.Char8 as LBC
|
||||||
|
import System.IO.Unsafe (unsafeInterleaveIO)
|
||||||
|
import Control.Parallel.Strategies (rseq, parMap)
|
||||||
|
import qualified Data.List as L
|
||||||
|
import Control.Monad (unless)
|
||||||
|
import System.Posix (touchFile)
|
||||||
|
|
||||||
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
|
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
|
||||||
combine = await >>= \case
|
combine = await >>= \case
|
||||||
Nothing -> pure ()
|
Nothing -> pure ()
|
||||||
Just path -> (CB.sourceFile path .| parseEmail) >> combine
|
Just path -> (CB.sourceFile path .| parseEmail) >> combine
|
||||||
|
|
||||||
|
chunks :: Int -> [a] -> [[a]]
|
||||||
|
chunks n = L.unfoldr $ \case
|
||||||
|
[] -> Nothing
|
||||||
|
xs -> Just (splitAt n xs)
|
||||||
|
|
||||||
run :: IO ()
|
run :: IO ()
|
||||||
run = do
|
run = do
|
||||||
x <- runResourceT $ runConduit stream
|
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
|
||||||
F.for_ x print
|
let datFile = datDir </> ".addressbook.dat"
|
||||||
|
touchFile datFile
|
||||||
|
original <- Set.fromList . map LBS.toStrict . lbsLines <$> LBS.readFile datFile
|
||||||
|
xs <- LBS.getContents >>= stream
|
||||||
|
let set = original `Set.union` F.fold (parMap rseq F.fold (chunks 200 xs))
|
||||||
|
unless (original == set) $
|
||||||
|
runResourceT $
|
||||||
|
runConduit $
|
||||||
|
CL.sourceList (Set.elems set)
|
||||||
|
.| C.map (<> "\n")
|
||||||
|
.| CB.sinkFileCautious datFile
|
||||||
where
|
where
|
||||||
separate = \case
|
separate = \case
|
||||||
From x -> [x]
|
From x -> [x]
|
||||||
To xs -> F.toList xs
|
To xs -> F.toList xs
|
||||||
|
-- A set of (locally) unique addresses. Composes with parMap
|
||||||
|
lbsLines = LBS.split (fromIntegral $ ord '\n')
|
||||||
|
stream :: LBS.ByteString -> IO [Set ByteString]
|
||||||
stream =
|
stream =
|
||||||
CB.sourceHandle stdin
|
traverse (unsafeInterleaveIO . parse . LBC.unpack)
|
||||||
.| CT.decode CT.utf8
|
. filter (not . LBS.null)
|
||||||
.| CT.lines
|
. lbsLines
|
||||||
.| C.map T.unpack
|
parse path =
|
||||||
.| combine
|
runResourceT $
|
||||||
.| C.concatMap separate
|
runConduit $
|
||||||
.| C.foldMap (S.singleton)
|
CB.sourceFile path .| parseEmail .| C.concatMap separate .| C.foldMap Set.singleton
|
||||||
|
@ -5,12 +5,9 @@ import Data.Email.Header
|
|||||||
|
|
||||||
import Conduit
|
import Conduit
|
||||||
import qualified Data.Conduit.Combinators as C
|
import qualified Data.Conduit.Combinators as C
|
||||||
import qualified Data.Conduit.Text as CT
|
|
||||||
|
|
||||||
import Data.ByteString
|
import Data.ByteString
|
||||||
(ByteString)
|
(ByteString)
|
||||||
|
|
||||||
parseEmail :: (MonadUnliftIO m, MonadThrow m, Monad m) => ConduitM ByteString Header m ()
|
parseEmail :: (MonadUnliftIO m, MonadThrow m, Monad m) => ConduitM ByteString Header m ()
|
||||||
parseEmail = catchC (CT.decode CT.utf8) err .| CT.lines .| C.concatMap decode
|
parseEmail = C.linesUnboundedAscii .| C.concatMap decode
|
||||||
where
|
|
||||||
err e = liftIO (print @CT.TextException e) >> yield ""
|
|
||||||
|
@ -1,55 +1,63 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE ApplicativeDo #-}
|
||||||
module Data.Email.Header where
|
module Data.Email.Header where
|
||||||
|
|
||||||
import Data.Text
|
|
||||||
(Text)
|
|
||||||
import qualified Data.Text as T
|
|
||||||
|
|
||||||
import qualified Data.Foldable as F
|
import qualified Data.Foldable as F
|
||||||
|
|
||||||
import Data.Attoparsec.Text
|
import Data.Attoparsec.ByteString.Char8
|
||||||
|
|
||||||
import Data.Vector
|
import Data.Vector
|
||||||
(Vector)
|
(Vector)
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
|
|
||||||
import Data.Char
|
|
||||||
(isSpace)
|
|
||||||
|
|
||||||
import Control.Applicative
|
import Control.Applicative
|
||||||
((<|>))
|
((<|>))
|
||||||
|
import Data.ByteString (ByteString)
|
||||||
|
import qualified Data.ByteString.Char8 as BC
|
||||||
|
|
||||||
data Header
|
data Header
|
||||||
= From !Text
|
= From !ByteString
|
||||||
| To !(Vector Text)
|
| To !(Vector ByteString)
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
decode :: Text -> Either String Header
|
decode :: ByteString -> Either String Header
|
||||||
decode = parseOnly parseHeader
|
decode = parseOnly parseHeader
|
||||||
where
|
{-# INLINE decode #-}
|
||||||
parseHeader :: Parser Header
|
|
||||||
parseHeader = parseFrom <|> parseTo
|
parseHeader :: Parser Header
|
||||||
parseFrom :: Parser Header
|
parseHeader = parseFrom <|> parseTo
|
||||||
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
|
{-# INLINE parseHeader #-}
|
||||||
parseTo :: Parser Header
|
|
||||||
parseTo = To <$> (string "To:" *> emptySpace *> emails)
|
parseFrom :: Parser Header
|
||||||
emptySpace = many' space
|
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
|
||||||
emails :: Parser (Vector Text)
|
|
||||||
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
|
parseTo :: Parser Header
|
||||||
bracketEmail :: Parser Text
|
parseTo = To <$> (string "To:" *> emptySpace *> emails)
|
||||||
bracketEmail = do
|
|
||||||
|
emptySpace :: Parser ()
|
||||||
|
emptySpace = () <$ many' space
|
||||||
|
|
||||||
|
emails :: Parser (Vector ByteString)
|
||||||
|
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
|
||||||
|
|
||||||
|
bracketEmail :: Parser ByteString
|
||||||
|
bracketEmail = do
|
||||||
_ <- manyTill anyChar (char '<')
|
_ <- manyTill anyChar (char '<')
|
||||||
T.pack <$> manyTill anyChar (char '>')
|
email
|
||||||
email :: Parser Text
|
{-# INLINE bracketEmail #-}
|
||||||
email = do
|
|
||||||
|
email :: Parser ByteString
|
||||||
|
email = do
|
||||||
_ <- many' space
|
_ <- many' space
|
||||||
name <- T.pack <$> many' (notChar '@')
|
name <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
|
||||||
_ <- char '@'
|
_ <- char '@'
|
||||||
rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ','))
|
rest <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>' && c /= '<'))
|
||||||
_ <- many' (notChar ',')
|
_ <- many' (notChar ',')
|
||||||
pure (name <> "@" <> rest)
|
pure (name <> "@" <> rest)
|
||||||
|
{-# INLINE email #-}
|
||||||
|
|
||||||
|
|
||||||
encode :: Header -> Text
|
encode :: Header -> ByteString
|
||||||
encode = \case
|
encode = \case
|
||||||
From addr -> "From: " <> addr
|
From addr -> "From: " <> addr
|
||||||
To addrs -> "To: " <> T.intercalate ", " (F.toList addrs)
|
To addrs -> "To: " <> BC.intercalate ", " (F.toList addrs)
|
||||||
|
87
src/Data/Trie.hs
Normal file
87
src/Data/Trie.hs
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
-- The bytestring-trie is marked as broken. Trie is a simple(ish) datastructure, implement one here
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
|
{-# LANGUAGE TypeApplications #-}
|
||||||
|
{-# LANGUAGE DeriveFoldable #-}
|
||||||
|
module Data.Trie where
|
||||||
|
import Data.ByteString (ByteString)
|
||||||
|
import Data.Word (Word8)
|
||||||
|
import Data.Foldable (foldl')
|
||||||
|
import Data.Semigroup (Last(..))
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import Data.Coerce (coerce)
|
||||||
|
import Data.IntMap (IntMap)
|
||||||
|
import qualified Data.IntMap.Strict as M
|
||||||
|
|
||||||
|
data Pair a b = Pair !a !b
|
||||||
|
deriving (Functor, Foldable)
|
||||||
|
|
||||||
|
data Trie a
|
||||||
|
= Empty
|
||||||
|
| Branch !(Maybe (Pair ByteString a)) !(IntMap (Trie a))
|
||||||
|
deriving (Functor, Foldable)
|
||||||
|
|
||||||
|
instance Semigroup a => Semigroup (Trie a) where
|
||||||
|
(<>) = union
|
||||||
|
|
||||||
|
instance Monoid a => Monoid (Trie a) where
|
||||||
|
mempty = Empty
|
||||||
|
|
||||||
|
instance Show a => Show (Trie a) where
|
||||||
|
show = show . toList
|
||||||
|
|
||||||
|
keys :: Trie a -> [ByteString]
|
||||||
|
keys Empty = []
|
||||||
|
keys (Branch Nothing m) = concatMap keys (M.elems m)
|
||||||
|
keys (Branch (Just (Pair b _)) m) = b : concatMap keys (M.elems m)
|
||||||
|
|
||||||
|
union :: Semigroup a => Trie a -> Trie a -> Trie a
|
||||||
|
union Empty r = r
|
||||||
|
union l Empty = l
|
||||||
|
union (Branch m_pa cl) (Branch ma cr) = Branch (merge m_pa ma) (M.unionWith union cl cr)
|
||||||
|
where
|
||||||
|
merge (Just (Pair x a)) (Just (Pair _ b)) = Just $ Pair x (a <> b)
|
||||||
|
merge Nothing r = r
|
||||||
|
merge l Nothing = l
|
||||||
|
|
||||||
|
unionR :: forall a. Trie a -> Trie a -> Trie a
|
||||||
|
unionR a b = coerce @(Trie (Last a)) $ union (coerce a) (coerce b)
|
||||||
|
|
||||||
|
singleton :: forall a. ByteString -> a -> Trie a
|
||||||
|
singleton bs a = go (B.uncons bs)
|
||||||
|
where
|
||||||
|
go :: Maybe (Word8, ByteString) -> Trie a
|
||||||
|
go Nothing =
|
||||||
|
let x = Pair bs a
|
||||||
|
in x `seq` Branch (Just (Pair bs a)) M.empty
|
||||||
|
go (Just (w,c)) =
|
||||||
|
let y = M.singleton (fromIntegral w) (go (B.uncons c))
|
||||||
|
in y `seq` Branch Nothing y
|
||||||
|
|
||||||
|
empty :: Trie a
|
||||||
|
empty = Empty
|
||||||
|
|
||||||
|
elems :: Trie a -> [a]
|
||||||
|
elems = foldr (:) []
|
||||||
|
|
||||||
|
submap :: forall a. ByteString -> Trie a -> Trie a
|
||||||
|
submap bs = go (B.uncons bs)
|
||||||
|
where
|
||||||
|
go :: Maybe (Word8, ByteString) -> Trie a -> Trie a
|
||||||
|
go _ Empty = Empty
|
||||||
|
go Nothing t = t
|
||||||
|
go (Just (w,cs)) (Branch _ c) = maybe empty (go (B.uncons cs)) $ M.lookup (fromIntegral w) c
|
||||||
|
|
||||||
|
insert :: forall a. ByteString -> a -> Trie a -> Trie a
|
||||||
|
insert bs a = (`unionR` singleton bs a)
|
||||||
|
|
||||||
|
fromList :: [(ByteString, a)] -> Trie a
|
||||||
|
fromList = foldl' (\acc (k,v) -> insert k v acc) empty
|
||||||
|
|
||||||
|
toList :: forall a. Trie a -> [(ByteString, a)]
|
||||||
|
toList = go
|
||||||
|
where
|
||||||
|
go :: Trie a -> [(ByteString, a)]
|
||||||
|
go Empty = []
|
||||||
|
go (Branch Nothing m) = concatMap (\(_, child) -> go child) (M.toList m)
|
||||||
|
go (Branch (Just (Pair a b)) m) = (a, b) : concatMap (\(_, child) -> go child) (M.toList m)
|
@ -4,11 +4,13 @@ import Test.Tasty
|
|||||||
|
|
||||||
import qualified Test.Data.Email as Data.Email
|
import qualified Test.Data.Email as Data.Email
|
||||||
import qualified Test.Data.Email.Header as Data.Email.Header
|
import qualified Test.Data.Email.Header as Data.Email.Header
|
||||||
|
import qualified Test.Data.Trie as Data.Trie
|
||||||
|
|
||||||
tests :: TestTree
|
tests :: TestTree
|
||||||
tests = testGroup "tests"
|
tests = testGroup "tests"
|
||||||
[ Data.Email.Header.tests
|
[ Data.Email.Header.tests
|
||||||
, Data.Email.tests
|
, Data.Email.tests
|
||||||
|
, Data.Trie.tests
|
||||||
]
|
]
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
|
@ -17,9 +17,11 @@ sample :: ByteString
|
|||||||
sample =
|
sample =
|
||||||
"Subject: Hello worldddd\n\
|
"Subject: Hello worldddd\n\
|
||||||
\From: me@example.com\n\
|
\From: me@example.com\n\
|
||||||
|
\Dkim: asd\n\
|
||||||
\To: you <you@example.com>\n\
|
\To: you <you@example.com>\n\
|
||||||
\ \n\n \
|
\\n\n\
|
||||||
\foo"
|
\From: foo bar <a mailto=\"me2@example.com\" />\n\
|
||||||
|
\asd\n"
|
||||||
|
|
||||||
parseToList :: ByteString -> IO [Header]
|
parseToList :: ByteString -> IO [Header]
|
||||||
parseToList _ = runConduit (CB.sourceLbs sample .| parseEmail .| CL.consume)
|
parseToList _ = runConduit (CB.sourceLbs sample .| parseEmail .| CL.consume)
|
||||||
@ -29,4 +31,7 @@ tests = testGroup "Data.Email"
|
|||||||
[ testCase "Can parse a sample email" $ do
|
[ testCase "Can parse a sample email" $ do
|
||||||
got <- parseToList sample
|
got <- parseToList sample
|
||||||
got @?= [ From "me@example.com", To ["you@example.com"]]
|
got @?= [ From "me@example.com", To ["you@example.com"]]
|
||||||
|
, testCase "Combined mailto" $ do
|
||||||
|
let got = decode "To: James Doe<james.doe@example.com<mailto:james.doe@example.com>>"
|
||||||
|
got @?= Right (To ["james.doe@example.com"])
|
||||||
]
|
]
|
||||||
|
@ -10,11 +10,12 @@ import qualified Hedgehog.Corpus as Corpus
|
|||||||
import qualified Hedgehog.Gen as Gen
|
import qualified Hedgehog.Gen as Gen
|
||||||
import qualified Hedgehog.Range as Range
|
import qualified Hedgehog.Range as Range
|
||||||
|
|
||||||
import Data.Text
|
import Data.ByteString
|
||||||
import qualified Data.Text as T
|
import qualified Data.ByteString as T
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
|
|
||||||
import Data.Email.Header
|
import Data.Email.Header
|
||||||
|
import qualified Data.ByteString.Char8 as BC
|
||||||
|
|
||||||
genHeader :: Gen Header
|
genHeader :: Gen Header
|
||||||
genHeader = Gen.choice
|
genHeader = Gen.choice
|
||||||
@ -22,17 +23,17 @@ genHeader = Gen.choice
|
|||||||
, To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
|
, To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
|
||||||
]
|
]
|
||||||
|
|
||||||
genEmail :: Gen Text
|
genEmail :: Gen ByteString
|
||||||
genEmail = do
|
genEmail = do
|
||||||
name <- Gen.element Corpus.simpsons
|
name <- Gen.element Corpus.simpsons
|
||||||
domain <- Gen.element Corpus.cooking
|
domain <- Gen.element Corpus.cooking
|
||||||
tld <- Gen.element ["com","fi","org"]
|
tld <- Gen.element ["com","fi","org"]
|
||||||
pure $ name <> "@" <> domain <> "." <> tld
|
pure $ name <> "@" <> domain <> "." <> tld
|
||||||
|
|
||||||
wrapped :: Char -> Text -> Char -> Text
|
wrapped :: Char -> ByteString -> Char -> ByteString
|
||||||
wrapped l x r = T.singleton l <> x <> T.singleton r
|
wrapped l x r = BC.singleton l <> x <> BC.singleton r
|
||||||
|
|
||||||
genComment :: Gen Text
|
genComment :: Gen ByteString
|
||||||
genComment = do
|
genComment = do
|
||||||
x <- Gen.element Corpus.simpsons
|
x <- Gen.element Corpus.simpsons
|
||||||
Gen.element [x, wrapped '"' x '"']
|
Gen.element [x, wrapped '"' x '"']
|
||||||
|
45
test/Test/Data/Trie.hs
Normal file
45
test/Test/Data/Trie.hs
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
module Test.Data.Trie where
|
||||||
|
|
||||||
|
import Test.Tasty
|
||||||
|
import Test.Tasty.Hedgehog (testProperty)
|
||||||
|
import Hedgehog
|
||||||
|
import Data.Monoid (Sum(..))
|
||||||
|
import Data.ByteString (ByteString)
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Hedgehog.Gen as Gen
|
||||||
|
import qualified Hedgehog.Range as Range
|
||||||
|
import qualified Data.Trie as T
|
||||||
|
import qualified Data.Map.Strict as M
|
||||||
|
|
||||||
|
genMap :: Gen (Map ByteString (Sum Int))
|
||||||
|
genMap = Gen.map (Range.linear 0 100) genT
|
||||||
|
where
|
||||||
|
genT = (,) <$> Gen.bytes (Range.linear 0 20) <*> fmap Sum (Gen.integral (Range.linear 0 10))
|
||||||
|
|
||||||
|
propIsomorphic :: Property
|
||||||
|
propIsomorphic = property $ do
|
||||||
|
m <- forAll genMap
|
||||||
|
let wanted = m
|
||||||
|
got = M.fromList . T.toList . T.fromList . M.toList $ wanted
|
||||||
|
wanted === got
|
||||||
|
|
||||||
|
propElems :: Property
|
||||||
|
propElems = property $ do
|
||||||
|
m <- forAll genMap
|
||||||
|
let wanted = M.elems m
|
||||||
|
got = T.elems . T.fromList . M.toList $ m
|
||||||
|
wanted === got
|
||||||
|
|
||||||
|
propKeys :: Property
|
||||||
|
propKeys = property $ do
|
||||||
|
m <- forAll genMap
|
||||||
|
let wanted = M.keys m
|
||||||
|
got = T.keys . T.fromList . M.toList $ m
|
||||||
|
wanted === got
|
||||||
|
|
||||||
|
tests :: TestTree
|
||||||
|
tests = testGroup "Data.Trie"
|
||||||
|
[ testProperty "isomorphic to Map" $ propIsomorphic
|
||||||
|
, testProperty "isomorphic elems" $ propElems
|
||||||
|
, testProperty "isomorphic keys" $ propKeys
|
||||||
|
]
|
Reference in New Issue
Block a user