Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
6f0d8d0372 | |||
5182c47a19 | |||
a47d4903cb | |||
00e585281c | |||
03860c370a | |||
41c666fe93 |
@ -41,6 +41,8 @@ library
|
||||
, vector
|
||||
, containers
|
||||
, filepath
|
||||
, parallel
|
||||
, unix
|
||||
hs-source-dirs: src
|
||||
default-language: Haskell2010
|
||||
ghc-options: -Wall
|
||||
@ -55,7 +57,7 @@ executable addressbook
|
||||
, text
|
||||
hs-source-dirs: app
|
||||
default-language: Haskell2010
|
||||
ghc-options: -Wall -threaded
|
||||
ghc-options: -Wall -threaded -eventlog
|
||||
|
||||
test-suite addressbook-test
|
||||
import: deps
|
||||
|
@ -1,7 +1,7 @@
|
||||
{ mkDerivation, attoparsec, base, bytestring, conduit
|
||||
, conduit-extra, containers, criterion, filepath, hedgehog
|
||||
, hedgehog-corpus, HUnit, lens, lib, mtl, optparse-applicative
|
||||
, tasty, tasty-hedgehog, tasty-hunit, text, vector
|
||||
, parallel, tasty, tasty-hedgehog, tasty-hunit, text, unix, vector
|
||||
}:
|
||||
mkDerivation {
|
||||
pname = "addressbook";
|
||||
@ -11,10 +11,11 @@ mkDerivation {
|
||||
isExecutable = true;
|
||||
libraryHaskellDepends = [
|
||||
attoparsec base bytestring conduit conduit-extra containers
|
||||
filepath lens mtl text vector
|
||||
filepath lens mtl parallel text unix vector
|
||||
];
|
||||
executableHaskellDepends = [
|
||||
base criterion hedgehog-corpus optparse-applicative text
|
||||
base bytestring containers criterion hedgehog-corpus
|
||||
optparse-applicative text
|
||||
];
|
||||
testHaskellDepends = [
|
||||
base bytestring conduit conduit-extra containers hedgehog
|
||||
|
62
flake.lock
Normal file
62
flake.lock
Normal file
@ -0,0 +1,62 @@
|
||||
{
|
||||
"nodes": {
|
||||
"easy-hls": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1637250802,
|
||||
"narHash": "sha256-/crlHEVB148PGQLZCsHOR9L5qgvCAfRSocIoKgmMAhA=",
|
||||
"owner": "jkachmar",
|
||||
"repo": "easy-hls-nix",
|
||||
"rev": "7c123399ef8a67dc0e505d9cf7f2c7f64f1cd847",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "jkachmar",
|
||||
"repo": "easy-hls-nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-utils": {
|
||||
"locked": {
|
||||
"lastModified": 1637014545,
|
||||
"narHash": "sha256-26IZAc5yzlD9FlDT54io1oqG/bBoyka+FJk5guaX4x4=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "bba5dcc8e0b20ab664967ad83d24d64cb64ec4f4",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1637312849,
|
||||
"narHash": "sha256-OhVZopkyryEfLyPwcXk2IQsdi80lj6TY1YFoMNZ4hCQ=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "e4806bb4416f88c20f8be0b8ef9b5b09ff9022a6",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"id": "nixpkgs",
|
||||
"type": "indirect"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"easy-hls": "easy-hls",
|
||||
"flake-utils": "flake-utils",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
53
flake.nix
Normal file
53
flake.nix
Normal file
@ -0,0 +1,53 @@
|
||||
{
|
||||
description = "addressbook";
|
||||
|
||||
inputs = {
|
||||
easy-hls = {
|
||||
url = "github:jkachmar/easy-hls-nix";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
flake-utils = {
|
||||
url = "github:numtide/flake-utils";
|
||||
};
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, flake-utils, easy-hls }:
|
||||
{ overlay = final: prev: {
|
||||
haskellPackages = prev.haskellPackages.override ( old: {
|
||||
overrides = final.lib.composeExtensions (old.overrides or (_: _: {})) (f: p: {
|
||||
addressbook = f.callPackage ./. {};
|
||||
});
|
||||
});
|
||||
};
|
||||
}
|
||||
//
|
||||
flake-utils.lib.eachSystem ["x86_64-linux" "x86_64-darwin"] ( system:
|
||||
let
|
||||
pkgs = import nixpkgs { inherit system; overlays = [ self.overlay ]; };
|
||||
hp = pkgs.haskellPackages;
|
||||
hls = (easy-hls.withGhcs [ hp.ghc ]).${system};
|
||||
in
|
||||
rec {
|
||||
|
||||
packages = { inherit (hp) addressbook; };
|
||||
|
||||
defaultPackage = packages.addressbook;
|
||||
apps.addressbook = {
|
||||
type = "app";
|
||||
program = "${hp.addressbook}/bin/addressbook";
|
||||
};
|
||||
devShell = hp.shellFor {
|
||||
packages = h: [h.addressbook];
|
||||
withHoogle = true;
|
||||
buildInputs = with pkgs; [
|
||||
entr
|
||||
cabal-install
|
||||
hp.hlint
|
||||
stylish-haskell
|
||||
ghcid
|
||||
hls
|
||||
];
|
||||
};
|
||||
}
|
||||
);
|
||||
}
|
@ -1,23 +1,18 @@
|
||||
module Control.Addressbook.Streaming where
|
||||
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Conduit
|
||||
import qualified Data.Conduit.Binary as CB
|
||||
import qualified Data.Conduit.Combinators as C
|
||||
import qualified Data.Conduit.List as CL
|
||||
import qualified Data.Conduit.Text as CT
|
||||
|
||||
import Data.Email
|
||||
import Data.Email.Header
|
||||
(Header(..))
|
||||
|
||||
import System.IO
|
||||
(stdin)
|
||||
|
||||
import qualified Data.Foldable as F
|
||||
|
||||
import qualified Data.Map.Strict as Map
|
||||
|
||||
import Data.Maybe
|
||||
(fromMaybe)
|
||||
@ -25,28 +20,54 @@ import System.Environment
|
||||
(lookupEnv)
|
||||
import System.FilePath
|
||||
((</>))
|
||||
import Data.Set (Set)
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.ByteString.Lazy as LBS
|
||||
import Data.Char (ord)
|
||||
import qualified Data.ByteString.Lazy.Char8 as LBC
|
||||
import System.IO.Unsafe (unsafeInterleaveIO)
|
||||
import Control.Parallel.Strategies (rseq, parMap)
|
||||
import qualified Data.List as L
|
||||
import Control.Monad (unless)
|
||||
import System.Posix (touchFile)
|
||||
|
||||
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
|
||||
combine = await >>= \case
|
||||
Nothing -> pure ()
|
||||
Just path -> (CB.sourceFile path .| parseEmail) >> combine
|
||||
|
||||
chunks :: Int -> [a] -> [[a]]
|
||||
chunks n = L.unfoldr $ \case
|
||||
[] -> Nothing
|
||||
xs -> Just (splitAt n xs)
|
||||
|
||||
run :: IO ()
|
||||
run = do
|
||||
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
|
||||
runResourceT $ do
|
||||
x <- runConduit stream
|
||||
runConduit (CL.sourceList (Map.keys x) .| C.map (<> "\n") .| CB.sinkFileCautious (datDir </> ".addressbook.dat"))
|
||||
let datFile = datDir </> ".addressbook.dat"
|
||||
touchFile datFile
|
||||
original <- Set.fromList . map LBS.toStrict . lbsLines <$> LBS.readFile datFile
|
||||
xs <- LBS.getContents >>= stream
|
||||
let set = original `Set.union` F.fold (parMap rseq F.fold (chunks 200 xs))
|
||||
unless (original == set) $
|
||||
runResourceT $
|
||||
runConduit $
|
||||
CL.sourceList (Set.elems set)
|
||||
.| C.map (<> "\n")
|
||||
.| CB.sinkFileCautious datFile
|
||||
where
|
||||
separate = \case
|
||||
From x -> [x]
|
||||
To xs -> F.toList xs
|
||||
-- A set of (locally) unique addresses. Composes with parMap
|
||||
lbsLines = LBS.split (fromIntegral $ ord '\n')
|
||||
stream :: LBS.ByteString -> IO [Set ByteString]
|
||||
stream =
|
||||
CB.sourceHandle stdin
|
||||
.| CT.decode CT.utf8
|
||||
.| CT.lines
|
||||
.| C.map T.unpack
|
||||
.| combine
|
||||
.| C.concatMap separate
|
||||
.| CT.encode CT.utf8
|
||||
.| C.foldMap (`Map.singleton` ())
|
||||
traverse (unsafeInterleaveIO . parse . LBC.unpack)
|
||||
. filter (not . LBS.null)
|
||||
. lbsLines
|
||||
parse path =
|
||||
runResourceT $
|
||||
runConduit $
|
||||
CB.sourceFile path .| parseEmail .| C.concatMap separate .| C.foldMap Set.singleton
|
||||
|
@ -5,12 +5,9 @@ import Data.Email.Header
|
||||
|
||||
import Conduit
|
||||
import qualified Data.Conduit.Combinators as C
|
||||
import qualified Data.Conduit.Text as CT
|
||||
|
||||
import Data.ByteString
|
||||
(ByteString)
|
||||
|
||||
parseEmail :: (MonadUnliftIO m, MonadThrow m, Monad m) => ConduitM ByteString Header m ()
|
||||
parseEmail = catchC (CT.decode CT.utf8) err .| CT.lines .| C.concatMap decode
|
||||
where
|
||||
err e = liftIO (print @CT.TextException e) >> yield ""
|
||||
parseEmail = C.linesUnboundedAscii .| C.concatMap decode
|
||||
|
@ -1,55 +1,63 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ApplicativeDo #-}
|
||||
module Data.Email.Header where
|
||||
|
||||
import Data.Text
|
||||
(Text)
|
||||
import qualified Data.Text as T
|
||||
|
||||
import qualified Data.Foldable as F
|
||||
|
||||
import Data.Attoparsec.Text
|
||||
import Data.Attoparsec.ByteString.Char8
|
||||
|
||||
import Data.Vector
|
||||
(Vector)
|
||||
import qualified Data.Vector as V
|
||||
|
||||
import Data.Char
|
||||
(isSpace)
|
||||
|
||||
import Control.Applicative
|
||||
((<|>))
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString.Char8 as BC
|
||||
|
||||
data Header
|
||||
= From !Text
|
||||
| To !(Vector Text)
|
||||
= From !ByteString
|
||||
| To !(Vector ByteString)
|
||||
deriving (Show, Eq)
|
||||
|
||||
decode :: Text -> Either String Header
|
||||
decode :: ByteString -> Either String Header
|
||||
decode = parseOnly parseHeader
|
||||
where
|
||||
parseHeader :: Parser Header
|
||||
parseHeader = parseFrom <|> parseTo
|
||||
parseFrom :: Parser Header
|
||||
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
|
||||
parseTo :: Parser Header
|
||||
parseTo = To <$> (string "To:" *> emptySpace *> emails)
|
||||
emptySpace = many' space
|
||||
emails :: Parser (Vector Text)
|
||||
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
|
||||
bracketEmail :: Parser Text
|
||||
bracketEmail = do
|
||||
_ <- manyTill anyChar (char '<')
|
||||
email
|
||||
email :: Parser Text
|
||||
email = do
|
||||
_ <- many' space
|
||||
name <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
|
||||
_ <- char '@'
|
||||
rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>'))
|
||||
_ <- many' (notChar ',')
|
||||
pure (name <> "@" <> rest)
|
||||
{-# INLINE decode #-}
|
||||
|
||||
parseHeader :: Parser Header
|
||||
parseHeader = parseFrom <|> parseTo
|
||||
{-# INLINE parseHeader #-}
|
||||
|
||||
parseFrom :: Parser Header
|
||||
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
|
||||
|
||||
parseTo :: Parser Header
|
||||
parseTo = To <$> (string "To:" *> emptySpace *> emails)
|
||||
|
||||
emptySpace :: Parser ()
|
||||
emptySpace = () <$ many' space
|
||||
|
||||
emails :: Parser (Vector ByteString)
|
||||
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
|
||||
|
||||
bracketEmail :: Parser ByteString
|
||||
bracketEmail = do
|
||||
_ <- manyTill anyChar (char '<')
|
||||
email
|
||||
{-# INLINE bracketEmail #-}
|
||||
|
||||
email :: Parser ByteString
|
||||
email = do
|
||||
_ <- many' space
|
||||
name <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
|
||||
_ <- char '@'
|
||||
rest <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>' && c /= '<'))
|
||||
_ <- many' (notChar ',')
|
||||
pure (name <> "@" <> rest)
|
||||
{-# INLINE email #-}
|
||||
|
||||
|
||||
encode :: Header -> Text
|
||||
encode :: Header -> ByteString
|
||||
encode = \case
|
||||
From addr -> "From: " <> addr
|
||||
To addrs -> "To: " <> T.intercalate ", " (F.toList addrs)
|
||||
To addrs -> "To: " <> BC.intercalate ", " (F.toList addrs)
|
||||
|
@ -31,4 +31,7 @@ tests = testGroup "Data.Email"
|
||||
[ testCase "Can parse a sample email" $ do
|
||||
got <- parseToList sample
|
||||
got @?= [ From "me@example.com", To ["you@example.com"]]
|
||||
, testCase "Combined mailto" $ do
|
||||
let got = decode "To: James Doe<james.doe@example.com<mailto:james.doe@example.com>>"
|
||||
got @?= Right (To ["james.doe@example.com"])
|
||||
]
|
||||
|
@ -10,11 +10,12 @@ import qualified Hedgehog.Corpus as Corpus
|
||||
import qualified Hedgehog.Gen as Gen
|
||||
import qualified Hedgehog.Range as Range
|
||||
|
||||
import Data.Text
|
||||
import qualified Data.Text as T
|
||||
import Data.ByteString
|
||||
import qualified Data.ByteString as T
|
||||
import qualified Data.Vector as V
|
||||
|
||||
import Data.Email.Header
|
||||
import qualified Data.ByteString.Char8 as BC
|
||||
|
||||
genHeader :: Gen Header
|
||||
genHeader = Gen.choice
|
||||
@ -22,17 +23,17 @@ genHeader = Gen.choice
|
||||
, To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
|
||||
]
|
||||
|
||||
genEmail :: Gen Text
|
||||
genEmail :: Gen ByteString
|
||||
genEmail = do
|
||||
name <- Gen.element Corpus.simpsons
|
||||
domain <- Gen.element Corpus.cooking
|
||||
tld <- Gen.element ["com","fi","org"]
|
||||
pure $ name <> "@" <> domain <> "." <> tld
|
||||
|
||||
wrapped :: Char -> Text -> Char -> Text
|
||||
wrapped l x r = T.singleton l <> x <> T.singleton r
|
||||
wrapped :: Char -> ByteString -> Char -> ByteString
|
||||
wrapped l x r = BC.singleton l <> x <> BC.singleton r
|
||||
|
||||
genComment :: Gen Text
|
||||
genComment :: Gen ByteString
|
||||
genComment = do
|
||||
x <- Gen.element Corpus.simpsons
|
||||
Gen.element [x, wrapped '"' x '"']
|
||||
|
Loading…
Reference in New Issue
Block a user