Compare commits

..

2 Commits

Author SHA1 Message Date
a300c88cfb Try parallel 2021-10-29 22:09:43 +03:00
560ea23861 parallel 2021-10-29 20:47:04 +03:00
8 changed files with 62 additions and 182 deletions

View File

@ -42,7 +42,6 @@ library
, containers
, filepath
, parallel
, unix
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall

View File

@ -1,7 +1,7 @@
{ mkDerivation, attoparsec, base, bytestring, conduit
, conduit-extra, containers, criterion, filepath, hedgehog
, hedgehog-corpus, HUnit, lens, lib, mtl, optparse-applicative
, parallel, tasty, tasty-hedgehog, tasty-hunit, text, unix, vector
, parallel, tasty, tasty-hedgehog, tasty-hunit, text, vector
}:
mkDerivation {
pname = "addressbook";
@ -11,7 +11,7 @@ mkDerivation {
isExecutable = true;
libraryHaskellDepends = [
attoparsec base bytestring conduit conduit-extra containers
filepath lens mtl parallel text unix vector
filepath lens mtl parallel text vector
];
executableHaskellDepends = [
base bytestring containers criterion hedgehog-corpus

62
flake.lock generated
View File

@ -1,62 +0,0 @@
{
"nodes": {
"easy-hls": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1637250802,
"narHash": "sha256-/crlHEVB148PGQLZCsHOR9L5qgvCAfRSocIoKgmMAhA=",
"owner": "jkachmar",
"repo": "easy-hls-nix",
"rev": "7c123399ef8a67dc0e505d9cf7f2c7f64f1cd847",
"type": "github"
},
"original": {
"owner": "jkachmar",
"repo": "easy-hls-nix",
"type": "github"
}
},
"flake-utils": {
"locked": {
"lastModified": 1637014545,
"narHash": "sha256-26IZAc5yzlD9FlDT54io1oqG/bBoyka+FJk5guaX4x4=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "bba5dcc8e0b20ab664967ad83d24d64cb64ec4f4",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1637312849,
"narHash": "sha256-OhVZopkyryEfLyPwcXk2IQsdi80lj6TY1YFoMNZ4hCQ=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "e4806bb4416f88c20f8be0b8ef9b5b09ff9022a6",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"root": {
"inputs": {
"easy-hls": "easy-hls",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

View File

@ -1,53 +0,0 @@
{
description = "addressbook";
inputs = {
easy-hls = {
url = "github:jkachmar/easy-hls-nix";
inputs.nixpkgs.follows = "nixpkgs";
};
flake-utils = {
url = "github:numtide/flake-utils";
};
};
outputs = { self, nixpkgs, flake-utils, easy-hls }:
{ overlay = final: prev: {
haskellPackages = prev.haskellPackages.override ( old: {
overrides = final.lib.composeExtensions (old.overrides or (_: _: {})) (f: p: {
addressbook = f.callPackage ./. {};
});
});
};
}
//
flake-utils.lib.eachSystem ["x86_64-linux" "x86_64-darwin"] ( system:
let
pkgs = import nixpkgs { inherit system; overlays = [ self.overlay ]; };
hp = pkgs.haskellPackages;
hls = (easy-hls.withGhcs [ hp.ghc ]).${system};
in
rec {
packages = { inherit (hp) addressbook; };
defaultPackage = packages.addressbook;
apps.addressbook = {
type = "app";
program = "${hp.addressbook}/bin/addressbook";
};
devShell = hp.shellFor {
packages = h: [h.addressbook];
withHoogle = true;
buildInputs = with pkgs; [
entr
cabal-install
hp.hlint
stylish-haskell
ghcid
hls
];
};
}
);
}

View File

@ -1,18 +1,23 @@
module Control.Addressbook.Streaming where
import qualified Data.Text as T
import Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Text as CT
import Data.Email
import Data.Email.Header
(Header(..))
import System.IO
(stdin)
import qualified Data.Foldable as F
import qualified Data.Map.Strict as Map
import Data.Maybe
(fromMaybe)
@ -27,10 +32,10 @@ import qualified Data.ByteString.Lazy as LBS
import Data.Char (ord)
import qualified Data.ByteString.Lazy.Char8 as LBC
import System.IO.Unsafe (unsafeInterleaveIO)
import Control.Parallel.Strategies (rseq, parMap)
import Control.Parallel.Strategies (using, parList, rseq, parBuffer, rdeepseq, parMap)
import qualified Data.List as L
import Control.Monad (unless)
import System.Posix (touchFile)
import Control.Concurrent (getNumCapabilities)
import Debug.Trace (traceShow)
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
combine = await >>= \case
@ -45,29 +50,36 @@ chunks n = L.unfoldr $ \case
run :: IO ()
run = do
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
let datFile = datDir </> ".addressbook.dat"
touchFile datFile
original <- Set.fromList . map LBS.toStrict . lbsLines <$> LBS.readFile datFile
xs <- LBS.getContents >>= stream
let set = original `Set.union` F.fold (parMap rseq F.fold (chunks 200 xs))
unless (original == set) $
let x = F.fold (parMap rseq F.fold (chunks 200 xs))
runResourceT $
runConduit $
CL.sourceList (Set.elems set)
.| C.map (<> "\n")
.| CB.sinkFileCautious datFile
runConduit (CL.sourceList (Set.elems x) .| C.map (<> "\n") .| CB.sinkFileCautious (datDir </> ".addressbook.dat"))
where
separate = \case
From x -> [x]
To xs -> F.toList xs
-- A set of (locally) unique addresses. Composes with parMap
lbsLines = LBS.split (fromIntegral $ ord '\n')
stream :: LBS.ByteString -> IO [Set ByteString]
stream =
traverse (unsafeInterleaveIO . parse . LBC.unpack)
. filter (not . LBS.null)
. lbsLines
stream :: LBS.ByteString -> IO ([Set ByteString])
stream = traverse (unsafeInterleaveIO . parse . LBC.unpack) . filter (not . LBS.null) . LBS.split (fromIntegral $ ord '\n')
parse path =
runResourceT $
runConduit $
CB.sourceFile path .| parseEmail .| C.concatMap separate .| C.foldMap Set.singleton
run_ :: IO ()
run_ = do
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
runResourceT $ do
x <- runConduit stream
runConduit (CL.sourceList (Set.elems x) .| C.map (<> "\n") .| CB.sinkFileCautious (datDir </> ".addressbook.dat"))
where
separate = \case
From x -> [x]
To xs -> F.toList xs
stream =
CB.sourceHandle stdin
.| CT.decode CT.utf8
.| CT.lines
.| C.map T.unpack
.| combine
.| C.concatMap separate
.| C.foldMap (Set.singleton)

View File

@ -1,5 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ApplicativeDo #-}
module Data.Email.Header where
import qualified Data.Foldable as F
@ -22,39 +21,28 @@ data Header
decode :: ByteString -> Either String Header
decode = parseOnly parseHeader
{-# INLINE decode #-}
parseHeader :: Parser Header
parseHeader = parseFrom <|> parseTo
{-# INLINE parseHeader #-}
parseFrom :: Parser Header
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
parseTo :: Parser Header
parseTo = To <$> (string "To:" *> emptySpace *> emails)
emptySpace :: Parser ()
emptySpace = () <$ many' space
emails :: Parser (Vector ByteString)
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
bracketEmail :: Parser ByteString
bracketEmail = do
where
parseHeader :: Parser Header
parseHeader = parseFrom <|> parseTo
parseFrom :: Parser Header
parseFrom = From <$> (string "From:" *> emptySpace *> (bracketEmail <|> email))
parseTo :: Parser Header
parseTo = To <$> (string "To:" *> emptySpace *> emails)
emptySpace = many' space
emails :: Parser (Vector ByteString)
emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
bracketEmail :: Parser ByteString
bracketEmail = do
_ <- manyTill anyChar (char '<')
email
{-# INLINE bracketEmail #-}
email :: Parser ByteString
email = do
email :: Parser ByteString
email = do
_ <- many' space
name <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
_ <- char '@'
rest <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>' && c /= '<'))
rest <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>'))
_ <- many' (notChar ',')
pure (name <> "@" <> rest)
{-# INLINE email #-}
encode :: Header -> ByteString

View File

@ -31,7 +31,4 @@ tests = testGroup "Data.Email"
[ testCase "Can parse a sample email" $ do
got <- parseToList sample
got @?= [ From "me@example.com", To ["you@example.com"]]
, testCase "Combined mailto" $ do
let got = decode "To: James Doe<james.doe@example.com<mailto:james.doe@example.com>>"
got @?= Right (To ["james.doe@example.com"])
]

View File

@ -10,12 +10,11 @@ import qualified Hedgehog.Corpus as Corpus
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import Data.ByteString
import qualified Data.ByteString as T
import Data.Text
import qualified Data.Text as T
import qualified Data.Vector as V
import Data.Email.Header
import qualified Data.ByteString.Char8 as BC
genHeader :: Gen Header
genHeader = Gen.choice
@ -23,17 +22,17 @@ genHeader = Gen.choice
, To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
]
genEmail :: Gen ByteString
genEmail :: Gen Text
genEmail = do
name <- Gen.element Corpus.simpsons
domain <- Gen.element Corpus.cooking
tld <- Gen.element ["com","fi","org"]
pure $ name <> "@" <> domain <> "." <> tld
wrapped :: Char -> ByteString -> Char -> ByteString
wrapped l x r = BC.singleton l <> x <> BC.singleton r
wrapped :: Char -> Text -> Char -> Text
wrapped l x r = T.singleton l <> x <> T.singleton r
genComment :: Gen ByteString
genComment :: Gen Text
genComment = do
x <- Gen.element Corpus.simpsons
Gen.element [x, wrapped '"' x '"']