Compare commits
	
		
			4 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 537c0df198 | |||
| 75aa615263 | |||
| a300c88cfb | |||
| 560ea23861 | 
@@ -41,6 +41,7 @@ library
 | 
				
			|||||||
                     , vector
 | 
					                     , vector
 | 
				
			||||||
                     , containers
 | 
					                     , containers
 | 
				
			||||||
                     , filepath
 | 
					                     , filepath
 | 
				
			||||||
 | 
					                     , parallel
 | 
				
			||||||
  hs-source-dirs:      src
 | 
					  hs-source-dirs:      src
 | 
				
			||||||
  default-language:    Haskell2010
 | 
					  default-language:    Haskell2010
 | 
				
			||||||
  ghc-options:         -Wall
 | 
					  ghc-options:         -Wall
 | 
				
			||||||
@@ -55,7 +56,7 @@ executable addressbook
 | 
				
			|||||||
                     , text
 | 
					                     , text
 | 
				
			||||||
  hs-source-dirs:      app
 | 
					  hs-source-dirs:      app
 | 
				
			||||||
  default-language:    Haskell2010
 | 
					  default-language:    Haskell2010
 | 
				
			||||||
  ghc-options:         -Wall -threaded
 | 
					  ghc-options:         -Wall -threaded -eventlog
 | 
				
			||||||
 | 
					
 | 
				
			||||||
test-suite addressbook-test
 | 
					test-suite addressbook-test
 | 
				
			||||||
  import:              deps
 | 
					  import:              deps
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,7 @@
 | 
				
			|||||||
{ mkDerivation, attoparsec, base, bytestring, conduit
 | 
					{ mkDerivation, attoparsec, base, bytestring, conduit
 | 
				
			||||||
, conduit-extra, containers, criterion, filepath, hedgehog
 | 
					, conduit-extra, containers, criterion, filepath, hedgehog
 | 
				
			||||||
, hedgehog-corpus, HUnit, lens, lib, mtl, optparse-applicative
 | 
					, hedgehog-corpus, HUnit, lens, lib, mtl, optparse-applicative
 | 
				
			||||||
, tasty, tasty-hedgehog, tasty-hunit, text, vector
 | 
					, parallel, tasty, tasty-hedgehog, tasty-hunit, text, vector
 | 
				
			||||||
}:
 | 
					}:
 | 
				
			||||||
mkDerivation {
 | 
					mkDerivation {
 | 
				
			||||||
  pname = "addressbook";
 | 
					  pname = "addressbook";
 | 
				
			||||||
@@ -11,10 +11,11 @@ mkDerivation {
 | 
				
			|||||||
  isExecutable = true;
 | 
					  isExecutable = true;
 | 
				
			||||||
  libraryHaskellDepends = [
 | 
					  libraryHaskellDepends = [
 | 
				
			||||||
    attoparsec base bytestring conduit conduit-extra containers
 | 
					    attoparsec base bytestring conduit conduit-extra containers
 | 
				
			||||||
    filepath lens mtl text vector
 | 
					    filepath lens mtl parallel text vector
 | 
				
			||||||
  ];
 | 
					  ];
 | 
				
			||||||
  executableHaskellDepends = [
 | 
					  executableHaskellDepends = [
 | 
				
			||||||
    base criterion hedgehog-corpus optparse-applicative text
 | 
					    base bytestring containers criterion hedgehog-corpus
 | 
				
			||||||
 | 
					    optparse-applicative text
 | 
				
			||||||
  ];
 | 
					  ];
 | 
				
			||||||
  testHaskellDepends = [
 | 
					  testHaskellDepends = [
 | 
				
			||||||
    base bytestring conduit conduit-extra containers hedgehog
 | 
					    base bytestring conduit conduit-extra containers hedgehog
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,23 +1,18 @@
 | 
				
			|||||||
module Control.Addressbook.Streaming where
 | 
					module Control.Addressbook.Streaming where
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import qualified Data.Text as T
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Conduit
 | 
					import Conduit
 | 
				
			||||||
import qualified Data.Conduit.Binary as CB
 | 
					import qualified Data.Conduit.Binary as CB
 | 
				
			||||||
import qualified Data.Conduit.Combinators as C
 | 
					import qualified Data.Conduit.Combinators as C
 | 
				
			||||||
import qualified Data.Conduit.List as CL
 | 
					import qualified Data.Conduit.List as CL
 | 
				
			||||||
import qualified Data.Conduit.Text as CT
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Email
 | 
					import Data.Email
 | 
				
			||||||
import Data.Email.Header
 | 
					import Data.Email.Header
 | 
				
			||||||
       (Header(..))
 | 
					       (Header(..))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import System.IO
 | 
					 | 
				
			||||||
       (stdin)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import qualified Data.Foldable as F
 | 
					import qualified Data.Foldable as F
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import qualified Data.Map.Strict as Map
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Maybe
 | 
					import Data.Maybe
 | 
				
			||||||
       (fromMaybe)
 | 
					       (fromMaybe)
 | 
				
			||||||
@@ -25,28 +20,47 @@ import System.Environment
 | 
				
			|||||||
       (lookupEnv)
 | 
					       (lookupEnv)
 | 
				
			||||||
import System.FilePath
 | 
					import System.FilePath
 | 
				
			||||||
       ((</>))
 | 
					       ((</>))
 | 
				
			||||||
 | 
					import Data.Set (Set)
 | 
				
			||||||
 | 
					import Data.ByteString (ByteString)
 | 
				
			||||||
 | 
					import qualified Data.Set as Set
 | 
				
			||||||
 | 
					import qualified Data.ByteString.Lazy as LBS
 | 
				
			||||||
 | 
					import Data.Char (ord)
 | 
				
			||||||
 | 
					import qualified Data.ByteString.Lazy.Char8 as LBC
 | 
				
			||||||
 | 
					import System.IO.Unsafe (unsafeInterleaveIO)
 | 
				
			||||||
 | 
					import Control.Parallel.Strategies (rseq, parMap)
 | 
				
			||||||
 | 
					import qualified Data.List as L
 | 
				
			||||||
 | 
					
 | 
				
			||||||
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
 | 
					combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
 | 
				
			||||||
combine = await >>= \case
 | 
					combine = await >>= \case
 | 
				
			||||||
  Nothing -> pure ()
 | 
					  Nothing -> pure ()
 | 
				
			||||||
  Just path -> (CB.sourceFile path .| parseEmail) >> combine
 | 
					  Just path -> (CB.sourceFile path .| parseEmail) >> combine
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					chunks :: Int -> [a] -> [[a]]
 | 
				
			||||||
 | 
					chunks n = L.unfoldr $ \case
 | 
				
			||||||
 | 
					  [] -> Nothing
 | 
				
			||||||
 | 
					  xs -> Just (splitAt n xs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
run :: IO ()
 | 
					run :: IO ()
 | 
				
			||||||
run = do
 | 
					run = do
 | 
				
			||||||
  datDir <- fromMaybe "./" <$> lookupEnv "HOME"
 | 
					  datDir <- fromMaybe "./" <$> lookupEnv "HOME"
 | 
				
			||||||
  runResourceT $ do
 | 
					  xs <- LBS.getContents >>= stream
 | 
				
			||||||
    x <- runConduit stream
 | 
					  let set = F.fold (parMap rseq F.fold (chunks 20 xs))
 | 
				
			||||||
    runConduit (CL.sourceList (Map.keys x) .| C.map (<> "\n") .| CB.sinkFileCautious (datDir </> ".addressbook.dat"))
 | 
					  runResourceT $
 | 
				
			||||||
 | 
					    runConduit $
 | 
				
			||||||
 | 
					         CL.sourceList (Set.elems set)
 | 
				
			||||||
 | 
					      .| C.map (<> "\n")
 | 
				
			||||||
 | 
					      .| CB.sinkFileCautious (datDir </> ".addressbook.dat")
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
    separate = \case
 | 
					    separate = \case
 | 
				
			||||||
      From x -> [x]
 | 
					      From x -> [x]
 | 
				
			||||||
      To xs -> F.toList xs
 | 
					      To xs -> F.toList xs
 | 
				
			||||||
 | 
					    -- A set of (locally) unique addresses. Composes with parMap
 | 
				
			||||||
 | 
					    stream :: LBS.ByteString -> IO [Set ByteString]
 | 
				
			||||||
    stream =
 | 
					    stream =
 | 
				
			||||||
      CB.sourceHandle stdin
 | 
					        traverse (unsafeInterleaveIO . parse . LBC.unpack)
 | 
				
			||||||
        .| CT.decode CT.utf8
 | 
					      . filter (not . LBS.null)
 | 
				
			||||||
        .| CT.lines
 | 
					      . LBS.split (fromIntegral $ ord '\n')
 | 
				
			||||||
        .| C.map T.unpack
 | 
					    parse path =
 | 
				
			||||||
        .| combine
 | 
					      runResourceT $
 | 
				
			||||||
        .| C.concatMap separate
 | 
					        runConduit $
 | 
				
			||||||
        .| CT.encode CT.utf8
 | 
					          CB.sourceFile path .| parseEmail .| C.concatMap separate .| C.foldMap Set.singleton
 | 
				
			||||||
        .| C.foldMap (`Map.singleton` ())
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,12 +5,9 @@ import Data.Email.Header
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import Conduit
 | 
					import Conduit
 | 
				
			||||||
import qualified Data.Conduit.Combinators as C
 | 
					import qualified Data.Conduit.Combinators as C
 | 
				
			||||||
import qualified Data.Conduit.Text as CT
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.ByteString
 | 
					import Data.ByteString
 | 
				
			||||||
       (ByteString)
 | 
					       (ByteString)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
parseEmail :: (MonadUnliftIO m, MonadThrow m, Monad m) => ConduitM ByteString Header m ()
 | 
					parseEmail :: (MonadUnliftIO m, MonadThrow m, Monad m) => ConduitM ByteString Header m ()
 | 
				
			||||||
parseEmail = catchC (CT.decode CT.utf8) err .| CT.lines .| C.concatMap decode
 | 
					parseEmail = C.linesUnboundedAscii .| C.concatMap decode
 | 
				
			||||||
  where
 | 
					 | 
				
			||||||
    err e = liftIO (print @CT.TextException e) >> yield ""
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,29 +1,25 @@
 | 
				
			|||||||
 | 
					{-# LANGUAGE OverloadedStrings #-}
 | 
				
			||||||
module Data.Email.Header where
 | 
					module Data.Email.Header where
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Text
 | 
					 | 
				
			||||||
       (Text)
 | 
					 | 
				
			||||||
import qualified Data.Text as T
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import qualified Data.Foldable as F
 | 
					import qualified Data.Foldable as F
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Attoparsec.Text
 | 
					import Data.Attoparsec.ByteString.Char8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Vector
 | 
					import Data.Vector
 | 
				
			||||||
       (Vector)
 | 
					       (Vector)
 | 
				
			||||||
import qualified Data.Vector as V
 | 
					import qualified Data.Vector as V
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Char
 | 
					 | 
				
			||||||
       (isSpace)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import Control.Applicative
 | 
					import Control.Applicative
 | 
				
			||||||
       ((<|>))
 | 
					       ((<|>))
 | 
				
			||||||
 | 
					import Data.ByteString (ByteString)
 | 
				
			||||||
 | 
					import qualified Data.ByteString.Char8 as BC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
data Header
 | 
					data Header
 | 
				
			||||||
  = From !Text
 | 
					  = From !ByteString
 | 
				
			||||||
  | To !(Vector Text)
 | 
					  | To !(Vector ByteString)
 | 
				
			||||||
  deriving (Show, Eq)
 | 
					  deriving (Show, Eq)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
decode :: Text -> Either String Header
 | 
					decode :: ByteString -> Either String Header
 | 
				
			||||||
decode = parseOnly parseHeader
 | 
					decode = parseOnly parseHeader
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
    parseHeader :: Parser Header
 | 
					    parseHeader :: Parser Header
 | 
				
			||||||
@@ -33,23 +29,23 @@ decode = parseOnly parseHeader
 | 
				
			|||||||
    parseTo :: Parser Header
 | 
					    parseTo :: Parser Header
 | 
				
			||||||
    parseTo = To <$> (string "To:" *> emptySpace *> emails)
 | 
					    parseTo = To <$> (string "To:" *> emptySpace *> emails)
 | 
				
			||||||
    emptySpace = many' space
 | 
					    emptySpace = many' space
 | 
				
			||||||
    emails :: Parser (Vector Text)
 | 
					    emails :: Parser (Vector ByteString)
 | 
				
			||||||
    emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
 | 
					    emails = V.fromList <$> (bracketEmail <|> email) `sepBy` char ','
 | 
				
			||||||
    bracketEmail :: Parser Text
 | 
					    bracketEmail :: Parser ByteString
 | 
				
			||||||
    bracketEmail = do
 | 
					    bracketEmail = do
 | 
				
			||||||
      _ <- manyTill anyChar (char '<')
 | 
					      _ <- manyTill anyChar (char '<')
 | 
				
			||||||
      email
 | 
					      email
 | 
				
			||||||
    email :: Parser Text
 | 
					    email :: Parser ByteString
 | 
				
			||||||
    email = do
 | 
					    email = do
 | 
				
			||||||
      _ <- many' space
 | 
					      _ <- many' space
 | 
				
			||||||
      name <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
 | 
					      name <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
 | 
				
			||||||
      _ <- char '@'
 | 
					      _ <- char '@'
 | 
				
			||||||
      rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>'))
 | 
					      rest <- BC.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>'))
 | 
				
			||||||
      _ <- many' (notChar ',')
 | 
					      _ <- many' (notChar ',')
 | 
				
			||||||
      pure (name <> "@" <> rest)
 | 
					      pure (name <> "@" <> rest)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
encode :: Header -> Text
 | 
					encode :: Header -> ByteString
 | 
				
			||||||
encode = \case
 | 
					encode = \case
 | 
				
			||||||
  From addr -> "From: " <> addr
 | 
					  From addr -> "From: " <> addr
 | 
				
			||||||
  To addrs -> "To: " <> T.intercalate ", " (F.toList addrs)
 | 
					  To addrs -> "To: " <> BC.intercalate ", " (F.toList addrs)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,11 +10,12 @@ import qualified Hedgehog.Corpus as Corpus
 | 
				
			|||||||
import qualified Hedgehog.Gen as Gen
 | 
					import qualified Hedgehog.Gen as Gen
 | 
				
			||||||
import qualified Hedgehog.Range as Range
 | 
					import qualified Hedgehog.Range as Range
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Text
 | 
					import Data.ByteString
 | 
				
			||||||
import qualified Data.Text as T
 | 
					import qualified Data.ByteString as T
 | 
				
			||||||
import qualified Data.Vector as V
 | 
					import qualified Data.Vector as V
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Data.Email.Header
 | 
					import Data.Email.Header
 | 
				
			||||||
 | 
					import qualified Data.ByteString.Char8 as BC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
genHeader :: Gen Header
 | 
					genHeader :: Gen Header
 | 
				
			||||||
genHeader = Gen.choice
 | 
					genHeader = Gen.choice
 | 
				
			||||||
@@ -22,17 +23,17 @@ genHeader = Gen.choice
 | 
				
			|||||||
  , To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
 | 
					  , To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
 | 
				
			||||||
  ]
 | 
					  ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
genEmail :: Gen Text
 | 
					genEmail :: Gen ByteString
 | 
				
			||||||
genEmail = do
 | 
					genEmail = do
 | 
				
			||||||
  name <- Gen.element Corpus.simpsons
 | 
					  name <- Gen.element Corpus.simpsons
 | 
				
			||||||
  domain <- Gen.element Corpus.cooking
 | 
					  domain <- Gen.element Corpus.cooking
 | 
				
			||||||
  tld <- Gen.element ["com","fi","org"]
 | 
					  tld <- Gen.element ["com","fi","org"]
 | 
				
			||||||
  pure $ name <> "@" <> domain <> "." <> tld
 | 
					  pure $ name <> "@" <> domain <> "." <> tld
 | 
				
			||||||
 | 
					
 | 
				
			||||||
wrapped :: Char -> Text -> Char -> Text
 | 
					wrapped :: Char -> ByteString -> Char -> ByteString
 | 
				
			||||||
wrapped l x r = T.singleton l <> x <> T.singleton r
 | 
					wrapped l x r = BC.singleton l <> x <> BC.singleton r
 | 
				
			||||||
 | 
					
 | 
				
			||||||
genComment :: Gen Text
 | 
					genComment :: Gen ByteString
 | 
				
			||||||
genComment = do
 | 
					genComment = do
 | 
				
			||||||
  x <- Gen.element Corpus.simpsons
 | 
					  x <- Gen.element Corpus.simpsons
 | 
				
			||||||
  Gen.element [x, wrapped '"' x '"']
 | 
					  Gen.element [x, wrapped '"' x '"']
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user