Compare commits

...

4 Commits

Author SHA1 Message Date
Mats Rauhala a672fecbc9 Querying 2020-12-11 17:55:52 +02:00
Mats Rauhala 91578bfb03 Save as trie 2020-12-11 17:55:35 +02:00
Mats Rauhala 22b143aac7 Wall everything 2020-12-11 17:53:34 +02:00
Mats Rauhala 7f6b318fcb More strict emails 2020-12-11 17:53:08 +02:00
7 changed files with 92 additions and 15 deletions

View File

@ -21,6 +21,7 @@ library
, Data.Email.Header
, Data.Email
, Control.Addressbook.Streaming
, Control.Addressbook.Query
-- other-modules:
-- other-extensions:
default-extensions: OverloadedStrings
@ -36,8 +37,10 @@ library
, bytestring-trie
, vector
, containers
, filepath
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall
executable addressbook
main-is: Main.hs
@ -45,8 +48,10 @@ executable addressbook
-- other-extensions:
build-depends: base ^>=4.13.0.0, addressbook
, optparse-applicative
, text
hs-source-dirs: app
default-language: Haskell2010
ghc-options: -Wall
test-suite addressbook-test
default-language: Haskell2010
@ -70,3 +75,4 @@ test-suite addressbook-test
, vector
, conduit
, conduit-extra
ghc-options: -Wall

View File

@ -3,18 +3,28 @@ module Main where
import Options.Applicative
import Data.Text
(Text)
import qualified Data.Text as T
import qualified Control.Addressbook.Query as Query
import qualified Control.Addressbook.Streaming as Streaming
data CmdLine
= Stream
| Query Text
deriving Show
cmdline :: Parser CmdLine
cmdline = subparser (command "stream" (info (pure Stream) (progDesc "Record a stream of filenames")))
cmdline = subparser
( command "stream" (info (pure Stream) (progDesc "Record a stream of filenames"))
<> command "query" (info (Query . T.pack <$> argument str (metavar "QUERY")) (progDesc "Query email addresses"))
)
handler :: CmdLine -> IO ()
handler = \case
Stream -> Streaming.run
Query q -> Query.query q
main :: IO ()
main = execParser opts >>= handler

View File

@ -1,7 +1,7 @@
{ mkDerivation, attoparsec, base, bytestring, bytestring-trie
, conduit, conduit-extra, containers, hedgehog, hedgehog-corpus
, HUnit, lens, mtl, optparse-applicative, stdenv, tasty
, tasty-hedgehog, tasty-hunit, text, vector
, conduit, conduit-extra, containers, filepath, hedgehog
, hedgehog-corpus, HUnit, lens, mtl, optparse-applicative, stdenv
, tasty, tasty-hedgehog, tasty-hunit, text, vector
}:
mkDerivation {
pname = "addressbook";
@ -11,9 +11,9 @@ mkDerivation {
isExecutable = true;
libraryHaskellDepends = [
attoparsec base bytestring bytestring-trie conduit conduit-extra
containers lens mtl text vector
containers filepath lens mtl text vector
];
executableHaskellDepends = [ base optparse-applicative ];
executableHaskellDepends = [ base optparse-applicative text ];
testHaskellDepends = [
base bytestring conduit conduit-extra hedgehog hedgehog-corpus
HUnit tasty tasty-hedgehog tasty-hunit text vector

View File

@ -0,0 +1,48 @@
{-# LANGUAGE TypeApplications #-}
module Control.Addressbook.Query where
import Data.Text
(Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Text as CT
import qualified Data.Trie as Trie
import System.IO
(stdout)
import Data.Maybe
(fromMaybe)
import System.Environment
(lookupEnv)
import System.FilePath
((</>))
import Control.Exception
(catch)
query :: Text -> IO ()
query prefix = do
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
state <- catch @IOError (runResourceT $ runConduit $ readState datDir) (\_ -> pure Trie.empty)
runConduit $ outputResults state
where
readState :: FilePath -> ConduitM () Void (ResourceT IO) (Trie.Trie [Text])
readState dir =
CB.sourceFile (dir </> ".addressbook.dat")
.| CT.decode CT.utf8
.| CT.lines
.| C.foldMap (\s -> Trie.singleton (TE.encodeUtf8 $ T.toLower s) [s])
outputResults :: Trie.Trie [Text] -> ConduitM () Void IO ()
outputResults state =
CL.sourceList (Trie.elems $ Trie.submap (TE.encodeUtf8 prefix) state)
.| C.concat
.| C.map (<> "\n")
.| CT.encode CT.utf8
.| CB.sinkHandle stdout

View File

@ -5,6 +5,7 @@ import qualified Data.Text as T
import Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Text as CT
import Data.Email
@ -16,7 +17,14 @@ import System.IO
import qualified Data.Foldable as F
import qualified Data.Set as S
import qualified Data.Trie as Trie
import Data.Maybe
(fromMaybe)
import System.Environment
(lookupEnv)
import System.FilePath
((</>))
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
combine = await >>= \case
@ -25,8 +33,10 @@ combine = await >>= \case
run :: IO ()
run = do
x <- runResourceT $ runConduit stream
F.for_ x print
datDir <- fromMaybe "./" <$> lookupEnv "HOME"
runResourceT $ do
x <- runConduit stream
runConduit (CL.sourceList (Trie.keys x) .| C.map (<> "\n") .| CB.sinkFileCautious (datDir </> ".addressbook.dat"))
where
separate = \case
From x -> [x]
@ -38,4 +48,5 @@ run = do
.| C.map T.unpack
.| combine
.| C.concatMap separate
.| C.foldMap (S.singleton)
.| CT.encode CT.utf8
.| C.foldMap (`Trie.singleton` ())

View File

@ -38,13 +38,13 @@ decode = parseOnly parseHeader
bracketEmail :: Parser Text
bracketEmail = do
_ <- manyTill anyChar (char '<')
T.pack <$> manyTill anyChar (char '>')
email
email :: Parser Text
email = do
_ <- many' space
name <- T.pack <$> many' (notChar '@')
name <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
_ <- char '@'
rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ','))
rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>'))
_ <- many' (notChar ',')
pure (name <> "@" <> rest)

View File

@ -17,9 +17,11 @@ sample :: ByteString
sample =
"Subject: Hello worldddd\n\
\From: me@example.com\n\
\Dkim: asd\n\
\To: you <you@example.com>\n\
\ \n\n \
\foo"
\\n\n\
\From: foo bar <a mailto=\"me2@example.com\" />\n\
\asd\n"
parseToList :: ByteString -> IO [Header]
parseToList _ = runConduit (CB.sourceLbs sample .| parseEmail .| CL.consume)