Compare commits

...

4 Commits

Author SHA1 Message Date
Mats Rauhala a672fecbc9 Querying 2020-12-11 17:55:52 +02:00
Mats Rauhala 91578bfb03 Save as trie 2020-12-11 17:55:35 +02:00
Mats Rauhala 22b143aac7 Wall everything 2020-12-11 17:53:34 +02:00
Mats Rauhala 7f6b318fcb More strict emails 2020-12-11 17:53:08 +02:00
7 changed files with 92 additions and 15 deletions

View File

@ -21,6 +21,7 @@ library
, Data.Email.Header , Data.Email.Header
, Data.Email , Data.Email
, Control.Addressbook.Streaming , Control.Addressbook.Streaming
, Control.Addressbook.Query
-- other-modules: -- other-modules:
-- other-extensions: -- other-extensions:
default-extensions: OverloadedStrings default-extensions: OverloadedStrings
@ -36,8 +37,10 @@ library
, bytestring-trie , bytestring-trie
, vector , vector
, containers , containers
, filepath
hs-source-dirs: src hs-source-dirs: src
default-language: Haskell2010 default-language: Haskell2010
ghc-options: -Wall
executable addressbook executable addressbook
main-is: Main.hs main-is: Main.hs
@ -45,8 +48,10 @@ executable addressbook
-- other-extensions: -- other-extensions:
build-depends: base ^>=4.13.0.0, addressbook build-depends: base ^>=4.13.0.0, addressbook
, optparse-applicative , optparse-applicative
, text
hs-source-dirs: app hs-source-dirs: app
default-language: Haskell2010 default-language: Haskell2010
ghc-options: -Wall
test-suite addressbook-test test-suite addressbook-test
default-language: Haskell2010 default-language: Haskell2010
@ -70,3 +75,4 @@ test-suite addressbook-test
, vector , vector
, conduit , conduit
, conduit-extra , conduit-extra
ghc-options: -Wall

View File

@ -3,18 +3,28 @@ module Main where
import Options.Applicative import Options.Applicative
import Data.Text
(Text)
import qualified Data.Text as T
import qualified Control.Addressbook.Query as Query
import qualified Control.Addressbook.Streaming as Streaming import qualified Control.Addressbook.Streaming as Streaming
data CmdLine data CmdLine
= Stream = Stream
| Query Text
deriving Show deriving Show
cmdline :: Parser CmdLine cmdline :: Parser CmdLine
cmdline = subparser (command "stream" (info (pure Stream) (progDesc "Record a stream of filenames"))) cmdline = subparser
( command "stream" (info (pure Stream) (progDesc "Record a stream of filenames"))
<> command "query" (info (Query . T.pack <$> argument str (metavar "QUERY")) (progDesc "Query email addresses"))
)
handler :: CmdLine -> IO () handler :: CmdLine -> IO ()
handler = \case handler = \case
Stream -> Streaming.run Stream -> Streaming.run
Query q -> Query.query q
main :: IO () main :: IO ()
main = execParser opts >>= handler main = execParser opts >>= handler

View File

@ -1,7 +1,7 @@
{ mkDerivation, attoparsec, base, bytestring, bytestring-trie { mkDerivation, attoparsec, base, bytestring, bytestring-trie
, conduit, conduit-extra, containers, hedgehog, hedgehog-corpus , conduit, conduit-extra, containers, filepath, hedgehog
, HUnit, lens, mtl, optparse-applicative, stdenv, tasty , hedgehog-corpus, HUnit, lens, mtl, optparse-applicative, stdenv
, tasty-hedgehog, tasty-hunit, text, vector , tasty, tasty-hedgehog, tasty-hunit, text, vector
}: }:
mkDerivation { mkDerivation {
pname = "addressbook"; pname = "addressbook";
@ -11,9 +11,9 @@ mkDerivation {
isExecutable = true; isExecutable = true;
libraryHaskellDepends = [ libraryHaskellDepends = [
attoparsec base bytestring bytestring-trie conduit conduit-extra attoparsec base bytestring bytestring-trie conduit conduit-extra
containers lens mtl text vector containers filepath lens mtl text vector
]; ];
executableHaskellDepends = [ base optparse-applicative ]; executableHaskellDepends = [ base optparse-applicative text ];
testHaskellDepends = [ testHaskellDepends = [
base bytestring conduit conduit-extra hedgehog hedgehog-corpus base bytestring conduit conduit-extra hedgehog hedgehog-corpus
HUnit tasty tasty-hedgehog tasty-hunit text vector HUnit tasty tasty-hedgehog tasty-hunit text vector

View File

@ -0,0 +1,48 @@
{-# LANGUAGE TypeApplications #-}
module Control.Addressbook.Query where
import Data.Text
(Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Text as CT
import qualified Data.Trie as Trie
import System.IO
(stdout)
import Data.Maybe
(fromMaybe)
import System.Environment
(lookupEnv)
import System.FilePath
((</>))
import Control.Exception
(catch)
-- | Print every stored address line that starts with the given prefix.
--
-- The address file is @.addressbook.dat@ inside the directory named by the
-- @HOME@ environment variable, or inside @./@ when @HOME@ is unset.
--
-- Matching is case-insensitive: each line is indexed under its lower-cased
-- form, and the prefix is lower-cased the same way before the lookup.
-- Matches are written to stdout, one per line, in their original casing.
query :: Text -> IO ()
query prefix = do
  datDir <- fromMaybe "./" <$> lookupEnv "HOME"
  -- Best effort: an 'IOError' raised while reading the file (for example
  -- because it has not been written yet) yields an empty index instead of
  -- aborting the query.
  state <- catch @IOError (runResourceT $ runConduit $ readState datDir) (\_ -> pure Trie.empty)
  runConduit $ outputResults state
  where
    -- Build the lookup index: the key is the lower-cased line encoded as
    -- UTF-8, the value is a singleton list holding the original line.
    -- NOTE(review): this relies on the 'Trie' monoid combining the value
    -- lists when two lines share a key -- confirm against bytestring-trie.
    readState :: FilePath -> ConduitM () Void (ResourceT IO) (Trie.Trie [Text])
    readState dir =
      CB.sourceFile (dir </> ".addressbook.dat")
        .| CT.decode CT.utf8
        .| CT.lines
        .| C.foldMap (\s -> Trie.singleton (TE.encodeUtf8 $ T.toLower s) [s])

    -- Emit every line stored under a key that begins with the prefix.
    outputResults :: Trie.Trie [Text] -> ConduitM () Void IO ()
    outputResults state =
      -- The index keys are lower-cased, so the prefix must be lower-cased
      -- too; otherwise a query with any uppercase character never matches.
      CL.sourceList (Trie.elems $ Trie.submap (TE.encodeUtf8 (T.toLower prefix)) state)
        .| C.concat
        .| C.map (<> "\n")
        .| CT.encode CT.utf8
        .| CB.sinkHandle stdout

View File

@ -5,6 +5,7 @@ import qualified Data.Text as T
import Conduit import Conduit
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as C import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Text as CT import qualified Data.Conduit.Text as CT
import Data.Email import Data.Email
@ -16,7 +17,14 @@ import System.IO
import qualified Data.Foldable as F import qualified Data.Foldable as F
import qualified Data.Set as S import qualified Data.Trie as Trie
import Data.Maybe
(fromMaybe)
import System.Environment
(lookupEnv)
import System.FilePath
((</>))
combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m () combine :: (MonadUnliftIO m, MonadResource m, MonadThrow m, MonadIO m) => ConduitM FilePath Header m ()
combine = await >>= \case combine = await >>= \case
@ -25,8 +33,10 @@ combine = await >>= \case
run :: IO () run :: IO ()
run = do run = do
x <- runResourceT $ runConduit stream datDir <- fromMaybe "./" <$> lookupEnv "HOME"
F.for_ x print runResourceT $ do
x <- runConduit stream
runConduit (CL.sourceList (Trie.keys x) .| C.map (<> "\n") .| CB.sinkFileCautious (datDir </> ".addressbook.dat"))
where where
separate = \case separate = \case
From x -> [x] From x -> [x]
@ -38,4 +48,5 @@ run = do
.| C.map T.unpack .| C.map T.unpack
.| combine .| combine
.| C.concatMap separate .| C.concatMap separate
.| C.foldMap (S.singleton) .| CT.encode CT.utf8
.| C.foldMap (`Trie.singleton` ())

View File

@ -38,13 +38,13 @@ decode = parseOnly parseHeader
bracketEmail :: Parser Text bracketEmail :: Parser Text
bracketEmail = do bracketEmail = do
_ <- manyTill anyChar (char '<') _ <- manyTill anyChar (char '<')
T.pack <$> manyTill anyChar (char '>') email
email :: Parser Text email :: Parser Text
email = do email = do
_ <- many' space _ <- many' space
name <- T.pack <$> many' (notChar '@') name <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= '@'))
_ <- char '@' _ <- char '@'
rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',')) rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ',' && c /= '>'))
_ <- many' (notChar ',') _ <- many' (notChar ',')
pure (name <> "@" <> rest) pure (name <> "@" <> rest)

View File

@ -17,9 +17,11 @@ sample :: ByteString
sample = sample =
"Subject: Hello worldddd\n\ "Subject: Hello worldddd\n\
\From: me@example.com\n\ \From: me@example.com\n\
\Dkim: asd\n\
\To: you <you@example.com>\n\ \To: you <you@example.com>\n\
\ \n\n \ \\n\n\
\foo" \From: foo bar <a mailto=\"me2@example.com\" />\n\
\asd\n"
parseToList :: ByteString -> IO [Header] parseToList :: ByteString -> IO [Header]
parseToList _ = runConduit (CB.sourceLbs sample .| parseEmail .| CL.consume) parseToList _ = runConduit (CB.sourceLbs sample .| parseEmail .| CL.consume)