Initial email header parsing

This commit is contained in:
Mats Rauhala 2020-12-10 21:08:48 +02:00
parent ddeb89c025
commit a43b0bf471
4 changed files with 339 additions and 0 deletions

248
.stylish-haskell.yaml Normal file
View File

@ -0,0 +1,248 @@
# stylish-haskell configuration file
# ==================================
# The stylish-haskell tool is mainly configured by specifying steps. These steps
# are a list, so they have an order, and one specific step may appear more than
# once (if needed). Each file is processed by these steps in the given order.
steps:
# Convert some ASCII sequences to their Unicode equivalents. This is disabled
# by default.
# - unicode_syntax:
# # In order to make this work, we also need to insert the UnicodeSyntax
# # language pragma. If this flag is set to true, we insert it when it's
# # not already present. You may want to disable it if you configure
# # language extensions using some other method than pragmas. Default:
# # true.
# add_language_pragma: true
# Align the right hand side of some elements. This is quite conservative
# and only applies to statements where each element occupies a single
# line.
- simple_align:
cases: false
top_level_patterns: false
records: false
# Import cleanup
- imports:
# There are different ways we can align names and lists.
#
# - global: Align the import names and import list throughout the entire
# file.
#
# - file: Like global, but don't add padding when there are no qualified
# imports in the file.
#
# - group: Only align the imports per group (a group is formed by adjacent
# import lines).
#
# - none: Do not perform any alignment.
#
# Default: global.
align: none
# The following options affect only import list alignment.
#
# List align has following options:
#
# - after_alias: Import list is aligned with end of import including
# 'as' and 'hiding' keywords.
#
# > import qualified Data.List as List (concat, foldl, foldr, head,
# > init, last, length)
#
# - with_alias: Import list is aligned with start of alias or hiding.
#
# > import qualified Data.List as List (concat, foldl, foldr, head,
# > init, last, length)
#
# - new_line: Import list starts always on new line.
#
# > import qualified Data.List as List
# > (concat, foldl, foldr, head, init, last, length)
#
# Default: after_alias
list_align: new_line
# Right-pad the module names to align imports in a group:
#
# - true: a little more readable
#
# > import qualified Data.List as List (concat, foldl, foldr,
# > init, last, length)
# > import qualified Data.List.Extra as List (concat, foldl, foldr,
# > init, last, length)
#
# - false: diff-safe
#
# > import qualified Data.List as List (concat, foldl, foldr, init,
# > last, length)
# > import qualified Data.List.Extra as List (concat, foldl, foldr,
# > init, last, length)
#
# Default: true
pad_module_names: false
# Long list align style takes effect when import is too long. This is
# determined by 'columns' setting.
#
# - inline: This option will put as much specs on same line as possible.
#
# - new_line: Import list will start on new line.
#
# - new_line_multiline: Import list will start on new line when it's
# short enough to fit to single line. Otherwise it'll be multiline.
#
# - multiline: One line per import list entry.
# Type with constructor list acts like single import.
#
# > import qualified Data.Map as M
# > ( empty
# > , singleton
# > , ...
# > , delete
# > )
#
# Default: inline
long_list_align: new_line_multiline
# Align empty list (importing instances)
#
# Empty list align has following options
#
# - inherit: inherit list_align setting
#
# - right_after: () is right after the module name:
#
# > import Vector.Instances ()
#
# Default: inherit
empty_list_align: inherit
# List padding determines indentation of import list on lines after import.
# This option affects 'long_list_align'.
#
# - <integer>: constant value
#
# - module_name: align under start of module name.
# Useful for 'file' and 'group' align settings.
list_padding: 7
# Separate lists option affects formatting of import list for type
# or class. The only difference is single space between type and list
# of constructors, selectors and class functions.
#
# - true: There is single space between Foldable type and list of it's
# functions.
#
# > import Data.Foldable (Foldable (fold, foldl, foldMap))
#
# - false: There is no space between Foldable type and list of it's
# functions.
#
# > import Data.Foldable (Foldable(fold, foldl, foldMap))
#
# Default: true
separate_lists: false
# Space surround option affects formatting of import lists on a single
# line. The only difference is single space after the initial
# parenthesis and a single space before the terminal parenthesis.
#
# - true: There is single space associated with the enclosing
# parenthesis.
#
# > import Data.Foo ( foo )
#
# - false: There is no space associated with the enclosing parenthesis
#
# > import Data.Foo (foo)
#
# Default: false
space_surround: false
# Language pragmas
- language_pragmas:
# We can generate different styles of language pragma lists.
#
# - vertical: Vertical-spaced language pragmas, one per line.
#
# - compact: A more compact style.
#
# - compact_line: Similar to compact, but wrap each line with
# `{-#LANGUAGE #-}'.
#
# Default: vertical.
style: vertical
# Align affects alignment of closing pragma brackets.
#
# - true: Brackets are aligned in same column.
#
# - false: Brackets are not aligned together. There is only one space
# between actual import and closing bracket.
#
# Default: true
align: false
# stylish-haskell can detect redundancy of some language pragmas. If this
# is set to true, it will remove those redundant pragmas. Default: true.
remove_redundant: true
# Replace tabs by spaces. This is disabled by default.
# - tabs:
# # Number of spaces to use for each tab. Default: 8, as specified by the
# # Haskell report.
# spaces: 8
# Remove trailing whitespace
- trailing_whitespace: {}
# Squash multiple spaces between the left and right hand sides of some
# elements into single spaces. Basically, this undoes the effect of
# simple_align but is a bit less conservative.
# - squash: {}
# A common setting is the number of columns (parts of) code will be wrapped
# to. Different steps take this into account. Default: 80.
columns: 80
# By default, line endings are converted according to the OS. You can override
# preferred format here.
#
# - native: Native newline format. CRLF on Windows, LF on other OSes.
#
# - lf: Convert to LF ("\n").
#
# - crlf: Convert to CRLF ("\r\n").
#
# Default: native.
newline: native
# Sometimes, language extensions are specified in a cabal file or from the
# command line instead of using language pragmas in the file. stylish-haskell
# needs to be aware of these, so it can parse the file correctly.
#
# No language extensions are enabled by default.
language_extensions:
- RecordWildCards
- TemplateHaskell
- QuasiQuotes
- LambdaCase
- TupleSections
- MultiParamTypeClasses
- TypeApplications
- DataKinds
- TypeFamilies
- FlexibleContexts
- NamedFieldPuns
- MultiWayIf
- PolyKinds
- ExplicitForAll
- FunctionalDependencies
- ExplicitNamespaces
- ScopedTypeVariables
- ExistentialQuantification
- InstanceSigs
- GeneralizedNewtypeDeriving
- BangPatterns

50
src/Data/Email/Header.hs Normal file
View File

@ -0,0 +1,50 @@
module Data.Email.Header where
import Data.Text
(Text)
import qualified Data.Text as T
import qualified Data.Foldable as F
import Data.Attoparsec.Text
import Data.Vector
(Vector)
import qualified Data.Vector as V
import Data.Char
(isSpace)
import Control.Applicative
((<|>))
data Header
= From !Text
| To !(Vector Text)
deriving (Show, Eq)
decode :: Text -> Either String Header
decode = parseOnly parseHeader
where
parseHeader :: Parser Header
parseHeader = parseFrom <|> parseTo
parseFrom :: Parser Header
parseFrom = From <$> (string "From:" *> emptySpace *> email)
parseTo :: Parser Header
parseTo = To <$> (string "To:" *> emptySpace *> emails)
emptySpace = many' space
emails :: Parser (Vector Text)
emails = V.fromList <$> email `sepBy` char ','
email :: Parser Text
email = do
_ <- many' space
name <- T.pack <$> many' (notChar '@')
_ <- char '@'
rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ','))
pure (name <> "@" <> rest)
encode :: Header -> Text
encode = \case
From addr -> "From: " <> addr
To addrs -> "To: " <> T.intercalate ", " (F.toList addrs)

5
test.sh Executable file
View File

@ -0,0 +1,5 @@
#!/usr/bin/env bash
# while :; do
find addressbook.cabal {src,test,app} -type f | entr -d -r -c -s 'cabal test --test-show-details=direct --ghc-options=-Wall'
# done

View File

@ -0,0 +1,36 @@
module Test.Data.Email.Header where
import Test.Tasty
import Test.Tasty.Hedgehog
import Hedgehog
import qualified Hedgehog.Corpus as Corpus
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import Data.Text
import qualified Data.Vector as V
import Data.Email.Header
genHeader :: Gen Header
genHeader = Gen.choice
[ From <$> genEmail
, To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
]
where
genEmail :: Gen Text
genEmail = do
name <- Gen.element Corpus.simpsons
domain <- Gen.element Corpus.cooking
tld <- Gen.element ["com","fi","org"]
pure $ name <> "@" <> domain <> "." <> tld
prop_roundtrip_parse :: Property
prop_roundtrip_parse = property $ do
header <- forAll genHeader
tripping header encode decode
tests :: TestTree
tests = testGroup "Data.Email.Header"
[ testProperty "roundtrip property" $ prop_roundtrip_parse ]