From a43b0bf471994044bb17de789bf07f501daba16e Mon Sep 17 00:00:00 2001 From: Mats Rauhala Date: Thu, 10 Dec 2020 21:08:48 +0200 Subject: [PATCH] Initial email header parsing --- .stylish-haskell.yaml | 248 +++++++++++++++++++++++++++++++++ src/Data/Email/Header.hs | 50 +++++++ test.sh | 5 + test/Test/Data/Email/Header.hs | 36 +++++ 4 files changed, 339 insertions(+) create mode 100644 .stylish-haskell.yaml create mode 100644 src/Data/Email/Header.hs create mode 100755 test.sh create mode 100644 test/Test/Data/Email/Header.hs diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml new file mode 100644 index 0000000..472fa1d --- /dev/null +++ b/.stylish-haskell.yaml @@ -0,0 +1,248 @@ +# stylish-haskell configuration file +# ================================== + +# The stylish-haskell tool is mainly configured by specifying steps. These steps +# are a list, so they have an order, and one specific step may appear more than +# once (if needed). Each file is processed by these steps in the given order. +steps: + # Convert some ASCII sequences to their Unicode equivalents. This is disabled + # by default. + # - unicode_syntax: + # # In order to make this work, we also need to insert the UnicodeSyntax + # # language pragma. If this flag is set to true, we insert it when it's + # # not already present. You may want to disable it if you configure + # # language extensions using some other method than pragmas. Default: + # # true. + # add_language_pragma: true + + # Align the right hand side of some elements. This is quite conservative + # and only applies to statements where each element occupies a single + # line. + - simple_align: + cases: false + top_level_patterns: false + records: false + + # Import cleanup + - imports: + # There are different ways we can align names and lists. + # + # - global: Align the import names and import list throughout the entire + # file. + # + # - file: Like global, but don't add padding when there are no qualified + # imports in the file. 
+ # + # - group: Only align the imports per group (a group is formed by adjacent + # import lines). + # + # - none: Do not perform any alignment. + # + # Default: global. + align: none + + # The following options affect only import list alignment. + # + # List align has following options: + # + # - after_alias: Import list is aligned with end of import including + # 'as' and 'hiding' keywords. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - with_alias: Import list is aligned with start of alias or hiding. + # + # > import qualified Data.List as List (concat, foldl, foldr, head, + # > init, last, length) + # + # - new_line: Import list starts always on new line. + # + # > import qualified Data.List as List + # > (concat, foldl, foldr, head, init, last, length) + # + # Default: after_alias + list_align: new_line + + # Right-pad the module names to align imports in a group: + # + # - true: a little more readable + # + # > import qualified Data.List as List (concat, foldl, foldr, + # > init, last, length) + # > import qualified Data.List.Extra as List (concat, foldl, foldr, + # > init, last, length) + # + # - false: diff-safe + # + # > import qualified Data.List as List (concat, foldl, foldr, init, + # > last, length) + # > import qualified Data.List.Extra as List (concat, foldl, foldr, + # > init, last, length) + # + # Default: true + pad_module_names: false + + # Long list align style takes effect when import is too long. This is + # determined by 'columns' setting. + # + # - inline: This option will put as much specs on same line as possible. + # + # - new_line: Import list will start on new line. + # + # - new_line_multiline: Import list will start on new line when it's + # short enough to fit to single line. Otherwise it'll be multiline. + # + # - multiline: One line per import list entry. + # Type with constructor list acts like single import. 
+ # + # > import qualified Data.Map as M + # > ( empty + # > , singleton + # > , ... + # > , delete + # > ) + # + # Default: inline + long_list_align: new_line_multiline + + # Align empty list (importing instances) + # + # Empty list align has following options + # + # - inherit: inherit list_align setting + # + # - right_after: () is right after the module name: + # + # > import Vector.Instances () + # + # Default: inherit + empty_list_align: inherit + + # List padding determines indentation of import list on lines after import. + # This option affects 'long_list_align'. + # + # - <integer>: constant value + # + # - module_name: align under start of module name. + # Useful for 'file' and 'group' align settings. + list_padding: 7 + + # Separate lists option affects formatting of import list for type + # or class. The only difference is single space between type and list + # of constructors, selectors and class functions. + # + # - true: There is single space between Foldable type and list of its + # functions. + # + # > import Data.Foldable (Foldable (fold, foldl, foldMap)) + # + # - false: There is no space between Foldable type and list of its + # functions. + # + # > import Data.Foldable (Foldable(fold, foldl, foldMap)) + # + # Default: true + separate_lists: false + + # Space surround option affects formatting of import lists on a single + # line. The only difference is single space after the initial + # parenthesis and a single space before the terminal parenthesis. + # + # - true: There is single space associated with the enclosing + # parenthesis. + # + # > import Data.Foo ( foo ) + # + # - false: There is no space associated with the enclosing parenthesis + # + # > import Data.Foo (foo) + # + # Default: false + space_surround: false + + # Language pragmas + - language_pragmas: + # We can generate different styles of language pragma lists. + # + # - vertical: Vertical-spaced language pragmas, one per line. + # + # - compact: A more compact style. 
+ # + # - compact_line: Similar to compact, but wrap each line with + # `{-#LANGUAGE #-}'. + # + # Default: vertical. + style: vertical + + # Align affects alignment of closing pragma brackets. + # + # - true: Brackets are aligned in same column. + # + # - false: Brackets are not aligned together. There is only one space + # between actual import and closing bracket. + # + # Default: true + align: false + + # stylish-haskell can detect redundancy of some language pragmas. If this + # is set to true, it will remove those redundant pragmas. Default: true. + remove_redundant: true + + # Replace tabs by spaces. This is disabled by default. + # - tabs: + # # Number of spaces to use for each tab. Default: 8, as specified by the + # # Haskell report. + # spaces: 8 + + # Remove trailing whitespace + - trailing_whitespace: {} + + # Squash multiple spaces between the left and right hand sides of some + # elements into single spaces. Basically, this undoes the effect of + # simple_align but is a bit less conservative. + # - squash: {} + +# A common setting is the number of columns (parts of) code will be wrapped +# to. Different steps take this into account. Default: 80. +columns: 80 + +# By default, line endings are converted according to the OS. You can override +# preferred format here. +# +# - native: Native newline format. CRLF on Windows, LF on other OSes. +# +# - lf: Convert to LF ("\n"). +# +# - crlf: Convert to CRLF ("\r\n"). +# +# Default: native. +newline: native + +# Sometimes, language extensions are specified in a cabal file or from the +# command line instead of using language pragmas in the file. stylish-haskell +# needs to be aware of these, so it can parse the file correctly. +# +# No language extensions are enabled by default. 
+language_extensions: + - RecordWildCards + - TemplateHaskell + - QuasiQuotes + - LambdaCase + - TupleSections + - MultiParamTypeClasses + - TypeApplications + - DataKinds + - TypeFamilies + - FlexibleContexts + - NamedFieldPuns + - MultiWayIf + - PolyKinds + - ExplicitForAll + - FunctionalDependencies + - ExplicitNamespaces + - ScopedTypeVariables + - ExistentialQuantification + - InstanceSigs + - GeneralizedNewtypeDeriving + - BangPatterns diff --git a/src/Data/Email/Header.hs b/src/Data/Email/Header.hs new file mode 100644 index 0000000..5936302 --- /dev/null +++ b/src/Data/Email/Header.hs @@ -0,0 +1,50 @@ +module Data.Email.Header where + +import Data.Text + (Text) +import qualified Data.Text as T + +import qualified Data.Foldable as F + +import Data.Attoparsec.Text + +import Data.Vector + (Vector) +import qualified Data.Vector as V + +import Data.Char + (isSpace) + +import Control.Applicative + ((<|>)) +{- | One parsed header line: a single @From@ address or the list of @To@ addresses. -} +data Header + = From !Text + | To !(Vector Text) + deriving (Show, Eq) +{- | Parse one @From:@ or @To:@ header line. A failed parse is returned as 'Left' with the parser's error string. Each address must have at least one character on both sides of the at sign. -} +decode :: Text -> Either String Header +decode = parseOnly parseHeader + where + parseHeader :: Parser Header + parseHeader = parseFrom <|> parseTo + parseFrom :: Parser Header + parseFrom = From <$> (string "From:" *> emptySpace *> email) + parseTo :: Parser Header + parseTo = To <$> (string "To:" *> emptySpace *> emails) + emptySpace = many' space + emails :: Parser (Vector Text) + emails = V.fromList <$> email `sepBy` char ',' + email :: Parser Text + email = do + _ <- many' space + name <- T.pack <$> many1' (notChar '@') {- many1': reject an empty local part -} + _ <- char '@' + rest <- T.pack <$> many1' (satisfy (\c -> not (isSpace c) && c /= ',')) {- many1': reject an empty domain; stops at whitespace or a comma -} + pure (name <> "@" <> rest) + +{- | Render a header as text: @From: addr@, or @To:@ followed by the addresses joined with a comma and a space. -} +encode :: Header -> Text +encode = \case + From addr -> "From: " <> addr + To addrs -> "To: " <> T.intercalate ", " (F.toList addrs) diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..c805670 --- /dev/null +++ b/test.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +# while :; do +find addressbook.cabal {src,test,app} -type f | entr -d 
+ -r -c -s 'cabal test --test-show-details=direct --ghc-options=-Wall' +# done diff --git a/test/Test/Data/Email/Header.hs b/test/Test/Data/Email/Header.hs new file mode 100644 index 0000000..b261a93 --- /dev/null +++ b/test/Test/Data/Email/Header.hs @@ -0,0 +1,36 @@ +module Test.Data.Email.Header where + +import Test.Tasty +import Test.Tasty.Hedgehog + +import Hedgehog +import qualified Hedgehog.Corpus as Corpus +import qualified Hedgehog.Gen as Gen +import qualified Hedgehog.Range as Range + +import Data.Text +import qualified Data.Vector as V + +import Data.Email.Header +{- | Generator for headers: either a @From@ with one address, or a @To@ whose address list is sized by @Range.linear 0 10@. -} +genHeader :: Gen Header +genHeader = Gen.choice [fromHeader, toHeader] + where + fromHeader, toHeader :: Gen Header + fromHeader = fmap From address + toHeader = fmap (To . V.fromList) (Gen.list (Range.linear 0 10) address) + address :: Gen Text + address = do + user <- Gen.element Corpus.simpsons + host <- Gen.element Corpus.cooking + suffix <- Gen.element ["com","fi","org"] + pure (user <> "@" <> host <> "." <> suffix) +{- | Encoding a generated header and decoding the result must give the same header back. -} +prop_roundtrip_parse :: Property +prop_roundtrip_parse = + property $ + forAll genHeader >>= \generated -> tripping generated encode decode +{- | Test tree for this module. -} +tests :: TestTree +tests = + testGroup "Data.Email.Header" [testProperty "roundtrip property" prop_roundtrip_parse]