From a43b0bf471994044bb17de789bf07f501daba16e Mon Sep 17 00:00:00 2001
From: Mats Rauhala <mats.rauhala@iki.fi>
Date: Thu, 10 Dec 2020 21:08:48 +0200
Subject: [PATCH] Initial email header parsing

---
 .stylish-haskell.yaml          | 248 +++++++++++++++++++++++++++++++++
 src/Data/Email/Header.hs       |  50 +++++++
 test.sh                        |   5 +
 test/Test/Data/Email/Header.hs |  36 +++++
 4 files changed, 339 insertions(+)
 create mode 100644 .stylish-haskell.yaml
 create mode 100644 src/Data/Email/Header.hs
 create mode 100755 test.sh
 create mode 100644 test/Test/Data/Email/Header.hs

diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml
new file mode 100644
index 0000000..472fa1d
--- /dev/null
+++ b/.stylish-haskell.yaml
@@ -0,0 +1,248 @@
+# stylish-haskell configuration file
+# ==================================
+
+# The stylish-haskell tool is mainly configured by specifying steps. These steps
+# are a list, so they have an order, and one specific step may appear more than
+# once (if needed). Each file is processed by these steps in the given order.
+steps:
+  # Convert some ASCII sequences to their Unicode equivalents. This is disabled
+  # by default.
+  # - unicode_syntax:
+  #     # In order to make this work, we also need to insert the UnicodeSyntax
+  #     # language pragma. If this flag is set to true, we insert it when it's
+  #     # not already present. You may want to disable it if you configure
+  #     # language extensions using some other method than pragmas. Default:
+  #     # true.
+  #     add_language_pragma: true
+
+  # Align the right hand side of some elements.  This is quite conservative
+  # and only applies to statements where each element occupies a single
+  # line.
+  - simple_align:
+      cases: false
+      top_level_patterns: false
+      records: false
+
+  # Import cleanup
+  - imports:
+      # There are different ways we can align names and lists.
+      #
+      # - global: Align the import names and import list throughout the entire
+      #   file.
+      #
+      # - file: Like global, but don't add padding when there are no qualified
+      #   imports in the file.
+      #
+      # - group: Only align the imports per group (a group is formed by adjacent
+      #   import lines).
+      #
+      # - none: Do not perform any alignment.
+      #
+      # Default: global.
+      align: none
+
+      # The following options affect only import list alignment.
+      #
+      # List align has following options:
+      #
+      # - after_alias: Import list is aligned with end of import including
+      #   'as' and 'hiding' keywords.
+      #
+      #   > import qualified Data.List      as List (concat, foldl, foldr, head,
+      #   >                                          init, last, length)
+      #
+      # - with_alias: Import list is aligned with start of alias or hiding.
+      #
+      #   > import qualified Data.List      as List (concat, foldl, foldr, head,
+      #   >                                 init, last, length)
+      #
+      # - new_line: Import list starts always on new line.
+      #
+      #   > import qualified Data.List      as List
+      #   >     (concat, foldl, foldr, head, init, last, length)
+      #
+      # Default: after_alias
+      list_align: new_line
+
+      # Right-pad the module names to align imports in a group:
+      #
+      # - true: a little more readable
+      #
+      #   > import qualified Data.List       as List (concat, foldl, foldr,
+      #   >                                           init, last, length)
+      #   > import qualified Data.List.Extra as List (concat, foldl, foldr,
+      #   >                                           init, last, length)
+      #
+      # - false: diff-safe
+      #
+      #   > import qualified Data.List as List (concat, foldl, foldr, init,
+      #   >                                     last, length)
+      #   > import qualified Data.List.Extra as List (concat, foldl, foldr,
+      #   >                                           init, last, length)
+      #
+      # Default: true
+      pad_module_names: false
+
+      # Long list align style takes effect when import is too long. This is
+      # determined by 'columns' setting.
+      #
+      # - inline: This option will put as much specs on same line as possible.
+      #
+      # - new_line: Import list will start on new line.
+      #
+      # - new_line_multiline: Import list will start on new line when it's
+      #   short enough to fit to single line. Otherwise it'll be multiline.
+      #
+      # - multiline: One line per import list entry.
+      #   Type with constructor list acts like single import.
+      #
+      #   > import qualified Data.Map as M
+      #   >     ( empty
+      #   >     , singleton
+      #   >     , ...
+      #   >     , delete
+      #   >     )
+      #
+      # Default: inline
+      long_list_align: new_line_multiline
+
+      # Align empty list (importing instances)
+      #
+      # Empty list align has following options
+      #
+      # - inherit: inherit list_align setting
+      #
+      # - right_after: () is right after the module name:
+      #
+      #   > import Vector.Instances ()
+      #
+      # Default: inherit
+      empty_list_align: inherit
+
+      # List padding determines indentation of import list on lines after import.
+      # This option affects 'long_list_align'.
+      #
+      # - <integer>: constant value
+      #
+      # - module_name: align under start of module name.
+      #   Useful for 'file' and 'group' align settings.
+      list_padding: 7
+
+      # Separate lists option affects formatting of import list for type
+      # or class. The only difference is single space between type and list
+      # of constructors, selectors and class functions.
+      #
+      # - true: There is single space between Foldable type and list of it's
+      #   functions.
+      #
+      #   > import Data.Foldable (Foldable (fold, foldl, foldMap))
+      #
+      # - false: There is no space between Foldable type and list of it's
+      #   functions.
+      #
+      #   > import Data.Foldable (Foldable(fold, foldl, foldMap))
+      #
+      # Default: true
+      separate_lists: false
+
+      # Space surround option affects formatting of import lists on a single
+      # line. The only difference is single space after the initial
+      # parenthesis and a single space before the terminal parenthesis.
+      #
+      # - true: There is single space associated with the enclosing
+      #   parenthesis.
+      #
+      #   > import Data.Foo ( foo )
+      #
+      # - false: There is no space associated with the enclosing parenthesis
+      #
+      #   > import Data.Foo (foo)
+      #
+      # Default: false
+      space_surround: false
+
+  # Language pragmas
+  - language_pragmas:
+      # We can generate different styles of language pragma lists.
+      #
+      # - vertical: Vertical-spaced language pragmas, one per line.
+      #
+      # - compact: A more compact style.
+      #
+      # - compact_line: Similar to compact, but wrap each line with
+      #   `{-#LANGUAGE #-}'.
+      #
+      # Default: vertical.
+      style: vertical
+
+      # Align affects alignment of closing pragma brackets.
+      #
+      # - true: Brackets are aligned in same column.
+      #
+      # - false: Brackets are not aligned together. There is only one space
+      #   between actual import and closing bracket.
+      #
+      # Default: true
+      align: false
+
+      # stylish-haskell can detect redundancy of some language pragmas. If this
+      # is set to true, it will remove those redundant pragmas. Default: true.
+      remove_redundant: true
+
+  # Replace tabs by spaces. This is disabled by default.
+  # - tabs:
+  #     # Number of spaces to use for each tab. Default: 8, as specified by the
+  #     # Haskell report.
+  #     spaces: 8
+
+  # Remove trailing whitespace
+  - trailing_whitespace: {}
+
+  # Squash multiple spaces between the left and right hand sides of some
+  # elements into single spaces. Basically, this undoes the effect of
+  # simple_align but is a bit less conservative.
+  # - squash: {}
+
+# A common setting is the number of columns (parts of) code will be wrapped
+# to. Different steps take this into account. Default: 80.
+columns: 80
+
+# By default, line endings are converted according to the OS. You can override
+# preferred format here.
+#
+# - native: Native newline format. CRLF on Windows, LF on other OSes.
+#
+# - lf: Convert to LF ("\n").
+#
+# - crlf: Convert to CRLF ("\r\n").
+#
+# Default: native.
+newline: native
+
+# Sometimes, language extensions are specified in a cabal file or from the
+# command line instead of using language pragmas in the file. stylish-haskell
+# needs to be aware of these, so it can parse the file correctly.
+#
+# No language extensions are enabled by default.
+language_extensions:
+  - RecordWildCards
+  - TemplateHaskell
+  - QuasiQuotes
+  - LambdaCase
+  - TupleSections
+  - MultiParamTypeClasses
+  - TypeApplications
+  - DataKinds
+  - TypeFamilies
+  - FlexibleContexts
+  - NamedFieldPuns
+  - MultiWayIf
+  - PolyKinds
+  - ExplicitForAll
+  - FunctionalDependencies
+  - ExplicitNamespaces
+  - ScopedTypeVariables
+  - ExistentialQuantification
+  - InstanceSigs
+  - GeneralizedNewtypeDeriving
+  - BangPatterns
diff --git a/src/Data/Email/Header.hs b/src/Data/Email/Header.hs
new file mode 100644
index 0000000..5936302
--- /dev/null
+++ b/src/Data/Email/Header.hs
@@ -0,0 +1,50 @@
+module Data.Email.Header where
+
+import Data.Text
+       (Text)
+import qualified Data.Text as T
+
+import qualified Data.Foldable as F
+
+import Data.Attoparsec.Text
+
+import Data.Vector
+       (Vector)
+import qualified Data.Vector as V
+
+import Data.Char
+       (isSpace)
+
+import Control.Applicative
+       ((<|>))
+
+data Header
+  = From !Text
+  | To !(Vector Text)
+  deriving (Show, Eq)
+
+decode :: Text -> Either String Header
+decode = parseOnly parseHeader
+  where
+    parseHeader :: Parser Header
+    parseHeader = parseFrom <|> parseTo
+    parseFrom :: Parser Header
+    parseFrom = From <$> (string "From:" *> emptySpace *> email)
+    parseTo :: Parser Header
+    parseTo = To <$> (string "To:" *> emptySpace *> emails)
+    emptySpace = many' space
+    emails :: Parser (Vector Text)
+    emails = V.fromList <$> email `sepBy` char ','
+    email :: Parser Text
+    email = do
+      _ <- many' space
+      name <- T.pack <$> many' (notChar '@')
+      _ <- char '@'
+      rest <- T.pack <$> many' (satisfy (\c -> not (isSpace c) && c /= ','))
+      pure (name <> "@" <> rest)
+
+
+encode :: Header -> Text
+encode = \case
+  From addr -> "From: " <> addr
+  To addrs -> "To: " <> T.intercalate ", " (F.toList addrs)
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..c805670
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+# while :; do
+find addressbook.cabal {src,test,app} -type f | entr -d -r -c -s 'cabal test --test-show-details=direct --ghc-options=-Wall'
+# done
diff --git a/test/Test/Data/Email/Header.hs b/test/Test/Data/Email/Header.hs
new file mode 100644
index 0000000..b261a93
--- /dev/null
+++ b/test/Test/Data/Email/Header.hs
@@ -0,0 +1,36 @@
+module Test.Data.Email.Header where
+
+import Test.Tasty
+import Test.Tasty.Hedgehog
+
+import Hedgehog
+import qualified Hedgehog.Corpus as Corpus
+import qualified Hedgehog.Gen as Gen
+import qualified Hedgehog.Range as Range
+
+import Data.Text
+import qualified Data.Vector as V
+
+import Data.Email.Header
+
+genHeader :: Gen Header
+genHeader = Gen.choice
+  [ From <$> genEmail
+  , To . V.fromList <$> Gen.list (Range.linear 0 10) genEmail
+  ]
+  where
+    genEmail :: Gen Text
+    genEmail = do
+      name <- Gen.element Corpus.simpsons
+      domain <- Gen.element Corpus.cooking
+      tld <- Gen.element ["com","fi","org"]
+      pure $ name <> "@" <> domain <> "." <> tld
+
+prop_roundtrip_parse :: Property
+prop_roundtrip_parse = property $ do
+  header <- forAll genHeader
+  tripping header encode decode
+
+tests :: TestTree
+tests = testGroup "Data.Email.Header"
+  [ testProperty "roundtrip property" $ prop_roundtrip_parse ]