diff --git a/default.nix b/default.nix index 3177a6c..2795847 100644 --- a/default.nix +++ b/default.nix @@ -1,8 +1,9 @@ { mkDerivation, aeson, amazonka, amazonka-s3, base, bytestring -, conduit, cryptonite, dhall, directory, either, ekg-core, filepath -, generic-lens, hedgehog, hspec, hspec-hedgehog, JuicyPixels -, JuicyPixels-extra, lens, lib, mtl, servant, servant-server -, sqlite-simple, text, unliftio, unordered-containers, vector, wai +, conduit, containers, cryptonite, dhall, directory, either +, ekg-core, filepath, generic-lens, hashable, hedgehog, hspec +, hspec-hedgehog, JuicyPixels, JuicyPixels-extra, lens, lib, mtl +, servant, servant-server, sqlite-simple, temporary, text +, transformers, unliftio, unordered-containers, vector, wai , wai-middleware-metrics, warp }: mkDerivation { @@ -20,7 +21,10 @@ mkDerivation { ]; executableHaskellDepends = [ base ]; testHaskellDepends = [ - aeson base dhall either hedgehog hspec hspec-hedgehog + aeson amazonka amazonka-s3 base bytestring conduit containers + cryptonite dhall either filepath generic-lens hashable hedgehog + hspec hspec-hedgehog lens temporary text transformers + unordered-containers ]; license = lib.licenses.bsd3; mainProgram = "image-backup"; diff --git a/image-backup.cabal b/image-backup.cabal index 7009cfd..f47768c 100644 --- a/image-backup.cabal +++ b/image-backup.cabal @@ -64,6 +64,7 @@ library Config Config.S3 Config.Watcher + S3.Interface Image.Fingerprint -- Modules included in this library but not exported. @@ -140,6 +141,7 @@ test-suite image-backup-test -- Modules included in this executable, other than Main. other-modules: Test.Config + Test.S3.Interface -- LANGUAGE extensions used by modules in this package. -- other-extensions: @@ -162,3 +164,17 @@ test-suite image-backup-test , dhall , aeson , either + , temporary + , cryptonite + , conduit + , text + , filepath + , bytestring + , containers + , unordered-containers + , hashable + , transformers + , amazonka + , lens + , generic-lens + , amazonka-s3 diff --git a/src/MyLib.hs b/src/MyLib.hs index 8bab55d..1ff53a5 100644 --- a/src/MyLib.hs +++ b/src/MyLib.hs @@ -21,11 +21,10 @@ import System.FilePath (isExtensionOf, takeBaseName) import qualified Data.Conduit.Combinators as C import Crypto.Hash (HashAlgorithm, Digest, SHA256) import qualified Crypto.Hash as Crypto -import Amazonka (newEnv, Region(..), Env, Env'(..), setEndpoint, runResourceT, send, chunkedFile, defaultChunkSize) -import Amazonka.Auth (discover) +import Amazonka (Env, runResourceT, send, chunkedFile, defaultChunkSize) import qualified Amazonka.S3 as S3 import qualified Data.Text as T -import Control.Lens (view, set, (&), re) +import Control.Lens (view, set, (&), (^.)) import Data.Generics.Labels () import Control.Monad (void, forever) import Data.HashMap.Strict (HashMap) @@ -34,13 +33,13 @@ import qualified Data.HashMap.Strict as HM import System.Environment (getEnv) import qualified Dhall import Config (Config) -import Data.Text.Strict.Lens (utf8) import Config.Watcher (Seconds) import Control.Concurrent (threadDelay) import UnliftIO (forConcurrently_) import Data.Word (Word64) import Image.Fingerprint (dhash) import Data.Maybe (catMaybes) +import qualified S3.Interface newtype Routes route = Routes { _getMetrics :: route :- MetricsRoute } @@ -86,28 +85,24 @@ analyze = C.mapM (\p -> Image p <$> hash p <*> fingerprint p) fingerprint = fmap (either (const Nothing) Just) . liftIO . dhash -watcher :: Env -> S3.BucketName -> FilePath -> Seconds -> IO () -watcher env bucket path delay = forever $ do +watcher :: S3.Interface.S3Interface -> FilePath -> Seconds -> IO () +watcher int path delay = forever $ do runResourceT $ runConduit $ scan path (isExtensionOf ".jpg") .| analyze .| C.iterM (liftIO . print) - .| C.mapM_ (void . liftIO . uploadImage env bucket) + .| C.map (\img -> (imagePath img, imageToMetadata img)) + .| C.mapM_ (void . liftIO . uncurry (S3.Interface.putFile int)) threadDelay (fromIntegral (view #seconds delay) * 100_000) someFunc :: IO () someFunc = do confPath <- getEnv "CONFIG_PATH" conf <- Dhall.inputFile (Dhall.auto @Config) confPath - discoveredEnv <- newEnv discover - let env = discoveredEnv - { region = Region' $ view (#s3 . #region) conf - , overrides = setEndpoint True (view (#s3 . #endpoint . re utf8) conf) 443 - } - bucket = S3.BucketName (view (#s3 . #bucket) conf) + s3Interface <- S3.Interface.buildInterface (conf ^. #s3) store <- newStore waiMetrics <- registerWaiMetrics store registerGcMetrics store - let watchers = [watcher env bucket path (view (#watcher . #period) conf) | path <- view (#watcher . #directories) conf] + let watchers = [watcher s3Interface path (view (#watcher . #period) conf) | path <- view (#watcher . #directories) conf] actions = ( runSettings settings (metrics waiMetrics $ app store) ) : watchers forConcurrently_ actions id -- runSettings settings (metrics waiMetrics $ app store) diff --git a/src/S3/Interface.hs b/src/S3/Interface.hs new file mode 100644 index 0000000..9968f11 --- /dev/null +++ b/src/S3/Interface.hs @@ -0,0 +1,65 @@ +{-# LANGUAGE OverloadedLabels #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} + +module S3.Interface where + +import Data.HashMap.Strict (HashMap) +import Data.Text (Text) +import Config.S3 (ConfigS3) +import Amazonka (newEnv, Region(..), Env, Env'(..), setEndpoint, runResourceT, send, chunkedFile, defaultChunkSize, discover, sinkBody, hashedFile, RequestBody (..)) +import qualified Amazonka.S3 as S3 +import Control.Lens (view, re, (&), set) +import Data.Text.Strict.Lens (utf8) +import Data.Generics.Labels () +import qualified Data.Text as T +import System.FilePath (takeBaseName, ()) +import qualified Data.Conduit.Combinators as C + +type Metadata = HashMap Text Text +type Key = Text + +data S3Interface = S3Interface + { putFile :: FilePath -> Metadata -> IO Key + -- ^ Upload a file from path + , getFile :: Key -> FilePath -> IO FilePath + -- ^ Download a file to a given root + , getMetadata :: Key -> IO Metadata + -- ^ Fetch the metadata + } + +buildInterface :: ConfigS3 -> IO S3Interface +buildInterface s3Conf = do + discoveredEnv <- newEnv discover + let env = discoveredEnv + { region = Region' $ view #region s3Conf + , overrides = setEndpoint True (view (#endpoint . re utf8) s3Conf) 443 + } + bucket = S3.BucketName (view #bucket s3Conf) + pure S3Interface + { putFile = uploadFile env bucket + , getFile = downloadFile env bucket + , getMetadata = fetchMeta env bucket + } + where + uploadFile :: Env -> S3.BucketName -> FilePath -> Metadata -> IO Key + uploadFile env bucket path metadata = do + obj <- Hashed <$> hashedFile path + runResourceT $ do + let putObj = S3.newPutObject bucket (S3.ObjectKey key) obj & set #metadata metadata + key = T.pack . takeBaseName $ path + _ <- send env putObj + pure key + fetchMeta :: Env -> S3.BucketName -> Key -> IO Metadata + fetchMeta env bucket key = do + runResourceT $ do + let getObjHead = S3.newHeadObject bucket (S3.ObjectKey key) + view #metadata <$> send env getObjHead + downloadFile :: Env -> S3.BucketName -> Key -> FilePath -> IO FilePath + downloadFile env bucket key root = do + let getObj = S3.newGetObject bucket (S3.ObjectKey key) + runResourceT $ do + x <- view #body <$> send env getObj + let path = root T.unpack key + _ <- sinkBody x (C.sinkFileBS path) + pure path diff --git a/test/Main.hs b/test/Main.hs index 990c4de..0540362 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,8 +1,14 @@ module Main (main) where import qualified Test.Config +import qualified Test.S3.Interface import Test.Hspec (hspec) +import System.Environment (lookupEnv) +import qualified Dhall main :: IO () -main = hspec $ do - Test.Config.spec +main = do + config <- lookupEnv "TEST_CONFIG_PATH" >>= maybe (pure Nothing) (fmap Just . Dhall.inputFile Dhall.auto) + hspec $ do + Test.Config.spec + Test.S3.Interface.spec config diff --git a/test/Test/S3/Interface.hs b/test/Test/S3/Interface.hs new file mode 100644 index 0000000..4eeb904 --- /dev/null +++ b/test/Test/S3/Interface.hs @@ -0,0 +1,140 @@ +{-# LANGUAGE TypeApplications #-} +module Test.S3.Interface where + +import Data.Generics.Labels () +import Control.Monad.Trans.Cont +import Test.Hspec +import Test.Hspec.Hedgehog +import qualified Hedgehog.Gen as Gen +import qualified Hedgehog.Range as Range +import S3.Interface +import qualified Data.Conduit.Combinators as C +import Conduit ((.|), runConduit, runResourceT, liftIO) +import qualified Data.Text as T +import System.FilePath (takeBaseName, (), (<.>)) +import qualified Data.ByteString.Char8 as BC +import Data.Map (Map) +import Data.HashMap.Strict (HashMap) +import qualified Data.Map.Strict as M +import qualified Data.HashMap.Strict as HM +import Data.Hashable (Hashable) +import Data.Text (Text) +import qualified Crypto.Random as Random +import MyLib (hash) +import System.IO.Temp (withSystemTempDirectory) +import Crypto.Hash.Algorithms (SHA256) +import Config.S3 (ConfigS3) +import Crypto.Hash (Digest) + +withTempDir :: FilePath -> ContT r IO FilePath +withTempDir path = ContT (withSystemTempDirectory path) + +-- XXX: Temporarily have the model just here +buildModel :: FilePath -> IO S3Interface +buildModel root = + pure S3Interface + { putFile = \path mt -> do + let key = T.pack . takeBaseName $ path + runResourceT $ do + runConduit $ C.sourceFileBS path .| C.sinkFileBS (root T.unpack key) + liftIO $ BC.writeFile (root T.unpack key <.> "metadata") (BC.pack $ show mt) + pure key + , getFile = \key outroot -> do + let outpath = outroot T.unpack key + inpath = root T.unpack key + runResourceT $ runConduit $ C.sourceFileBS inpath .| C.sinkFileBS outpath + pure outpath + , getMetadata = \key -> do + let path = root T.unpack key <.> "metadata" + read . BC.unpack <$> BC.readFile path + } + +genMetadata :: Gen Metadata +genMetadata = convert <$> Gen.map (Range.linear 0 5) ((,) <$> text <*> text) + where + convert :: Hashable a => Map a b -> HashMap a b + convert = HM.fromList . M.toList + text = Gen.text (Range.linear 1 32) Gen.alpha + +data File = File { name :: Text, chunks :: Int, metadata :: Metadata } + deriving Show + +genFile :: Gen File +genFile = File + <$> Gen.text (Range.linear 1 32) Gen.alphaNum + <*> Gen.integral (Range.linear 0 8192) + <*> genMetadata + +data FileCommand + = PutFile File + | ReadFile Int + | ReadMetadata Int + deriving Show + +genCommand :: Gen FileCommand +genCommand = Gen.choice + [ PutFile <$> genFile + , ReadFile <$> Gen.integral (Range.linear 0 1000) + , ReadMetadata <$> Gen.integral (Range.linear 0 1000) + ] + + +write :: FilePath -> File -> IO FilePath +write root f = do + let path = root T.unpack (name f) + runResourceT $ runConduit $ C.replicateM (chunks f) (liftIO $ Random.getRandomBytes 256) .| C.sinkFileBS path + pure path + +specGetPut :: PropertyT IO () +specGetPut = do + file <- forAll genFile + (wanted, got) <- liftIO $ flip runContT pure $ do + root <- withTempDir "test-getput-in" + modelroot <- withTempDir "test-getput-model" + downloadroot <- withTempDir "test-getput-download" + liftIO $ do + interface <- buildModel modelroot + inpath <- write root file + key <- putFile interface inpath (metadata file) + outpath <- getFile interface key downloadroot + (,) <$> hash @SHA256 inpath <*> hash @SHA256 outpath + wanted === got + +data Result = Result + { resultKey :: Text + , resultHash :: Digest SHA256 + , resultOriginHash :: Digest SHA256 + } + deriving (Show, Eq) + +specGetPutS3 :: ConfigS3 -> PropertyT IO () +specGetPutS3 config = do + file <- forAll genFile + annotateShow file + (wanted, got) <- liftIO $ flip runContT pure $ do + root <- withTempDir "test-getput-in" + modelroot <- withTempDir "test-getput-model" + downloadroot <- withTempDir "test-getput-download" + liftIO $ do + inpath <- write root file + let mt = metadata file + modelInt <- buildModel modelroot + s3Int <- buildInterface config + (,) <$> create modelInt downloadroot inpath mt <*> create s3Int downloadroot inpath mt + wanted === got + where + create interface downloadroot inpath mt = do + key <- putFile interface inpath mt + s3path <- getFile interface key downloadroot + Result <$> pure key <*> hash s3path <*> hash inpath + +-- Contract test for the interface + +spec :: Maybe ConfigS3 -> Spec +spec mConfig = + describe "Contract test between S3 and the model" $ do + context "Model" $ do + it "satisfies the get-put property for content" $ hedgehog specGetPut + flip (maybe (pure ())) mConfig $ \config -> do + context "S3" $ do + it "satisfies the get-put property for content" $ hedgehog (specGetPutS3 config)