Compare commits

...

2 Commits

Author SHA1 Message Date
Mats Rauhala c668a1b329 Test and contract test the s3 interface 2023-02-27 18:16:01 +02:00
Mats Rauhala e3807b3ade Configurability 2023-02-15 20:34:05 +02:00
24 changed files with 467 additions and 31 deletions

View File

@ -1,7 +1,10 @@
{ mkDerivation, amazonka, amazonka-s3, base, bytestring, conduit
, cryptonite, directory, ekg-core, filepath, generic-lens, lens
, lib, mtl, servant, servant-server, sqlite-simple, text, unliftio
, unordered-containers, wai, wai-middleware-metrics, warp
{ mkDerivation, aeson, amazonka, amazonka-s3, base, bytestring
, conduit, containers, cryptonite, dhall, directory, either
, ekg-core, filepath, generic-lens, hashable, hedgehog, hspec
, hspec-hedgehog, JuicyPixels, JuicyPixels-extra, lens, lib, mtl
, servant, servant-server, sqlite-simple, temporary, text
, transformers, unliftio, unordered-containers, vector, wai
, wai-middleware-metrics, warp
}:
mkDerivation {
pname = "image-backup";
@ -10,13 +13,19 @@ mkDerivation {
isLibrary = true;
isExecutable = true;
libraryHaskellDepends = [
amazonka amazonka-s3 base bytestring conduit cryptonite directory
ekg-core filepath generic-lens lens mtl servant servant-server
sqlite-simple text unliftio unordered-containers wai
amazonka amazonka-s3 base bytestring conduit cryptonite dhall
directory either ekg-core filepath generic-lens JuicyPixels
JuicyPixels-extra lens mtl servant servant-server sqlite-simple
text unliftio unordered-containers vector wai
wai-middleware-metrics warp
];
executableHaskellDepends = [ base ];
testHaskellDepends = [ base ];
testHaskellDepends = [
aeson amazonka amazonka-s3 base bytestring conduit containers
cryptonite dhall either filepath generic-lens hashable hedgehog
hspec hspec-hedgehog lens temporary text transformers
unordered-containers
];
license = lib.licenses.bsd3;
mainProgram = "image-backup";
}

3
dhall/Config/Type.dhall Normal file
View File

@ -0,0 +1,3 @@
{ s3 : ../S3/Type.dhall
, watcher : ../Watcher/Type.dhall
}

View File

@ -0,0 +1 @@
{=}

View File

@ -0,0 +1 @@
{ Type = ./Type.dhall, default = ./default.dhall }

4
dhall/S3/Type.dhall Normal file
View File

@ -0,0 +1,4 @@
{ region : Text
, endpoint : Text
, bucket : Text
}

1
dhall/S3/default.dhall Normal file
View File

@ -0,0 +1 @@
{=}

1
dhall/S3/package.dhall Normal file
View File

@ -0,0 +1 @@
{ Type = ./Type.dhall, default = ./default.dhall }

3
dhall/Watcher/Type.dhall Normal file
View File

@ -0,0 +1,3 @@
{ directories : List Text
, period : Natural
}

View File

@ -0,0 +1 @@
{ period = 300 }

View File

@ -0,0 +1 @@
{ Type = ./Type.dhall, default = ./default.dhall }

2
dhall/package.dhall Normal file
View File

@ -0,0 +1,2 @@
./Config/package.dhall
/\ { S3 = ./S3/package.dhall, Watcher = ./Watcher/package.dhall }

View File

@ -43,6 +43,8 @@
sops
hp.eventlog2html
dhall-lsp-server
];
};
}

2
hie.yaml Normal file
View File

@ -0,0 +1,2 @@
cradle:
cabal:

View File

@ -61,6 +61,11 @@ library
-- Modules exported by the library.
exposed-modules: MyLib
Servant.Metrics.Prometheus
Config
Config.S3
Config.Watcher
S3.Interface
Image.Fingerprint
-- Modules included in this library but not exported.
-- other-modules:
@ -76,6 +81,7 @@ library
, ekg-core
, servant
, servant-server
, vector
, cryptonite
, conduit
, filepath
@ -86,10 +92,14 @@ library
, wai-middleware-metrics
, unordered-containers
, wai
, JuicyPixels
, JuicyPixels-extra
, warp
, unliftio
, lens
, generic-lens
, dhall
, either
-- Directories containing source files.
hs-source-dirs: src
@ -130,7 +140,8 @@ test-suite image-backup-test
default-language: Haskell2010
-- Modules included in this executable, other than Main.
-- other-modules:
other-modules: Test.Config
Test.S3.Interface
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
@ -145,6 +156,25 @@ test-suite image-backup-test
main-is: Main.hs
-- Test dependencies.
build-depends:
base ^>=4.16.3.0,
image-backup
build-depends: base ^>=4.16.3.0
, image-backup
, hspec
, hedgehog
, hspec-hedgehog
, dhall
, aeson
, either
, temporary
, cryptonite
, conduit
, text
, filepath
, bytestring
, containers
, unordered-containers
, hashable
, transformers
, amazonka
, lens
, generic-lens
, amazonka-s3

6
sample_config.dhall Normal file
View File

@ -0,0 +1,6 @@
let Config = ./dhall/package.dhall
in Config::{
, s3 = Config.S3::{ bucket = "example", region = "fr-par", endpoint = "example.com" }
, watcher = Config.Watcher::{ directories = ["/tmp/foo"] }
}

19
src/Config.hs Normal file
View File

@ -0,0 +1,19 @@
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE StrictData #-}
module Config where
import Dhall.Deriving
import Dhall
import Config.S3 (ConfigS3)
import Config.Watcher (ConfigWatcher)
data Config = Config
{ s3 :: ConfigS3
, watcher :: ConfigWatcher
}
deriving (Show, Generic, Eq)
deriving (FromDhall, ToDhall) via Codec (Field AsIs) Config

18
src/Config/S3.hs Normal file
View File

@ -0,0 +1,18 @@
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE StrictData #-}
module Config.S3 where
import Dhall.Deriving
import Dhall
-- The S3 credentials provided by environment variables
data ConfigS3 = ConfigS3
{ region :: Text
, endpoint :: Text
, bucket :: Text
}
deriving (Show, Generic, Eq)
deriving (FromDhall, ToDhall) via Codec (Field AsIs) ConfigS3

20
src/Config/Watcher.hs Normal file
View File

@ -0,0 +1,20 @@
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE StrictData #-}
module Config.Watcher where
import Dhall.Deriving
import Dhall
newtype Seconds = Seconds { seconds :: Natural }
deriving (Show, Generic, Eq)
deriving (FromDhall, ToDhall) via Codec (SetSingletonConstructors Bare) Seconds
data ConfigWatcher = ConfigWatcher
{ directories :: [FilePath]
, period :: Seconds
}
deriving (Show, Generic, Eq)
deriving (FromDhall, ToDhall) via Codec AsIs ConfigWatcher

42
src/Image/Fingerprint.hs Normal file
View File

@ -0,0 +1,42 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE FlexibleContexts #-}
module Image.Fingerprint where
import Data.Word (Word64)
import Codec.Picture.Types
import qualified Data.Foldable as F
import qualified Data.Vector as V
import Data.Bits (Bits(..))
import Data.Bool (bool)
import Codec.Picture.Extra (scaleBilinear)
import Data.Maybe (fromMaybe)
import Codec.Picture (convertRGB16, readImage)
dhash :: FilePath -> IO (Either String Word64)
dhash path = fmap (dynamicMap' process) <$> readImage path
process :: (LumaPlaneExtractable a, Pixel a, Bounded (PixelBaseComponent a), Integral (PixelBaseComponent a)) => Image a -> Word64
process img = hash
where
hash = F.foldl' (\acc x -> (acc `shiftL` 1) .|. bool 0 1 x) 0 bits
bits = V.concat [V.generate 8 (\x -> pixelAt scaled x y < pixelAt scaled (x+1) y) | y <- [0..7]]
scaled = extractLumaPlane . scaleBilinear 9 8 $ img
dynamicMap' :: (forall x. (Bounded (PixelBaseComponent x), Integral (PixelBaseComponent x), LumaPlaneExtractable x) => Image x -> a) -> DynamicImage -> a
dynamicMap' f d = fromMaybe (f (convertRGB16 d)) (go d)
where
go = \case
ImageY8 i -> Just $ f i
ImageY16 i -> Just $ f i
ImageY32 i -> Just $ f i
ImageYF _ -> Nothing
ImageYA8 i -> Just $ f i
ImageYA16 _ -> Nothing
ImageRGB8 i -> Just $ f i
ImageRGB16 i -> Just $ f i
ImageRGBF _ -> Nothing
ImageRGBA8 i -> Just $ f i
ImageRGBA16 _ -> Nothing
ImageYCbCr8 i -> Just $ f i
ImageCMYK8 _ -> Nothing
ImageCMYK16 _ -> Nothing

View File

@ -1,9 +1,12 @@
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TupleSections #-}
module MyLib where
import Network.Wai.Metrics (metrics, registerWaiMetrics)
@ -18,16 +21,25 @@ import System.FilePath (isExtensionOf, takeBaseName)
import qualified Data.Conduit.Combinators as C
import Crypto.Hash (HashAlgorithm, Digest, SHA256)
import qualified Crypto.Hash as Crypto
import Amazonka (newEnv, Region(..), Env, Env'(..), setEndpoint, runResourceT, send, chunkedFile, defaultChunkSize)
import Amazonka.Auth (discover)
import Amazonka (Env, runResourceT, send, chunkedFile, defaultChunkSize)
import qualified Amazonka.S3 as S3
import qualified Data.Text as T
import Control.Lens (set, (&))
import Data.Generics.Product (field)
import Control.Monad (void)
import Control.Lens (view, set, (&), (^.))
import Data.Generics.Labels ()
import Control.Monad (void, forever)
import Data.HashMap.Strict (HashMap)
import Data.Text (Text)
import qualified Data.HashMap.Strict as HM
import System.Environment (getEnv)
import qualified Dhall
import Config (Config)
import Config.Watcher (Seconds)
import Control.Concurrent (threadDelay)
import UnliftIO (forConcurrently_)
import Data.Word (Word64)
import Image.Fingerprint (dhash)
import Data.Maybe (catMaybes)
import qualified S3.Interface
newtype Routes route
= Routes { _getMetrics :: route :- MetricsRoute }
@ -45,17 +57,21 @@ hash path = C.withSourceFile path $ \source ->
data Image = Image
{ imagePath :: !FilePath
, imageHash :: !(Digest SHA256)
, imageFingerprint :: !(Maybe Word64)
}
deriving Show
imageToMetadata :: Image -> HashMap Text Text
imageToMetadata img = HM.fromList [("sha256", T.pack . show . imageHash $ img)]
imageToMetadata img = HM.fromList $ catMaybes
[ Just ("sha256", T.pack . show . imageHash $ img)
, ("fingerprint", ) . T.pack . show <$> imageFingerprint img
]
uploadImage :: Env -> S3.BucketName -> Image -> IO S3.PutObjectResponse
uploadImage env bucket img = do
obj <- chunkedFile defaultChunkSize (imagePath img)
runResourceT $ do
let putObj = S3.newPutObject bucket key obj & set (field @"metadata") mt & set (field @"contentType") (Just "image/jpeg")
let putObj = S3.newPutObject bucket key obj & set #metadata mt & set #contentType (Just "image/jpeg")
key = S3.ObjectKey (T.pack . takeBaseName . imagePath $ img)
mt = imageToMetadata img
send env putObj
@ -64,22 +80,32 @@ scan :: (MonadResource m) => FilePath -> (FilePath -> Bool) -> ConduitT () FileP
scan root predicate = C.sourceDirectoryDeep False root .| C.filter predicate
analyze :: (MonadUnliftIO m) => ConduitT FilePath Image m ()
analyze = C.mapM (\p -> Image p <$> hash p)
analyze = C.mapM (\p -> Image p <$> hash p <*> fingerprint p)
where
fingerprint = fmap (either (const Nothing) Just) . liftIO . dhash
watcher :: S3.Interface.S3Interface -> FilePath -> Seconds -> IO ()
watcher int path delay = forever $ do
runResourceT $ runConduit $ scan path (isExtensionOf ".jpg")
.| analyze
.| C.iterM (liftIO . print)
.| C.map (\img -> (imagePath img, imageToMetadata img))
.| C.mapM_ (void . liftIO . uncurry (S3.Interface.putFile int))
threadDelay (fromIntegral (view #seconds delay) * 100_000)
someFunc :: IO ()
someFunc = do
discoveredEnv <- newEnv discover
let env = discoveredEnv
{ region = Region' "fr-par"
, overrides = setEndpoint True "s3.fr-par.scw.cloud" 443
}
bucket = S3.BucketName "introitu-photoprism"
_ <- runResourceT $ runConduit $ scan "/home/masse/wikidata/" (isExtensionOf ".jpg") .| analyze .| C.iterM (liftIO . print) .| C.mapM_ (void . liftIO . uploadImage env bucket)
putStrLn "done"
confPath <- getEnv "CONFIG_PATH"
conf <- Dhall.inputFile (Dhall.auto @Config) confPath
s3Interface <- S3.Interface.buildInterface (conf ^. #s3)
store <- newStore
waiMetrics <- registerWaiMetrics store
registerGcMetrics store
runSettings settings (metrics waiMetrics $ app store)
let watchers = [watcher s3Interface path (view (#watcher . #period) conf) | path <- view (#watcher . #directories) conf]
actions = ( runSettings settings (metrics waiMetrics $ app store) ) : watchers
forConcurrently_ actions id
-- runSettings settings (metrics waiMetrics $ app store)
where
settings =
defaultSettings &

65
src/S3/Interface.hs Normal file
View File

@ -0,0 +1,65 @@
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module S3.Interface where
import Data.HashMap.Strict (HashMap)
import Data.Text (Text)
import Config.S3 (ConfigS3)
import Amazonka (newEnv, Region(..), Env, Env'(..), setEndpoint, runResourceT, send, chunkedFile, defaultChunkSize, discover, sinkBody, hashedFile, RequestBody (..))
import qualified Amazonka.S3 as S3
import Control.Lens (view, re, (&), set)
import Data.Text.Strict.Lens (utf8)
import Data.Generics.Labels ()
import qualified Data.Text as T
import System.FilePath (takeBaseName, (</>))
import qualified Data.Conduit.Combinators as C
type Metadata = HashMap Text Text
type Key = Text
data S3Interface = S3Interface
{ putFile :: FilePath -> Metadata -> IO Key
-- ^ Upload a file from path
, getFile :: Key -> FilePath -> IO FilePath
-- ^ Download a file to a given root
, getMetadata :: Key -> IO Metadata
-- ^ Fetch the metadata
}
buildInterface :: ConfigS3 -> IO S3Interface
buildInterface s3Conf = do
discoveredEnv <- newEnv discover
let env = discoveredEnv
{ region = Region' $ view #region s3Conf
, overrides = setEndpoint True (view (#endpoint . re utf8) s3Conf) 443
}
bucket = S3.BucketName (view #bucket s3Conf)
pure S3Interface
{ putFile = uploadFile env bucket
, getFile = downloadFile env bucket
, getMetadata = fetchMeta env bucket
}
where
uploadFile :: Env -> S3.BucketName -> FilePath -> Metadata -> IO Key
uploadFile env bucket path metadata = do
obj <- Hashed <$> hashedFile path
runResourceT $ do
let putObj = S3.newPutObject bucket (S3.ObjectKey key) obj & set #metadata metadata
key = T.pack . takeBaseName $ path
_ <- send env putObj
pure key
fetchMeta :: Env -> S3.BucketName -> Key -> IO Metadata
fetchMeta env bucket key = do
runResourceT $ do
let getObjHead = S3.newHeadObject bucket (S3.ObjectKey key)
view #metadata <$> send env getObjHead
downloadFile :: Env -> S3.BucketName -> Key -> FilePath -> IO FilePath
downloadFile env bucket key root = do
let getObj = S3.newGetObject bucket (S3.ObjectKey key)
runResourceT $ do
x <- view #body <$> send env getObj
let path = root </> T.unpack key
_ <- sinkBody x (C.sinkFileBS path)
pure path

View File

@ -1,4 +1,14 @@
module Main (main) where
import qualified Test.Config
import qualified Test.S3.Interface
import Test.Hspec (hspec)
import System.Environment (lookupEnv)
import qualified Dhall
main :: IO ()
main = putStrLn "Test suite not yet implemented."
main = do
config <- lookupEnv "TEST_CONFIG_PATH" >>= maybe (pure Nothing) (fmap Just . Dhall.inputFile Dhall.auto)
hspec $ do
Test.Config.spec
Test.S3.Interface.spec config

29
test/Test/Config.hs Normal file
View File

@ -0,0 +1,29 @@
{-# LANGUAGE OverloadedStrings #-}
module Test.Config where
import Test.Hspec
import Config
import Config.Watcher
import Config.S3
import Dhall (inputFile, auto)
import Control.Monad.IO.Class (MonadIO(..))
spec :: Spec
spec = describe "Reading configuration" $ do
it "Can read the sample configuration" $ do
conf <- liftIO $ inputFile auto "./sample_config.dhall"
let wanted = Config
{ watcher = ConfigWatcher
{ directories = ["/tmp/foo"]
, period = Seconds 300
}
, s3 = ConfigS3
{ region = "fr-par"
, endpoint = "example.com"
, bucket = "example"
}
}
conf `shouldBe` wanted

140
test/Test/S3/Interface.hs Normal file
View File

@ -0,0 +1,140 @@
{-# LANGUAGE TypeApplications #-}
module Test.S3.Interface where
import Data.Generics.Labels ()
import Control.Monad.Trans.Cont
import Test.Hspec
import Test.Hspec.Hedgehog
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import S3.Interface
import qualified Data.Conduit.Combinators as C
import Conduit ((.|), runConduit, runResourceT, liftIO)
import qualified Data.Text as T
import System.FilePath (takeBaseName, (</>), (<.>))
import qualified Data.ByteString.Char8 as BC
import Data.Map (Map)
import Data.HashMap.Strict (HashMap)
import qualified Data.Map.Strict as M
import qualified Data.HashMap.Strict as HM
import Data.Hashable (Hashable)
import Data.Text (Text)
import qualified Crypto.Random as Random
import MyLib (hash)
import System.IO.Temp (withSystemTempDirectory)
import Crypto.Hash.Algorithms (SHA256)
import Config.S3 (ConfigS3)
import Crypto.Hash (Digest)
withTempDir :: FilePath -> ContT r IO FilePath
withTempDir path = ContT (withSystemTempDirectory path)
-- XXX: Temporarily have the model just here
buildModel :: FilePath -> IO S3Interface
buildModel root =
pure S3Interface
{ putFile = \path mt -> do
let key = T.pack . takeBaseName $ path
runResourceT $ do
runConduit $ C.sourceFileBS path .| C.sinkFileBS (root </> T.unpack key)
liftIO $ BC.writeFile (root </> T.unpack key <.> "metadata") (BC.pack $ show mt)
pure key
, getFile = \key outroot -> do
let outpath = outroot </> T.unpack key
inpath = root </> T.unpack key
runResourceT $ runConduit $ C.sourceFileBS inpath .| C.sinkFileBS outpath
pure outpath
, getMetadata = \key -> do
let path = root </> T.unpack key <.> "metadata"
read . BC.unpack <$> BC.readFile path
}
genMetadata :: Gen Metadata
genMetadata = convert <$> Gen.map (Range.linear 0 5) ((,) <$> text <*> text)
where
convert :: Hashable a => Map a b -> HashMap a b
convert = HM.fromList . M.toList
text = Gen.text (Range.linear 1 32) Gen.alpha
data File = File { name :: Text, chunks :: Int, metadata :: Metadata }
deriving Show
genFile :: Gen File
genFile = File
<$> Gen.text (Range.linear 1 32) Gen.alphaNum
<*> Gen.integral (Range.linear 0 8192)
<*> genMetadata
data FileCommand
= PutFile File
| ReadFile Int
| ReadMetadata Int
deriving Show
genCommand :: Gen FileCommand
genCommand = Gen.choice
[ PutFile <$> genFile
, ReadFile <$> Gen.integral (Range.linear 0 1000)
, ReadMetadata <$> Gen.integral (Range.linear 0 1000)
]
write :: FilePath -> File -> IO FilePath
write root f = do
let path = root </> T.unpack (name f)
runResourceT $ runConduit $ C.replicateM (chunks f) (liftIO $ Random.getRandomBytes 256) .| C.sinkFileBS path
pure path
specGetPut :: PropertyT IO ()
specGetPut = do
file <- forAll genFile
(wanted, got) <- liftIO $ flip runContT pure $ do
root <- withTempDir "test-getput-in"
modelroot <- withTempDir "test-getput-model"
downloadroot <- withTempDir "test-getput-download"
liftIO $ do
interface <- buildModel modelroot
inpath <- write root file
key <- putFile interface inpath (metadata file)
outpath <- getFile interface key downloadroot
(,) <$> hash @SHA256 inpath <*> hash @SHA256 outpath
wanted === got
data Result = Result
{ resultKey :: Text
, resultHash :: Digest SHA256
, resultOriginHash :: Digest SHA256
}
deriving (Show, Eq)
specGetPutS3 :: ConfigS3 -> PropertyT IO ()
specGetPutS3 config = do
file <- forAll genFile
annotateShow file
(wanted, got) <- liftIO $ flip runContT pure $ do
root <- withTempDir "test-getput-in"
modelroot <- withTempDir "test-getput-model"
downloadroot <- withTempDir "test-getput-download"
liftIO $ do
inpath <- write root file
let mt = metadata file
modelInt <- buildModel modelroot
s3Int <- buildInterface config
(,) <$> create modelInt downloadroot inpath mt <*> create s3Int downloadroot inpath mt
wanted === got
where
create interface downloadroot inpath mt = do
key <- putFile interface inpath mt
s3path <- getFile interface key downloadroot
Result <$> pure key <*> hash s3path <*> hash inpath
-- Contract test for the interface
spec :: Maybe ConfigS3 -> Spec
spec mConfig =
describe "Contract test between S3 and the model" $ do
context "Model" $ do
it "satisfies the get-put property for content" $ hedgehog specGetPut
flip (maybe (pure ())) mConfig $ \config -> do
context "S3" $ do
it "satisfies the get-put property for content" $ hedgehog (specGetPutS3 config)