Compare commits

...

4 Commits

Author SHA1 Message Date
7e448da839 Use comonads 2019-01-24 21:02:27 +02:00
8833880c22 Clean up 2019-01-02 20:13:20 +02:00
53b86d7efb Parameterize on range and algorithm 2019-01-02 20:08:30 +02:00
27a236619e Optional range 2019-01-02 20:04:46 +02:00

View File

@ -1,4 +1,5 @@
{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE RecordWildCards #-}
@ -8,25 +9,34 @@
module Main where module Main where
import Codec.Picture import Codec.Picture
import Codec.Picture.Extra (scaleBilinear) import Codec.Picture.Extra (scaleBilinear)
import Control.Exception (SomeException, try) import Control.Comonad (extend, extract)
import Control.Monad (forM_, void, when) import Control.Comonad.Store (Store, experiment, seek, store)
import Control.Exception (SomeException, try)
import Control.Monad (forM_, void, when)
import Data.Bits import Data.Bits
import qualified Data.BKTree as BK import qualified Data.BKTree as BK
import Data.List (foldl') import Data.List (foldl')
import Data.Word (Word64) import Data.Maybe (fromMaybe)
import Data.Word (Word64)
import Options.Generic import Options.Generic
import Pipes import Pipes
import Pipes.Files import Pipes.Files
import qualified Pipes.Prelude as P import qualified Pipes.Prelude as P
import Pipes.Safe (runSafeT) import Pipes.Safe (runSafeT)
import System.Directory (createDirectoryIfMissing, createFileLink) import System.Directory (createDirectoryIfMissing,
import System.FilePath (takeFileName, (</>)) createFileLink)
import System.FilePath (takeFileName, (</>))
data Alg = Average | DHash deriving (Read, Show, Generic)
instance ParseField Alg
data Cmd = Cmd { source :: FilePath data Cmd = Cmd { source :: FilePath
, target :: FilePath , target :: FilePath
, recursive :: Bool , range :: Maybe Int
, algorithm :: Maybe Alg
} deriving (Show, Generic, ParseRecord) } deriving (Show, Generic, ParseRecord)
data Fingerprint = data Fingerprint =
@ -40,29 +50,48 @@ instance BK.Metric Fingerprint where
let xored = a `xor` b let xored = a `xor` b
in foldr (\shiftA acc -> acc + if 1 `shift` shiftA .&. xored > 0 then 1 else 0) 0 [0..63] in foldr (\shiftA acc -> acc + if 1 `shift` shiftA .&. xored > 0 then 1 else 0) 0 [0..63]
fingerprint :: Alg -> DynamicImage -> Word64
fingerprint :: DynamicImage -> Word64 fingerprint alg = hash . grey . scale . convertRGB8
fingerprint = hash . grey . scale . convertRGB8
where where
scale :: Image PixelRGB8 -> Image PixelRGB8 scale :: Image PixelRGB8 -> Image PixelRGB8
scale = scaleBilinear 8 8 scale = scaleBilinear 8 8
grey :: Image PixelRGB8 -> Image Pixel8 grey :: Image PixelRGB8 -> Image Pixel8
grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11))) grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11)))
indexes = [(x,y) | x <- [0..7], y <- [0..7]]
hashWith :: Image Pixel8 -> (Store (Int, Int) Pixel8 -> Bool) -> Word64
hashWith img f =
let s = store (uncurry (pixelAt img)) (0,0)
img' = extend f s
at pos = extract $ seek pos img'
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] $ map at indexes
hash :: Image Pixel8 -> Word64 hash :: Image Pixel8 -> Word64
hash img = -- the average fingerprint method hash img = case alg of
let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | x <- [0..7], y <- [0..7]] `div` 64) Average ->
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] [pixelAt img x y > avg | x <- [0..7], y <- [0..7]] let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | (x,y) <- indexes] `div` 64)
in img `hashWith` (avgAlg avg)
DHash -> img `hashWith` dAlg
dAlg :: Store (Int, Int) Pixel8 -> Bool
dAlg img = case experiment (\(x,y) -> [(x,y), (succ x `mod` 8, y)]) img of
[l,r] -> l > r
_ -> False
avgAlg :: Pixel8 -> Store (Int, Int) Pixel8 -> Bool
avgAlg avg img = extract img > avg
main :: IO () main :: IO ()
main = do main = do
Cmd{..} <- getRecord "Image duplicate finder" Cmd{..} <- getRecord "Image duplicate finder"
-- XXX: This is a really long line, split it up index <- runSafeT (P.fold foldTree BK.empty id (find source (glob "*.jpg" <> regular)
index <- runSafeT (P.fold (\acc -> either (const acc) (\x -> x `seq` BK.insert x acc)) BK.empty id (find source (glob "*.jpg" <> regular) >-> P.mapM (\path -> liftIO (putStrLn path) >> fmap (path,) <$> liftIO (readImage path)) >-> P.map (fmap (\(path, img) -> Fingerprint path (fingerprint img))))) >-> P.mapM readImg
>-> P.map (fmap (toFingerprint (fromMaybe Average algorithm)))))
forM_ index $ \fp -> do forM_ index $ \fp -> do
let similar = BK.search 1 fp index let similar = BK.search (fromMaybe 1 range) fp index
when (length similar > 1) $ do when (length similar > 1) $ do
print similar print similar
let targetDir = target </> show (hash fp) let targetDir = target </> show (hash fp)
createDirectoryIfMissing True targetDir createDirectoryIfMissing True targetDir
forM_ similar $ \fp' -> forM_ similar $ \fp' ->
void $ try @SomeException (createFileLink (imagePath fp') (targetDir </> takeFileName (imagePath fp'))) void $ try @SomeException (createFileLink (imagePath fp') (targetDir </> takeFileName (imagePath fp')))
where
readImg path = liftIO (putStrLn path >> fmap (path,) <$> readImage path)
foldTree acc = either (const acc) (\x -> x `seq` BK.insert x acc)
toFingerprint alg (path,img) = Fingerprint path (fingerprint alg img)