70 lines
2.9 KiB
Haskell
70 lines
2.9 KiB
Haskell
{-# LANGUAGE DeriveAnyClass #-}
|
|
{-# LANGUAGE DeriveGeneric #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE RecordWildCards #-}
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
{-# LANGUAGE TupleSections #-}
|
|
{-# LANGUAGE TypeApplications #-}
|
|
module Main where
|
|
|
|
import Codec.Picture
|
|
import Codec.Picture.Extra (scaleBilinear)
|
|
import Control.Exception (SomeException, try)
|
|
import Control.Monad (forM_, void, when)
|
|
import Data.Bits
|
|
import qualified Data.BKTree as BK
|
|
import Data.Either (rights)
|
|
import Data.List (foldl')
|
|
import Data.Word (Word64)
|
|
import Options.Generic
|
|
import Pipes
|
|
import Pipes.Files
|
|
import qualified Pipes.Prelude as P
|
|
import Pipes.Safe (runSafeT)
|
|
import System.Directory (createDirectoryIfMissing, createFileLink)
|
|
import System.FilePath (takeFileName, (</>))
|
|
|
|
|
|
data Cmd = Cmd { source :: FilePath
|
|
, target :: FilePath
|
|
, recursive :: Bool
|
|
} deriving (Show, Generic, ParseRecord)
|
|
|
|
data Fingerprint =
|
|
Fingerprint { imagePath :: FilePath
|
|
, hash :: !Word64
|
|
} deriving Show
|
|
|
|
instance BK.Metric Fingerprint where
|
|
-- hamming distance
|
|
distance (Fingerprint _ a) (Fingerprint _ b) =
|
|
let xored = a `xor` b
|
|
in foldr (\shiftA acc -> acc + if 1 `shift` shiftA .&. xored > 0 then 1 else 0) 0 [0..63]
|
|
|
|
|
|
fingerprint :: DynamicImage -> Word64
|
|
fingerprint = hash . grey . scale . convertRGB8
|
|
where
|
|
scale :: Image PixelRGB8 -> Image PixelRGB8
|
|
scale = scaleBilinear 8 8
|
|
grey :: Image PixelRGB8 -> Image Pixel8
|
|
grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11)))
|
|
hash :: Image Pixel8 -> Word64
|
|
hash img = -- the average fingerprint method
|
|
let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | x <- [0..7], y <- [0..7]] `div` 64)
|
|
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] [pixelAt img x y > avg | x <- [0..7], y <- [0..7]]
|
|
|
|
main :: IO ()
|
|
main = do
|
|
Cmd{..} <- getRecord "Image duplicate finder"
|
|
fingerprints <- rights <$> runSafeT (P.toListM (find source (glob "*.jpg" <> regular) >-> P.mapM (\path -> fmap (path,) <$> liftIO (readImage path)) >-> P.map (fmap (\(path, img) -> Fingerprint path (fingerprint img)))))
|
|
let index = foldl' (flip BK.insert) BK.empty fingerprints
|
|
forM_ fingerprints $ \fp -> do
|
|
let similar = BK.search 1 fp index
|
|
when (length similar > 1) $ do
|
|
print similar
|
|
let targetDir = target </> show (hash fp)
|
|
createDirectoryIfMissing True targetDir
|
|
forM_ similar $ \fp' -> do
|
|
void $ try @SomeException (createFileLink (imagePath fp') (targetDir </> takeFileName (imagePath fp')))
|