Compare commits
13 Commits
4b6b9b8f19
...
master
Author | SHA1 | Date | |
---|---|---|---|
7e448da839 | |||
8833880c22 | |||
53b86d7efb | |||
27a236619e | |||
4139562a49 | |||
22c19d0729 | |||
61a6efe94e | |||
08fdd68302 | |||
b8884d4bf4 | |||
c7e074b73f | |||
6649744654 | |||
b61ca54011 | |||
dbb5cbb209 |
@ -18,12 +18,13 @@ cabal-version: >=1.10
|
|||||||
executable imageduplicates
|
executable imageduplicates
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
ghc-options: -Wall -threaded -rtsopts -O2
|
ghc-options: -Wall -threaded -rtsopts -O2
|
||||||
-- other-modules:
|
other-modules: Data.BKTree
|
||||||
-- other-extensions:
|
-- other-extensions:
|
||||||
build-depends: base >=4.11 && <4.12
|
build-depends: base >=4.11 && <4.12
|
||||||
, JuicyPixels
|
, JuicyPixels
|
||||||
, JuicyPixels-extra
|
, JuicyPixels-extra
|
||||||
, bytestring
|
, bytestring
|
||||||
|
, either
|
||||||
, generic-lens
|
, generic-lens
|
||||||
, lens
|
, lens
|
||||||
, mtl
|
, mtl
|
||||||
@ -31,7 +32,10 @@ executable imageduplicates
|
|||||||
, pipes
|
, pipes
|
||||||
, pipes-files
|
, pipes-files
|
||||||
, pipes-safe
|
, pipes-safe
|
||||||
|
, recursion-schemes
|
||||||
, text
|
, text
|
||||||
, transformers
|
, transformers
|
||||||
|
, directory
|
||||||
|
, filepath
|
||||||
hs-source-dirs: src
|
hs-source-dirs: src
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
71
src/Data/BKTree.hs
Normal file
71
src/Data/BKTree.hs
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
{-# LANGUAGE DeriveFoldable #-}
|
||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
|
{-# LANGUAGE DeriveGeneric #-}
|
||||||
|
{-# LANGUAGE DeriveTraversable #-}
|
||||||
|
{-# LANGUAGE KindSignatures #-}
|
||||||
|
{-# LANGUAGE LambdaCase #-}
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE TemplateHaskell #-}
|
||||||
|
{-# LANGUAGE TypeFamilies #-}
|
||||||
|
module Data.BKTree where
|
||||||
|
|
||||||
|
import Data.Functor.Foldable
|
||||||
|
import Data.Functor.Foldable.TH
|
||||||
|
import GHC.Generics (Generic)
|
||||||
|
import Data.List (foldl')
|
||||||
|
import Data.Foldable (foldMap)
|
||||||
|
import Data.Monoid (Endo(..))
|
||||||
|
|
||||||
|
-- Point for testing purposes
|
||||||
|
data Point = Point Int Int deriving Show
|
||||||
|
|
||||||
|
instance Metric Point where
|
||||||
|
distance (Point p1 p2) (Point q1 q2) = abs (p1 - q1) + abs (p2 - q2)
|
||||||
|
|
||||||
|
class Metric a where
|
||||||
|
distance :: a -> a -> Int
|
||||||
|
|
||||||
|
data Tuple a = Tuple !Int a deriving (Show, Functor, Foldable, Traversable)
|
||||||
|
|
||||||
|
data BKTree a = Empty
|
||||||
|
| Node !a [Tuple (BKTree a)] deriving (Show, Generic, Functor, Traversable, Foldable)
|
||||||
|
|
||||||
|
makeBaseFunctor ''BKTree
|
||||||
|
|
||||||
|
empty :: BKTree a
|
||||||
|
empty = Empty
|
||||||
|
|
||||||
|
singleton :: Metric a => a -> BKTree a
|
||||||
|
singleton a = insert a empty
|
||||||
|
|
||||||
|
fromList :: Metric a => [a] -> BKTree a
|
||||||
|
fromList = foldl' (\acc x -> insert x acc) empty
|
||||||
|
|
||||||
|
toList :: BKTree a -> [a]
|
||||||
|
toList tree = appEndo (foldMap (\x -> Endo ([x] ++)) tree) []
|
||||||
|
|
||||||
|
insert :: Metric a => a -> BKTree a -> BKTree a
|
||||||
|
insert a = \case
|
||||||
|
Empty -> Node a []
|
||||||
|
Node b children ->
|
||||||
|
let newDistance = distance a b
|
||||||
|
in Node b (addChild newDistance children)
|
||||||
|
where
|
||||||
|
addChild d = \case
|
||||||
|
[] -> (Tuple d (insert a Empty)) : []
|
||||||
|
(Tuple d' child):children | d == d' -> (Tuple d' (insert a child)) : children
|
||||||
|
| otherwise -> (Tuple d' child) : addChild d children
|
||||||
|
|
||||||
|
|
||||||
|
search :: forall a. Metric a => Int -> a -> BKTree a -> [a]
|
||||||
|
search n a tree = cata alg tree
|
||||||
|
where
|
||||||
|
alg :: BKTreeF a [a] -> [a]
|
||||||
|
alg = \case
|
||||||
|
EmptyF -> []
|
||||||
|
NodeF x children ->
|
||||||
|
let thisDistance = distance a x
|
||||||
|
upper = thisDistance + n
|
||||||
|
lower = thisDistance - n
|
||||||
|
filteredChildren = concat [xs | Tuple d xs <- children, d <= upper, d >= lower]
|
||||||
|
in if thisDistance <= n then x : filteredChildren else filteredChildren
|
84
src/Main.hs
84
src/Main.hs
@ -1,45 +1,97 @@
|
|||||||
{-# LANGUAGE DeriveAnyClass #-}
|
{-# LANGUAGE DeriveAnyClass #-}
|
||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
{-# LANGUAGE DeriveGeneric #-}
|
{-# LANGUAGE DeriveGeneric #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE RecordWildCards #-}
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
{-# LANGUAGE TupleSections #-}
|
{-# LANGUAGE TupleSections #-}
|
||||||
|
{-# LANGUAGE TypeApplications #-}
|
||||||
module Main where
|
module Main where
|
||||||
|
|
||||||
import Codec.Picture
|
import Codec.Picture
|
||||||
import Codec.Picture.Extra (scaleBilinear)
|
import Codec.Picture.Extra (scaleBilinear)
|
||||||
import Data.Bifunctor (second)
|
import Control.Comonad (extend, extract)
|
||||||
|
import Control.Comonad.Store (Store, experiment, seek, store)
|
||||||
|
import Control.Exception (SomeException, try)
|
||||||
|
import Control.Monad (forM_, void, when)
|
||||||
import Data.Bits
|
import Data.Bits
|
||||||
import Data.List (foldl')
|
import qualified Data.BKTree as BK
|
||||||
import Data.Word (Word64)
|
import Data.List (foldl')
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Word (Word64)
|
||||||
import Options.Generic
|
import Options.Generic
|
||||||
import Pipes
|
import Pipes
|
||||||
import Pipes.Files
|
import Pipes.Files
|
||||||
import qualified Pipes.Prelude as P
|
import qualified Pipes.Prelude as P
|
||||||
import Pipes.Safe (runSafeT)
|
import Pipes.Safe (runSafeT)
|
||||||
|
import System.Directory (createDirectoryIfMissing,
|
||||||
|
createFileLink)
|
||||||
|
import System.FilePath (takeFileName, (</>))
|
||||||
|
|
||||||
|
|
||||||
|
data Alg = Average | DHash deriving (Read, Show, Generic)
|
||||||
|
|
||||||
|
instance ParseField Alg
|
||||||
|
|
||||||
data Cmd = Cmd { source :: FilePath
|
data Cmd = Cmd { source :: FilePath
|
||||||
, target :: FilePath
|
, target :: FilePath
|
||||||
, recursive :: Bool
|
, range :: Maybe Int
|
||||||
|
, algorithm :: Maybe Alg
|
||||||
} deriving (Show, Generic, ParseRecord)
|
} deriving (Show, Generic, ParseRecord)
|
||||||
|
|
||||||
fingerprint :: DynamicImage -> Word64
|
data Fingerprint =
|
||||||
fingerprint = hash . grey . scale . convertRGB8
|
Fingerprint { imagePath :: FilePath
|
||||||
|
, hash :: !Word64
|
||||||
|
} deriving Show
|
||||||
|
|
||||||
|
instance BK.Metric Fingerprint where
|
||||||
|
-- hamming distance
|
||||||
|
distance (Fingerprint _ a) (Fingerprint _ b) =
|
||||||
|
let xored = a `xor` b
|
||||||
|
in foldr (\shiftA acc -> acc + if 1 `shift` shiftA .&. xored > 0 then 1 else 0) 0 [0..63]
|
||||||
|
|
||||||
|
fingerprint :: Alg -> DynamicImage -> Word64
|
||||||
|
fingerprint alg = hash . grey . scale . convertRGB8
|
||||||
where
|
where
|
||||||
scale :: Image PixelRGB8 -> Image PixelRGB8
|
scale :: Image PixelRGB8 -> Image PixelRGB8
|
||||||
scale = scaleBilinear 8 8
|
scale = scaleBilinear 8 8
|
||||||
grey :: Image PixelRGB8 -> Image Pixel8
|
grey :: Image PixelRGB8 -> Image Pixel8
|
||||||
grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11)))
|
grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11)))
|
||||||
|
indexes = [(x,y) | x <- [0..7], y <- [0..7]]
|
||||||
|
hashWith :: Image Pixel8 -> (Store (Int, Int) Pixel8 -> Bool) -> Word64
|
||||||
|
hashWith img f =
|
||||||
|
let s = store (uncurry (pixelAt img)) (0,0)
|
||||||
|
img' = extend f s
|
||||||
|
at pos = extract $ seek pos img'
|
||||||
|
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] $ map at indexes
|
||||||
hash :: Image Pixel8 -> Word64
|
hash :: Image Pixel8 -> Word64
|
||||||
hash img = -- the average fingerprint method
|
hash img = case alg of
|
||||||
let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | x <- [0..7], y <- [0..7]] `div` 64)
|
Average ->
|
||||||
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] [pixelAt img x y > avg | x <- [0..7], y <- [0..7]]
|
let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | (x,y) <- indexes] `div` 64)
|
||||||
|
in img `hashWith` (avgAlg avg)
|
||||||
|
DHash -> img `hashWith` dAlg
|
||||||
|
dAlg :: Store (Int, Int) Pixel8 -> Bool
|
||||||
|
dAlg img = case experiment (\(x,y) -> [(x,y), (succ x `mod` 8, y)]) img of
|
||||||
|
[l,r] -> l > r
|
||||||
|
_ -> False
|
||||||
|
avgAlg :: Pixel8 -> Store (Int, Int) Pixel8 -> Bool
|
||||||
|
avgAlg avg img = extract img > avg
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
Cmd{..} <- getRecord "Image duplicate finder"
|
Cmd{..} <- getRecord "Image duplicate finder"
|
||||||
runSafeT $
|
index <- runSafeT (P.fold foldTree BK.empty id (find source (glob "*.jpg" <> regular)
|
||||||
runEffect $
|
>-> P.mapM readImg
|
||||||
for (find source (glob "*.jpg" <> regular) >-> P.mapM (\path -> fmap (path,) <$> liftIO (readImage path)) >-> P.map (fmap (second fingerprint))) (liftIO . print)
|
>-> P.map (fmap (toFingerprint (fromMaybe Average algorithm)))))
|
||||||
putStrLn "Hello, Haskell!"
|
forM_ index $ \fp -> do
|
||||||
|
let similar = BK.search (fromMaybe 1 range) fp index
|
||||||
|
when (length similar > 1) $ do
|
||||||
|
print similar
|
||||||
|
let targetDir = target </> show (hash fp)
|
||||||
|
createDirectoryIfMissing True targetDir
|
||||||
|
forM_ similar $ \fp' ->
|
||||||
|
void $ try @SomeException (createFileLink (imagePath fp') (targetDir </> takeFileName (imagePath fp')))
|
||||||
|
where
|
||||||
|
readImg path = liftIO (putStrLn path >> fmap (path,) <$> readImage path)
|
||||||
|
foldTree acc = either (const acc) (\x -> x `seq` BK.insert x acc)
|
||||||
|
toFingerprint alg (path,img) = Fingerprint path (fingerprint alg img)
|
||||||
|
Reference in New Issue
Block a user