Compare commits

...

12 Commits

Author SHA1 Message Date
7e448da839 Use comonads 2019-01-24 21:02:27 +02:00
8833880c22 Clean up 2019-01-02 20:13:20 +02:00
53b86d7efb Parameterize on range and algorithm 2019-01-02 20:08:30 +02:00
27a236619e Optional range 2019-01-02 20:04:46 +02:00
4139562a49 Remove the intermediary list 2019-01-02 19:05:44 +02:00
22c19d0729 Print current state 2019-01-02 18:12:30 +02:00
61a6efe94e fromList and toList methods 2019-01-02 18:10:31 +02:00
08fdd68302 Fix memory profile 2019-01-02 08:02:40 +02:00
b8884d4bf4 Strictness 2019-01-01 23:47:32 +02:00
c7e074b73f Base feature set done 2019-01-01 23:36:58 +02:00
6649744654 Debug printing 2019-01-01 23:14:34 +02:00
b61ca54011 Record to fingerprint 2019-01-01 21:06:48 +02:00
3 changed files with 102 additions and 31 deletions

View File

@ -35,5 +35,7 @@ executable imageduplicates
, recursion-schemes , recursion-schemes
, text , text
, transformers , transformers
, directory
, filepath
hs-source-dirs: src hs-source-dirs: src
default-language: Haskell2010 default-language: Haskell2010

View File

@ -1,17 +1,20 @@
{-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE KindSignatures #-} {-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-}
module Data.BKTree where module Data.BKTree where
import Data.Functor.Foldable.TH
import Data.Functor.Foldable import Data.Functor.Foldable
import Data.Functor.Foldable.TH
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Data.List (foldl')
import Data.Foldable (foldMap)
import Data.Monoid (Endo(..))
-- Point for testing purposes -- Point for testing purposes
data Point = Point Int Int deriving Show data Point = Point Int Int deriving Show
@ -22,11 +25,25 @@ instance Metric Point where
class Metric a where class Metric a where
distance :: a -> a -> Int distance :: a -> a -> Int
data Tuple a = Tuple !Int a deriving (Show, Functor, Foldable, Traversable)
data BKTree a = Empty data BKTree a = Empty
| Node a [(Int, BKTree a)] deriving (Show, Generic) | Node !a [Tuple (BKTree a)] deriving (Show, Generic, Functor, Traversable, Foldable)
makeBaseFunctor ''BKTree makeBaseFunctor ''BKTree
empty :: BKTree a
empty = Empty
singleton :: Metric a => a -> BKTree a
singleton a = insert a empty
fromList :: Metric a => [a] -> BKTree a
fromList = foldl' (\acc x -> insert x acc) empty
toList :: BKTree a -> [a]
toList tree = appEndo (foldMap (\x -> Endo ([x] ++)) tree) []
insert :: Metric a => a -> BKTree a -> BKTree a insert :: Metric a => a -> BKTree a -> BKTree a
insert a = \case insert a = \case
Empty -> Node a [] Empty -> Node a []
@ -35,9 +52,9 @@ insert a = \case
in Node b (addChild newDistance children) in Node b (addChild newDistance children)
where where
addChild d = \case addChild d = \case
[] -> (d, insert a Empty) : [] [] -> (Tuple d (insert a Empty)) : []
(d',child):children | d == d' -> (d', insert a child) : children (Tuple d' child):children | d == d' -> (Tuple d' (insert a child)) : children
| otherwise -> (d',child) : addChild d children | otherwise -> (Tuple d' child) : addChild d children
search :: forall a. Metric a => Int -> a -> BKTree a -> [a] search :: forall a. Metric a => Int -> a -> BKTree a -> [a]
@ -50,5 +67,5 @@ search n a tree = cata alg tree
let thisDistance = distance a x let thisDistance = distance a x
upper = thisDistance + n upper = thisDistance + n
lower = thisDistance - n lower = thisDistance - n
filteredChildren = concat [xs | (d, xs) <- children, d <= upper, d >= lower] filteredChildren = concat [xs | Tuple d xs <- children, d <= upper, d >= lower]
in if thisDistance <= n then x : filteredChildren else filteredChildren in if thisDistance <= n then x : filteredChildren else filteredChildren

View File

@ -1,45 +1,97 @@
{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
module Main where module Main where
import Codec.Picture import Codec.Picture
import Codec.Picture.Extra (scaleBilinear) import Codec.Picture.Extra (scaleBilinear)
import Data.Bifunctor (second) import Control.Comonad (extend, extract)
import Control.Comonad.Store (Store, experiment, seek, store)
import Control.Exception (SomeException, try)
import Control.Monad (forM_, void, when)
import Data.Bits import Data.Bits
import Data.List (foldl') import qualified Data.BKTree as BK
import Data.Word (Word64) import Data.List (foldl')
import Data.Maybe (fromMaybe)
import Data.Word (Word64)
import Options.Generic import Options.Generic
import Pipes import Pipes
import Pipes.Files import Pipes.Files
import qualified Pipes.Prelude as P import qualified Pipes.Prelude as P
import Pipes.Safe (runSafeT) import Pipes.Safe (runSafeT)
import System.Directory (createDirectoryIfMissing,
createFileLink)
import System.FilePath (takeFileName, (</>))
data Alg = Average | DHash deriving (Read, Show, Generic)
instance ParseField Alg
data Cmd = Cmd { source :: FilePath data Cmd = Cmd { source :: FilePath
, target :: FilePath , target :: FilePath
, recursive :: Bool , range :: Maybe Int
, algorithm :: Maybe Alg
} deriving (Show, Generic, ParseRecord) } deriving (Show, Generic, ParseRecord)
fingerprint :: DynamicImage -> Word64 data Fingerprint =
fingerprint = hash . grey . scale . convertRGB8 Fingerprint { imagePath :: FilePath
, hash :: !Word64
} deriving Show
instance BK.Metric Fingerprint where
-- hamming distance
distance (Fingerprint _ a) (Fingerprint _ b) =
let xored = a `xor` b
in foldr (\shiftA acc -> acc + if 1 `shift` shiftA .&. xored > 0 then 1 else 0) 0 [0..63]
fingerprint :: Alg -> DynamicImage -> Word64
fingerprint alg = hash . grey . scale . convertRGB8
where where
scale :: Image PixelRGB8 -> Image PixelRGB8 scale :: Image PixelRGB8 -> Image PixelRGB8
scale = scaleBilinear 8 8 scale = scaleBilinear 8 8
grey :: Image PixelRGB8 -> Image Pixel8 grey :: Image PixelRGB8 -> Image Pixel8
grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11))) grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11)))
indexes = [(x,y) | x <- [0..7], y <- [0..7]]
hashWith :: Image Pixel8 -> (Store (Int, Int) Pixel8 -> Bool) -> Word64
hashWith img f =
let s = store (uncurry (pixelAt img)) (0,0)
img' = extend f s
at pos = extract $ seek pos img'
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] $ map at indexes
hash :: Image Pixel8 -> Word64 hash :: Image Pixel8 -> Word64
hash img = -- the average fingerprint method hash img = case alg of
let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | x <- [0..7], y <- [0..7]] `div` 64) Average ->
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] [pixelAt img x y > avg | x <- [0..7], y <- [0..7]] let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | (x,y) <- indexes] `div` 64)
in img `hashWith` (avgAlg avg)
DHash -> img `hashWith` dAlg
dAlg :: Store (Int, Int) Pixel8 -> Bool
dAlg img = case experiment (\(x,y) -> [(x,y), (succ x `mod` 8, y)]) img of
[l,r] -> l > r
_ -> False
avgAlg :: Pixel8 -> Store (Int, Int) Pixel8 -> Bool
avgAlg avg img = extract img > avg
main :: IO () main :: IO ()
main = do main = do
Cmd{..} <- getRecord "Image duplicate finder" Cmd{..} <- getRecord "Image duplicate finder"
runSafeT $ index <- runSafeT (P.fold foldTree BK.empty id (find source (glob "*.jpg" <> regular)
runEffect $ >-> P.mapM readImg
for (find source (glob "*.jpg" <> regular) >-> P.mapM (\path -> fmap (path,) <$> liftIO (readImage path)) >-> P.map (fmap (second fingerprint))) (liftIO . print) >-> P.map (fmap (toFingerprint (fromMaybe Average algorithm)))))
putStrLn "Hello, Haskell!" forM_ index $ \fp -> do
let similar = BK.search (fromMaybe 1 range) fp index
when (length similar > 1) $ do
print similar
let targetDir = target </> show (hash fp)
createDirectoryIfMissing True targetDir
forM_ similar $ \fp' ->
void $ try @SomeException (createFileLink (imagePath fp') (targetDir </> takeFileName (imagePath fp')))
where
readImg path = liftIO (putStrLn path >> fmap (path,) <$> readImage path)
foldTree acc = either (const acc) (\x -> x `seq` BK.insert x acc)
toFingerprint alg (path,img) = Fingerprint path (fingerprint alg img)