Compare commits

..

1 Commits

Author SHA1 Message Date
b33b45a4ea wip 2019-01-01 20:49:26 +02:00
3 changed files with 35 additions and 93 deletions

View File

@ -35,7 +35,5 @@ executable imageduplicates
, recursion-schemes
, text
, transformers
, directory
, filepath
hs-source-dirs: src
default-language: Haskell2010

View File

@ -1,20 +1,17 @@
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.BKTree where
import Data.Functor.Foldable
import Data.Functor.Foldable.TH
import Data.Functor.Foldable
import GHC.Generics (Generic)
import Data.List (foldl')
import Data.Foldable (foldMap)
import Data.Monoid (Endo(..))
-- Point for testing purposes
data Point = Point Int Int deriving Show
@ -25,25 +22,11 @@ instance Metric Point where
class Metric a where
distance :: a -> a -> Int
data Tuple a = Tuple !Int a deriving (Show, Functor, Foldable, Traversable)
data BKTree a = Empty
| Node !a [Tuple (BKTree a)] deriving (Show, Generic, Functor, Traversable, Foldable)
| Node a [(Int, BKTree a)] deriving (Show, Generic)
makeBaseFunctor ''BKTree
empty :: BKTree a
empty = Empty
singleton :: Metric a => a -> BKTree a
singleton a = insert a empty
fromList :: Metric a => [a] -> BKTree a
fromList = foldl' (\acc x -> insert x acc) empty
toList :: BKTree a -> [a]
toList tree = appEndo (foldMap (\x -> Endo ([x] ++)) tree) []
insert :: Metric a => a -> BKTree a -> BKTree a
insert a = \case
Empty -> Node a []
@ -52,9 +35,9 @@ insert a = \case
in Node b (addChild newDistance children)
where
addChild d = \case
[] -> (Tuple d (insert a Empty)) : []
(Tuple d' child):children | d == d' -> (Tuple d' (insert a child)) : children
| otherwise -> (Tuple d' child) : addChild d children
[] -> (d, insert a Empty) : []
(d',child):children | d == d' -> (d', insert a child) : children
| otherwise -> (d',child) : addChild d children
search :: forall a. Metric a => Int -> a -> BKTree a -> [a]
@ -67,5 +50,5 @@ search n a tree = cata alg tree
let thisDistance = distance a x
upper = thisDistance + n
lower = thisDistance - n
filteredChildren = concat [xs | Tuple d xs <- children, d <= upper, d >= lower]
filteredChildren = concat [xs | (d, xs) <- children, d <= upper, d >= lower]
in if thisDistance <= n then x : filteredChildren else filteredChildren

View File

@ -1,97 +1,58 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
module Main where
import Codec.Picture
import Codec.Picture.Extra (scaleBilinear)
import Control.Comonad (extend, extract)
import Control.Comonad.Store (Store, experiment, seek, store)
import Control.Exception (SomeException, try)
import Control.Monad (forM_, void, when)
import Codec.Picture.Extra (scaleBilinear)
import Data.Bifunctor (second)
import Data.Bits
import qualified Data.BKTree as BK
import Data.List (foldl')
import Data.Maybe (fromMaybe)
import Data.Word (Word64)
import Data.List (foldl')
import Data.Word (Word64)
import Options.Generic
import Pipes
import Pipes.Files
import qualified Pipes.Prelude as P
import Pipes.Safe (runSafeT)
import System.Directory (createDirectoryIfMissing,
createFileLink)
import System.FilePath (takeFileName, (</>))
import qualified Pipes.Prelude as P
import Pipes.Safe (runSafeT)
import qualified Data.BKTree as BK
data Alg = Average | DHash deriving (Read, Show, Generic)
instance ParseField Alg
data Cmd = Cmd { source :: FilePath
, target :: FilePath
, range :: Maybe Int
, algorithm :: Maybe Alg
, recursive :: Bool
} deriving (Show, Generic, ParseRecord)
data Fingerprint =
Fingerprint { imagePath :: FilePath
, hash :: !Word64
, hash :: !Word64
} deriving Show
instance BK.Metric Fingerprint where
-- hamming distance
distance (Fingerprint _ a) (Fingerprint _ b) =
let xored = a `xor` b
in foldr (\shiftA acc -> acc + if 1 `shift` shiftA .&. xored > 0 then 1 else 0) 0 [0..63]
in _
fingerprint :: Alg -> DynamicImage -> Word64
fingerprint alg = hash . grey . scale . convertRGB8
fingerprint :: DynamicImage -> Word64
fingerprint = hash . grey . scale . convertRGB8
where
scale :: Image PixelRGB8 -> Image PixelRGB8
scale = scaleBilinear 8 8
grey :: Image PixelRGB8 -> Image Pixel8
grey = pixelMap (\(PixelRGB8 r g b) -> ceiling ((fromIntegral r * (0.3 :: Double)) + (fromIntegral g * 0.59) + (fromIntegral b * 0.11)))
indexes = [(x,y) | x <- [0..7], y <- [0..7]]
hashWith :: Image Pixel8 -> (Store (Int, Int) Pixel8 -> Bool) -> Word64
hashWith img f =
let s = store (uncurry (pixelAt img)) (0,0)
img' = extend f s
at pos = extract $ seek pos img'
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] $ map at indexes
hash :: Image Pixel8 -> Word64
hash img = case alg of
Average ->
let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | (x,y) <- indexes] `div` 64)
in img `hashWith` (avgAlg avg)
DHash -> img `hashWith` dAlg
dAlg :: Store (Int, Int) Pixel8 -> Bool
dAlg img = case experiment (\(x,y) -> [(x,y), (succ x `mod` 8, y)]) img of
[l,r] -> l > r
_ -> False
avgAlg :: Pixel8 -> Store (Int, Int) Pixel8 -> Bool
avgAlg avg img = extract img > avg
hash img = -- the average fingerprint method
let avg = fromIntegral (foldl' (\acc (x,y) -> acc + fromIntegral (pixelAt img x y)) (0 :: Int) [(x,y) | x <- [0..7], y <- [0..7]] `div` 64)
in foldr (\(shiftA, b) acc -> if b then 1 `shift` shiftA .|. acc else acc) 0 $ zip [0..] [pixelAt img x y > avg | x <- [0..7], y <- [0..7]]
main :: IO ()
main = do
Cmd{..} <- getRecord "Image duplicate finder"
index <- runSafeT (P.fold foldTree BK.empty id (find source (glob "*.jpg" <> regular)
>-> P.mapM readImg
>-> P.map (fmap (toFingerprint (fromMaybe Average algorithm)))))
forM_ index $ \fp -> do
let similar = BK.search (fromMaybe 1 range) fp index
when (length similar > 1) $ do
print similar
let targetDir = target </> show (hash fp)
createDirectoryIfMissing True targetDir
forM_ similar $ \fp' ->
void $ try @SomeException (createFileLink (imagePath fp') (targetDir </> takeFileName (imagePath fp')))
where
readImg path = liftIO (putStrLn path >> fmap (path,) <$> readImage path)
foldTree acc = either (const acc) (\x -> x `seq` BK.insert x acc)
toFingerprint alg (path,img) = Fingerprint path (fingerprint alg img)
runSafeT $
runEffect $
for (find source (glob "*.jpg" <> regular) >-> P.mapM (\path -> fmap (path,) <$> liftIO (readImage path)) >-> P.map (fmap (second fingerprint))) (liftIO . print)
putStrLn "Hello, Haskell!"