diff --git a/src/Control/Addressbook/Streaming.hs b/src/Control/Addressbook/Streaming.hs index 5c22554..1a62e53 100644 --- a/src/Control/Addressbook/Streaming.hs +++ b/src/Control/Addressbook/Streaming.hs @@ -43,23 +43,26 @@ chunks n = L.unfoldr $ \case run :: IO () run = do datDir <- fromMaybe "./" <$> lookupEnv "HOME" + let datFile = datDir </> ".addressbook.dat" + original <- Set.fromList . map LBS.toStrict . lbsLines <$> LBS.readFile datFile xs <- LBS.getContents >>= stream - let set = F.fold (parMap rseq F.fold (chunks 20 xs)) + let set = original `Set.union` F.fold (parMap rseq F.fold (chunks 20 xs)) runResourceT $ runConduit $ CL.sourceList (Set.elems set) .| C.map (<> "\n") - .| CB.sinkFileCautious (datDir </> ".addressbook.dat") + .| CB.sinkFileCautious datFile where separate = \case From x -> [x] To xs -> F.toList xs -- A set of (locally) unique addresses. Composes with parMap + lbsLines = LBS.split (fromIntegral $ ord '\n') stream :: LBS.ByteString -> IO [Set ByteString] stream = traverse (unsafeInterleaveIO . parse . LBC.unpack) . filter (not . LBS.null) - . LBS.split (fromIntegral $ ord '\n') + . lbsLines parse path = runResourceT $ runConduit $