Bring the addressbook back up to date
- bytestring-trie is not available - hls - modern shell.nix - base - Re-implement trie, with benchmarks - Realize my implementation of trie is slower than Data.Map, use that instead
This commit is contained in:
		@@ -12,7 +12,7 @@ import qualified Data.Conduit.Combinators as C
 | 
			
		||||
import qualified Data.Conduit.List as CL
 | 
			
		||||
import qualified Data.Conduit.Text as CT
 | 
			
		||||
 | 
			
		||||
import qualified Data.Trie as Trie
 | 
			
		||||
import qualified Data.Map.Strict as Map
 | 
			
		||||
 | 
			
		||||
import System.IO
 | 
			
		||||
       (stdout)
 | 
			
		||||
@@ -26,22 +26,26 @@ import System.FilePath
 | 
			
		||||
 | 
			
		||||
import Control.Exception
 | 
			
		||||
       (catch)
 | 
			
		||||
import Data.ByteString (ByteString)
 | 
			
		||||
import qualified Data.ByteString.Char8 as B
 | 
			
		||||
 | 
			
		||||
-- XXX: The current Data.Trie implementation is much slower than Data.Set
 | 
			
		||||
query :: Text -> IO ()
 | 
			
		||||
query prefix = do
 | 
			
		||||
  datDir <- fromMaybe "./" <$> lookupEnv "HOME"
 | 
			
		||||
  state <- catch @IOError (runResourceT $ runConduit $ readState datDir) (\_ -> pure Trie.empty)
 | 
			
		||||
  state <- catch @IOError (runResourceT $ runConduit $ readState datDir) (\_ -> pure Map.empty)
 | 
			
		||||
  runConduit $ outputResults state
 | 
			
		||||
  where
 | 
			
		||||
    readState :: FilePath -> ConduitM () Void (ResourceT IO) (Trie.Trie [Text])
 | 
			
		||||
    readState :: FilePath -> ConduitM () Void (ResourceT IO) (Map.Map ByteString [Text])
 | 
			
		||||
    readState dir =
 | 
			
		||||
      CB.sourceFile (dir </> ".addressbook.dat")
 | 
			
		||||
        .| CT.decode CT.utf8
 | 
			
		||||
        .| CT.lines
 | 
			
		||||
        .| C.foldMap (\s -> Trie.singleton (TE.encodeUtf8 $ T.toLower s) [s])
 | 
			
		||||
    outputResults :: Trie.Trie [Text] -> ConduitM () Void IO ()
 | 
			
		||||
        .| C.foldMap (\s -> Map.singleton (TE.encodeUtf8 $ T.toLower s) [s])
 | 
			
		||||
    prefixB = TE.encodeUtf8 prefix
 | 
			
		||||
    outputResults :: Map.Map ByteString [Text] -> ConduitM () Void IO ()
 | 
			
		||||
    outputResults state =
 | 
			
		||||
      CL.sourceList (Trie.elems $ Trie.submap (TE.encodeUtf8 prefix) state)
 | 
			
		||||
      CL.sourceList (Map.elems $ Map.filterWithKey (\k _ -> prefixB `B.isPrefixOf` k)  state)
 | 
			
		||||
        .| C.concat
 | 
			
		||||
        .| C.map (<> "\n")
 | 
			
		||||
        .| CT.encode CT.utf8
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										87
									
								
								src/Data/Trie.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								src/Data/Trie.hs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,87 @@
 | 
			
		||||
-- The bytestring-trie is marked as broken. Trie is a simple(ish) datastructure, implement one here
 | 
			
		||||
{-# LANGUAGE ScopedTypeVariables #-}
 | 
			
		||||
{-# LANGUAGE DeriveFunctor #-}
 | 
			
		||||
{-# LANGUAGE TypeApplications #-}
 | 
			
		||||
{-# LANGUAGE DeriveFoldable #-}
 | 
			
		||||
module Data.Trie where
 | 
			
		||||
import Data.ByteString (ByteString)
 | 
			
		||||
import Data.Word (Word8)
 | 
			
		||||
import Data.Foldable (foldl')
 | 
			
		||||
import Data.Semigroup (Last(..))
 | 
			
		||||
import qualified Data.ByteString as B
 | 
			
		||||
import Data.Coerce (coerce)
 | 
			
		||||
import Data.IntMap (IntMap)
 | 
			
		||||
import qualified Data.IntMap.Strict as M
 | 
			
		||||
 | 
			
		||||
data Pair a b = Pair !a !b
 | 
			
		||||
  deriving (Functor, Foldable)
 | 
			
		||||
 | 
			
		||||
data Trie a
 | 
			
		||||
  = Empty
 | 
			
		||||
  | Branch !(Maybe (Pair ByteString a)) !(IntMap (Trie a))
 | 
			
		||||
  deriving (Functor, Foldable)
 | 
			
		||||
 | 
			
		||||
instance Semigroup a => Semigroup (Trie a) where
 | 
			
		||||
  (<>) = union
 | 
			
		||||
 | 
			
		||||
instance Monoid a => Monoid (Trie a) where
 | 
			
		||||
  mempty = Empty
 | 
			
		||||
 | 
			
		||||
instance Show a => Show (Trie a) where
 | 
			
		||||
  show = show . toList
 | 
			
		||||
 | 
			
		||||
keys :: Trie a -> [ByteString]
 | 
			
		||||
keys Empty = []
 | 
			
		||||
keys (Branch Nothing m) = concatMap keys (M.elems m)
 | 
			
		||||
keys (Branch (Just (Pair b _)) m) = b : concatMap keys (M.elems m)
 | 
			
		||||
 | 
			
		||||
union :: Semigroup a => Trie a -> Trie a -> Trie a
 | 
			
		||||
union Empty r = r
 | 
			
		||||
union l Empty = l
 | 
			
		||||
union (Branch m_pa cl) (Branch ma cr) = Branch (merge m_pa ma) (M.unionWith union cl cr)
 | 
			
		||||
  where
 | 
			
		||||
    merge (Just (Pair x a)) (Just (Pair _ b)) = Just $ Pair x (a <> b)
 | 
			
		||||
    merge Nothing r = r
 | 
			
		||||
    merge l Nothing = l
 | 
			
		||||
 | 
			
		||||
unionR :: forall a. Trie a -> Trie a -> Trie a
 | 
			
		||||
unionR a b = coerce @(Trie (Last a)) $ union (coerce a) (coerce b)
 | 
			
		||||
 | 
			
		||||
singleton :: forall a. ByteString -> a -> Trie a
 | 
			
		||||
singleton bs a = go (B.uncons bs)
 | 
			
		||||
  where
 | 
			
		||||
    go :: Maybe (Word8, ByteString) -> Trie a
 | 
			
		||||
    go Nothing =
 | 
			
		||||
      let x = Pair bs a
 | 
			
		||||
      in x `seq` Branch (Just (Pair bs a)) M.empty
 | 
			
		||||
    go (Just (w,c)) =
 | 
			
		||||
      let y = M.singleton (fromIntegral w) (go (B.uncons c))
 | 
			
		||||
      in y `seq` Branch Nothing y
 | 
			
		||||
 | 
			
		||||
empty :: Trie a
 | 
			
		||||
empty = Empty
 | 
			
		||||
 | 
			
		||||
elems :: Trie a -> [a]
 | 
			
		||||
elems = foldr (:) []
 | 
			
		||||
 | 
			
		||||
submap :: forall a. ByteString -> Trie a -> Trie a
 | 
			
		||||
submap bs = go (B.uncons bs)
 | 
			
		||||
  where
 | 
			
		||||
    go :: Maybe (Word8, ByteString) -> Trie a -> Trie a
 | 
			
		||||
    go _ Empty = Empty
 | 
			
		||||
    go Nothing t = t
 | 
			
		||||
    go (Just (w,cs)) (Branch _ c) = maybe empty (go (B.uncons cs)) $ M.lookup (fromIntegral w) c
 | 
			
		||||
 | 
			
		||||
insert :: forall a. ByteString -> a -> Trie a -> Trie a
 | 
			
		||||
insert bs a = (`unionR` singleton bs a)
 | 
			
		||||
 | 
			
		||||
fromList :: [(ByteString, a)] -> Trie a
 | 
			
		||||
fromList = foldl' (\acc (k,v) -> insert k v acc) empty
 | 
			
		||||
 | 
			
		||||
toList :: forall a. Trie a -> [(ByteString, a)]
 | 
			
		||||
toList = go
 | 
			
		||||
  where
 | 
			
		||||
    go :: Trie a -> [(ByteString, a)]
 | 
			
		||||
    go Empty = []
 | 
			
		||||
    go (Branch Nothing m) = concatMap (\(_, child) -> go child) (M.toList m)
 | 
			
		||||
    go (Branch (Just (Pair a b)) m) = (a, b) : concatMap (\(_, child) -> go child) (M.toList m)
 | 
			
		||||
		Reference in New Issue
	
	Block a user