solidabis-koodihaaste/src/Data/NGram.hs

19 lines
583 B
Haskell

{-# LANGUAGE OverloadedStrings #-}
module Data.NGram where
import Data.List (unfoldr)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Monoid (Sum (..))
import Data.Text (Text)
import qualified Data.Text as T
-- | Tally the fixed-width pieces of a text.
--
-- The input is consumed left to right in consecutive, non-overlapping pieces
-- of @n@ characters; the last piece may be shorter when the input length is
-- not a multiple of @n@. Every piece is lower-cased before it is used as a
-- key, and the associated value is the number of times that key was seen.
--
-- An empty input, or an @n@ for which 'T.splitAt' yields an empty prefix
-- (zero or negative), produces an empty map.
ngram :: Int -> Text -> Map Text (Sum Int)
ngram n = M.fromListWith (<>) . unfoldr nextChunk
  where
    -- Peel one piece off the front of the remaining text, pairing it with a
    -- count of one; stop as soon as nothing more can be taken.
    nextChunk :: Text -> Maybe ((Text, Sum Int), Text)
    nextChunk remaining
      | T.null piece = Nothing
      | otherwise = Just ((T.toLower piece, Sum 1), leftover)
      where
        (piece, leftover) = T.splitAt n remaining