solidabis-koodihaaste/src/Data/NGram.hs

29 lines
808 B
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-|
Module : Data.NGram
Description : N-gram utilities
Copyright : (c) Mats Rauhala, 2019
License : BSD3
Maintainer : mats.rauhala@iki.fi
Stability : experimental
Portability : POSIX

Utilities for building n-gram models.
-}
module Data.NGram where
import Data.List (unfoldr)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Monoid (Sum (..))
import Data.Text (Text)
import qualified Data.Text as T
-- | Build an n-gram frequency map.
--
-- The input is lower-cased with 'T.toLower' and then scanned with a sliding
-- window that advances one character at a time. Every window of exactly @n@
-- characters is counted once; the result maps each distinct n-gram to the
-- number of times it occurs.
--
-- Only full-width windows are counted: the scan stops as soon as fewer than
-- @n@ characters remain, so no truncated grams shorter than @n@ end up in the
-- map. Consequently the result is empty when the (lower-cased) text is
-- shorter than @n@, and also when @n@ is not positive.
--
-- >>> ngram 2 "Abab"
-- fromList [("ab",Sum {getSum = 2}),("ba",Sum {getSum = 1})]
ngram :: Int -> Text -> Map Text (Sum Int)
ngram n
  -- A non-positive width has no meaningful grams; without this guard every
  -- position would contribute the empty key.
  | n <= 0 = const M.empty
  | otherwise = M.fromListWith (<>) . unfoldr window . T.toLower
  where
    -- Emit the gram starting at the current position together with the text
    -- shifted by one character, or stop once a full window no longer fits.
    window :: Text -> Maybe ((Text, Sum Int), Text)
    window xs
      -- 'T.compareLength' short-circuits, so this does not walk the whole
      -- remaining text on every step.
      | T.compareLength xs n == LT = Nothing
      | otherwise = Just ((T.take n xs, Sum 1), T.drop 1 xs)