Fix the n-gram implementation

This commit is contained in:
Mats Rauhala 2019-10-16 16:48:43 +03:00
parent 263f4e281a
commit 5f4b325e73
2 changed files with 22 additions and 24 deletions

View File

@ -21,6 +21,7 @@ module API where
import Control.Monad.Reader (MonadReader, asks)
import Control.Monad.Trans (MonadIO)
import Control.Parallel.Strategies
import Data.Either (partitionEithers)
import Data.Foldable (traverse_)
import Data.List (find, sortOn)
@ -32,7 +33,6 @@ import Servant.API
import Servant.API.Generic
import Servant.HTML.Lucid
import Servant.Server.Generic
import Control.Parallel.Strategies
import Data.Caesar
import Data.Language
@ -80,7 +80,7 @@ handler =
languageModel <- asks getModel
sentences <- map message . bullshits <$> getBullshits
let (bullshit, noBullshit) = partitionEithers best
best = map (findBest languageModel) sentences `using` parBuffer 10 rdeepseq
best = map (findBest languageModel) sentences `using` parListChunk 10 rdeepseq
return $ Index noBullshit bullshit
}
where
@ -88,5 +88,5 @@ handler =
findBest model sentence =
let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29]
ranked = sortOn fst [(goodness model x, x) | x <- caesared]
found = find (\(rank, _) -> rank > -7.9) (reverse ranked)
found = find (\(rank, _) -> rank > -8.1) (reverse ranked)
in maybe (Left sentence) (Right . snd) found

View File

@ -21,10 +21,8 @@ import qualified Data.Text as T
-- | Build a n-gram frequency map
ngram :: Int -> Text -> Map Text (Sum Int)
ngram n = M.unionsWith (<>) . unfoldr go
ngram n = M.unionsWith (<>) . unfoldr go . T.toLower
where
go :: Text -> Maybe (Map Text (Sum Int), Text)
go str =
case T.splitAt n str of
("", _) -> Nothing
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)
go "" = Nothing
go xs = Just (M.singleton (T.take n xs) 1, T.tail xs)