Fix the n-gram implementation

This commit is contained in:
Mats Rauhala 2019-10-16 16:48:43 +03:00
parent 263f4e281a
commit 5f4b325e73
2 changed files with 22 additions and 24 deletions

View File

@ -1,9 +1,9 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-|
Module : API
Description : The application API and handlers
@ -19,25 +19,25 @@ Even if this is only one module, I'm using this same style in all of my projects
-}
module API where
import Control.Monad.Reader (MonadReader, asks)
import Control.Monad.Trans (MonadIO)
import Data.Either (partitionEithers)
import Data.Foldable (traverse_)
import Data.List (find, sortOn)
import Data.Text (Text)
import qualified Data.Text as T
import Lucid.Base (HtmlT, ToHtml (..))
import Control.Monad.Reader (MonadReader, asks)
import Control.Monad.Trans (MonadIO)
import Control.Parallel.Strategies
import Data.Either (partitionEithers)
import Data.Foldable (traverse_)
import Data.List (find, sortOn)
import Data.Text (Text)
import qualified Data.Text as T
import Lucid.Base (HtmlT, ToHtml (..))
import Lucid.Html5
import Servant.API
import Servant.API.Generic
import Servant.HTML.Lucid
import Servant.Server.Generic
import Control.Parallel.Strategies
import Data.Caesar
import Data.Language
import Solidabis.API (HasClientEnv, HasToken, bullshits,
getBullshits, message)
import Solidabis.API (HasClientEnv, HasToken, bullshits,
getBullshits, message)
-- | Data for the frontpage
--
@ -80,7 +80,7 @@ handler =
languageModel <- asks getModel
sentences <- map message . bullshits <$> getBullshits
let (bullshit, noBullshit) = partitionEithers best
best = map (findBest languageModel) sentences `using` parBuffer 10 rdeepseq
best = map (findBest languageModel) sentences `using` parListChunk 10 rdeepseq
return $ Index noBullshit bullshit
}
where
@ -88,5 +88,5 @@ handler =
findBest model sentence =
let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29]
ranked = sortOn fst [(goodness model x, x) | x <- caesared]
found = find (\(rank, _) -> rank > -7.9) (reverse ranked)
found = find (\(rank, _) -> rank > -8.1) (reverse ranked)
in maybe (Left sentence) (Right . snd) found

View File

@ -21,10 +21,8 @@ import qualified Data.Text as T
-- | Build a n-gram frequency map
ngram :: Int -> Text -> Map Text (Sum Int)
ngram n = M.unionsWith (<>) . unfoldr go
ngram n = M.unionsWith (<>) . unfoldr go . T.toLower
where
go :: Text -> Maybe (Map Text (Sum Int), Text)
go str =
case T.splitAt n str of
("", _) -> Nothing
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)
go "" = Nothing
go xs = Just (M.singleton (T.take n xs) 1, T.tail xs)