Fix the n-gram implementation

This commit is contained in:
Mats Rauhala 2019-10-16 16:48:43 +03:00
parent 263f4e281a
commit 5f4b325e73
2 changed files with 22 additions and 24 deletions

View File

@ -1,9 +1,9 @@
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-| {-|
Module : API Module : API
Description : The application API and handlers Description : The application API and handlers
@ -19,25 +19,25 @@ Even if this is only one module, I'm using this same style in all of my projects
-} -}
module API where module API where
import Control.Monad.Reader (MonadReader, asks) import Control.Monad.Reader (MonadReader, asks)
import Control.Monad.Trans (MonadIO) import Control.Monad.Trans (MonadIO)
import Data.Either (partitionEithers) import Control.Parallel.Strategies
import Data.Foldable (traverse_) import Data.Either (partitionEithers)
import Data.List (find, sortOn) import Data.Foldable (traverse_)
import Data.Text (Text) import Data.List (find, sortOn)
import qualified Data.Text as T import Data.Text (Text)
import Lucid.Base (HtmlT, ToHtml (..)) import qualified Data.Text as T
import Lucid.Base (HtmlT, ToHtml (..))
import Lucid.Html5 import Lucid.Html5
import Servant.API import Servant.API
import Servant.API.Generic import Servant.API.Generic
import Servant.HTML.Lucid import Servant.HTML.Lucid
import Servant.Server.Generic import Servant.Server.Generic
import Control.Parallel.Strategies
import Data.Caesar import Data.Caesar
import Data.Language import Data.Language
import Solidabis.API (HasClientEnv, HasToken, bullshits, import Solidabis.API (HasClientEnv, HasToken, bullshits,
getBullshits, message) getBullshits, message)
-- | Data for the frontpage -- | Data for the frontpage
-- --
@ -80,7 +80,7 @@ handler =
languageModel <- asks getModel languageModel <- asks getModel
sentences <- map message . bullshits <$> getBullshits sentences <- map message . bullshits <$> getBullshits
let (bullshit, noBullshit) = partitionEithers best let (bullshit, noBullshit) = partitionEithers best
best = map (findBest languageModel) sentences `using` parBuffer 10 rdeepseq best = map (findBest languageModel) sentences `using` parListChunk 10 rdeepseq
return $ Index noBullshit bullshit return $ Index noBullshit bullshit
} }
where where
@ -88,5 +88,5 @@ handler =
findBest model sentence = findBest model sentence =
let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29] let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29]
ranked = sortOn fst [(goodness model x, x) | x <- caesared] ranked = sortOn fst [(goodness model x, x) | x <- caesared]
found = find (\(rank, _) -> rank > -7.9) (reverse ranked) found = find (\(rank, _) -> rank > -8.1) (reverse ranked)
in maybe (Left sentence) (Right . snd) found in maybe (Left sentence) (Right . snd) found

View File

@ -21,10 +21,8 @@ import qualified Data.Text as T
-- | Build a n-gram frequency map -- | Build a n-gram frequency map
ngram :: Int -> Text -> Map Text (Sum Int) ngram :: Int -> Text -> Map Text (Sum Int)
ngram n = M.unionsWith (<>) . unfoldr go ngram n = M.unionsWith (<>) . unfoldr go . T.toLower
where where
go :: Text -> Maybe (Map Text (Sum Int), Text) go :: Text -> Maybe (Map Text (Sum Int), Text)
go str = go "" = Nothing
case T.splitAt n str of go xs = Just (M.singleton (T.take n xs) 1, T.tail xs)
("", _) -> Nothing
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)