From 5f4b325e736c5fda109756e3a4054168e305913b Mon Sep 17 00:00:00 2001 From: Mats Rauhala Date: Wed, 16 Oct 2019 16:48:43 +0300 Subject: [PATCH] Fix the n-gram implementation --- src/API.hs | 38 +++++++++++++++++++------------------- src/Data/NGram.hs | 8 +++----- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/API.hs b/src/API.hs index 830e83b..c37b599 100644 --- a/src/API.hs +++ b/src/API.hs @@ -1,9 +1,9 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} {-| Module : API Description : The application API and handlers @@ -19,25 +19,25 @@ Even if this is only one module, I'm using this same style in all of my projects -} module API where -import Control.Monad.Reader (MonadReader, asks) -import Control.Monad.Trans (MonadIO) -import Data.Either (partitionEithers) -import Data.Foldable (traverse_) -import Data.List (find, sortOn) -import Data.Text (Text) -import qualified Data.Text as T -import Lucid.Base (HtmlT, ToHtml (..)) +import Control.Monad.Reader (MonadReader, asks) +import Control.Monad.Trans (MonadIO) +import Control.Parallel.Strategies +import Data.Either (partitionEithers) +import Data.Foldable (traverse_) +import Data.List (find, sortOn) +import Data.Text (Text) +import qualified Data.Text as T +import Lucid.Base (HtmlT, ToHtml (..)) import Lucid.Html5 import Servant.API import Servant.API.Generic import Servant.HTML.Lucid import Servant.Server.Generic -import Control.Parallel.Strategies import Data.Caesar import Data.Language -import Solidabis.API (HasClientEnv, HasToken, bullshits, - getBullshits, message) +import Solidabis.API (HasClientEnv, HasToken, bullshits, + getBullshits, message) -- | Data for the frontpage -- @@ -80,7 +80,7 @@ handler = languageModel <- asks getModel sentences <- map message . bullshits <$> getBullshits let (bullshit, noBullshit) = partitionEithers best - best = map (findBest languageModel) sentences `using` parBuffer 10 rdeepseq + best = map (findBest languageModel) sentences `using` parListChunk 10 rdeepseq return $ Index noBullshit bullshit } where @@ -88,5 +88,5 @@ handler = findBest model sentence = let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29] ranked = sortOn fst [(goodness model x, x) | x <- caesared] - found = find (\(rank, _) -> rank > -7.9) (reverse ranked) + found = find (\(rank, _) -> rank > -8.1) (reverse ranked) in maybe (Left sentence) (Right . snd) found diff --git a/src/Data/NGram.hs b/src/Data/NGram.hs index 9d05b0c..fa5462a 100644 --- a/src/Data/NGram.hs +++ b/src/Data/NGram.hs @@ -21,10 +21,8 @@ import qualified Data.Text as T -- | Build a n-gram frequency map ngram :: Int -> Text -> Map Text (Sum Int) -ngram n = M.unionsWith (<>) . unfoldr go +ngram n = M.unionsWith (<>) . unfoldr go . T.toLower where go :: Text -> Maybe (Map Text (Sum Int), Text) - go str = - case T.splitAt n str of - ("", _) -> Nothing - (xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys) + go "" = Nothing + go xs = Just (M.singleton (T.take n xs) 1, T.tail xs)