Fix the n-gram implementation
This commit is contained in:
parent
263f4e281a
commit
5f4b325e73
38
src/API.hs
38
src/API.hs
@ -1,9 +1,9 @@
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
{-# LANGUAGE DeriveGeneric #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RecordWildCards #-}
|
||||
{-# LANGUAGE TypeFamilies #-}
|
||||
{-# LANGUAGE TypeOperators #-}
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
{-# LANGUAGE DeriveGeneric #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RecordWildCards #-}
|
||||
{-# LANGUAGE TypeFamilies #-}
|
||||
{-# LANGUAGE TypeOperators #-}
|
||||
{-|
|
||||
Module : API
|
||||
Description : The application API and handlers
|
||||
@ -19,25 +19,25 @@ Even if this is only one module, I'm using this same style in all of my projects
|
||||
-}
|
||||
module API where
|
||||
|
||||
import Control.Monad.Reader (MonadReader, asks)
|
||||
import Control.Monad.Trans (MonadIO)
|
||||
import Data.Either (partitionEithers)
|
||||
import Data.Foldable (traverse_)
|
||||
import Data.List (find, sortOn)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Lucid.Base (HtmlT, ToHtml (..))
|
||||
import Control.Monad.Reader (MonadReader, asks)
|
||||
import Control.Monad.Trans (MonadIO)
|
||||
import Control.Parallel.Strategies
|
||||
import Data.Either (partitionEithers)
|
||||
import Data.Foldable (traverse_)
|
||||
import Data.List (find, sortOn)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Lucid.Base (HtmlT, ToHtml (..))
|
||||
import Lucid.Html5
|
||||
import Servant.API
|
||||
import Servant.API.Generic
|
||||
import Servant.HTML.Lucid
|
||||
import Servant.Server.Generic
|
||||
import Control.Parallel.Strategies
|
||||
|
||||
import Data.Caesar
|
||||
import Data.Language
|
||||
import Solidabis.API (HasClientEnv, HasToken, bullshits,
|
||||
getBullshits, message)
|
||||
import Solidabis.API (HasClientEnv, HasToken, bullshits,
|
||||
getBullshits, message)
|
||||
|
||||
-- | Data for the frontpage
|
||||
--
|
||||
@ -80,7 +80,7 @@ handler =
|
||||
languageModel <- asks getModel
|
||||
sentences <- map message . bullshits <$> getBullshits
|
||||
let (bullshit, noBullshit) = partitionEithers best
|
||||
best = map (findBest languageModel) sentences `using` parBuffer 10 rdeepseq
|
||||
best = map (findBest languageModel) sentences `using` parListChunk 10 rdeepseq
|
||||
return $ Index noBullshit bullshit
|
||||
}
|
||||
where
|
||||
@ -88,5 +88,5 @@ handler =
|
||||
findBest model sentence =
|
||||
let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29]
|
||||
ranked = sortOn fst [(goodness model x, x) | x <- caesared]
|
||||
found = find (\(rank, _) -> rank > -7.9) (reverse ranked)
|
||||
found = find (\(rank, _) -> rank > -8.1) (reverse ranked)
|
||||
in maybe (Left sentence) (Right . snd) found
|
||||
|
@ -21,10 +21,8 @@ import qualified Data.Text as T
|
||||
|
||||
-- | Build a n-gram frequency map
|
||||
ngram :: Int -> Text -> Map Text (Sum Int)
|
||||
ngram n = M.unionsWith (<>) . unfoldr go
|
||||
ngram n = M.unionsWith (<>) . unfoldr go . T.toLower
|
||||
where
|
||||
go :: Text -> Maybe (Map Text (Sum Int), Text)
|
||||
go str =
|
||||
case T.splitAt n str of
|
||||
("", _) -> Nothing
|
||||
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)
|
||||
go "" = Nothing
|
||||
go xs = Just (M.singleton (T.take n xs) 1, T.tail xs)
|
||||
|
Loading…
Reference in New Issue
Block a user