Fix the n-gram implementation
This commit is contained in:
parent
263f4e281a
commit
5f4b325e73
@ -21,6 +21,7 @@ module API where
|
||||
|
||||
import Control.Monad.Reader (MonadReader, asks)
|
||||
import Control.Monad.Trans (MonadIO)
|
||||
import Control.Parallel.Strategies
|
||||
import Data.Either (partitionEithers)
|
||||
import Data.Foldable (traverse_)
|
||||
import Data.List (find, sortOn)
|
||||
@ -32,7 +33,6 @@ import Servant.API
|
||||
import Servant.API.Generic
|
||||
import Servant.HTML.Lucid
|
||||
import Servant.Server.Generic
|
||||
import Control.Parallel.Strategies
|
||||
|
||||
import Data.Caesar
|
||||
import Data.Language
|
||||
@ -80,7 +80,7 @@ handler =
|
||||
languageModel <- asks getModel
|
||||
sentences <- map message . bullshits <$> getBullshits
|
||||
let (bullshit, noBullshit) = partitionEithers best
|
||||
best = map (findBest languageModel) sentences `using` parBuffer 10 rdeepseq
|
||||
best = map (findBest languageModel) sentences `using` parListChunk 10 rdeepseq
|
||||
return $ Index noBullshit bullshit
|
||||
}
|
||||
where
|
||||
@ -88,5 +88,5 @@ handler =
|
||||
findBest model sentence =
|
||||
let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29]
|
||||
ranked = sortOn fst [(goodness model x, x) | x <- caesared]
|
||||
found = find (\(rank, _) -> rank > -7.9) (reverse ranked)
|
||||
found = find (\(rank, _) -> rank > -8.1) (reverse ranked)
|
||||
in maybe (Left sentence) (Right . snd) found
|
||||
|
@ -21,10 +21,8 @@ import qualified Data.Text as T
|
||||
|
||||
-- | Build a n-gram frequency map
|
||||
ngram :: Int -> Text -> Map Text (Sum Int)
|
||||
ngram n = M.unionsWith (<>) . unfoldr go
|
||||
ngram n = M.unionsWith (<>) . unfoldr go . T.toLower
|
||||
where
|
||||
go :: Text -> Maybe (Map Text (Sum Int), Text)
|
||||
go str =
|
||||
case T.splitAt n str of
|
||||
("", _) -> Nothing
|
||||
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)
|
||||
go "" = Nothing
|
||||
go xs = Just (M.singleton (T.take n xs) 1, T.tail xs)
|
||||
|
Loading…
Reference in New Issue
Block a user