Fix the n-gram implementation
This commit is contained in:
parent
263f4e281a
commit
5f4b325e73
@ -21,6 +21,7 @@ module API where
|
|||||||
|
|
||||||
import Control.Monad.Reader (MonadReader, asks)
|
import Control.Monad.Reader (MonadReader, asks)
|
||||||
import Control.Monad.Trans (MonadIO)
|
import Control.Monad.Trans (MonadIO)
|
||||||
|
import Control.Parallel.Strategies
|
||||||
import Data.Either (partitionEithers)
|
import Data.Either (partitionEithers)
|
||||||
import Data.Foldable (traverse_)
|
import Data.Foldable (traverse_)
|
||||||
import Data.List (find, sortOn)
|
import Data.List (find, sortOn)
|
||||||
@ -32,7 +33,6 @@ import Servant.API
|
|||||||
import Servant.API.Generic
|
import Servant.API.Generic
|
||||||
import Servant.HTML.Lucid
|
import Servant.HTML.Lucid
|
||||||
import Servant.Server.Generic
|
import Servant.Server.Generic
|
||||||
import Control.Parallel.Strategies
|
|
||||||
|
|
||||||
import Data.Caesar
|
import Data.Caesar
|
||||||
import Data.Language
|
import Data.Language
|
||||||
@ -80,7 +80,7 @@ handler =
|
|||||||
languageModel <- asks getModel
|
languageModel <- asks getModel
|
||||||
sentences <- map message . bullshits <$> getBullshits
|
sentences <- map message . bullshits <$> getBullshits
|
||||||
let (bullshit, noBullshit) = partitionEithers best
|
let (bullshit, noBullshit) = partitionEithers best
|
||||||
best = map (findBest languageModel) sentences `using` parBuffer 10 rdeepseq
|
best = map (findBest languageModel) sentences `using` parListChunk 10 rdeepseq
|
||||||
return $ Index noBullshit bullshit
|
return $ Index noBullshit bullshit
|
||||||
}
|
}
|
||||||
where
|
where
|
||||||
@ -88,5 +88,5 @@ handler =
|
|||||||
findBest model sentence =
|
findBest model sentence =
|
||||||
let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29]
|
let caesared = map (\n -> T.map (caesar n) . T.toLower $ sentence) [0..29]
|
||||||
ranked = sortOn fst [(goodness model x, x) | x <- caesared]
|
ranked = sortOn fst [(goodness model x, x) | x <- caesared]
|
||||||
found = find (\(rank, _) -> rank > -7.9) (reverse ranked)
|
found = find (\(rank, _) -> rank > -8.1) (reverse ranked)
|
||||||
in maybe (Left sentence) (Right . snd) found
|
in maybe (Left sentence) (Right . snd) found
|
||||||
|
@ -21,10 +21,8 @@ import qualified Data.Text as T
|
|||||||
|
|
||||||
-- | Build a n-gram frequency map
|
-- | Build a n-gram frequency map
|
||||||
ngram :: Int -> Text -> Map Text (Sum Int)
|
ngram :: Int -> Text -> Map Text (Sum Int)
|
||||||
ngram n = M.unionsWith (<>) . unfoldr go
|
ngram n = M.unionsWith (<>) . unfoldr go . T.toLower
|
||||||
where
|
where
|
||||||
go :: Text -> Maybe (Map Text (Sum Int), Text)
|
go :: Text -> Maybe (Map Text (Sum Int), Text)
|
||||||
go str =
|
go "" = Nothing
|
||||||
case T.splitAt n str of
|
go xs = Just (M.singleton (T.take n xs) 1, T.tail xs)
|
||||||
("", _) -> Nothing
|
|
||||||
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user