This commit is contained in:
Mats Rauhala 2019-10-15 20:22:31 +03:00
parent 4ab776d51e
commit 046ae33fb4
3 changed files with 64 additions and 0 deletions

View File

@ -1 +1,17 @@
module Data.Caesar where module Data.Caesar where
type Caesar = Char
next :: Char -> Char
next c =
case c of
'.' -> '.'
' ' -> ' '
'ö' -> 'ä'
'ä' -> 'å'
'å' -> 'z'
'a' -> 'ö'
x -> pred x
caesar :: Int -> Caesar -> Caesar
caesar n x = foldr ($) x (replicate n next)

View File

@ -1 +1,32 @@
{-# LANGUAGE RecordWildCards #-}
module Data.Language where module Data.Language where
import Data.Foldable (fold)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Maybe (fromMaybe)
import Data.Monoid (Sum (..))
import Data.Text (Text)
import Data.NGram
data Model
= Model { parts :: Map Text (Sum Int)
, total :: Sum Int }
ngram_size :: Int
ngram_size = 3
buildModel :: Text -> Model
buildModel str =
let parts = ngram ngram_size str
total = fold parts
in Model{..}
goodness :: Model -> Text -> Double
goodness Model{..} str =
let comparison = M.keys $ ngram ngram_size str
ranksum = getSum $ foldMap (Sum . log . elm) comparison
in ranksum / fromIntegral (length comparison)
where
elm x = fromIntegral (getSum (fromMaybe 1 $ M.lookup x parts)) / fromIntegral (getSum total)

View File

@ -1 +1,18 @@
{-# LANGUAGE OverloadedStrings #-}
module Data.NGram where module Data.NGram where
import Data.List (unfoldr)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Monoid (Sum (..))
import Data.Text (Text)
import qualified Data.Text as T
ngram :: Int -> Text -> Map Text (Sum Int)
ngram n = M.unionsWith (<>) . unfoldr go
where
go :: Text -> Maybe (Map Text (Sum Int), Text)
go str =
case T.splitAt n str of
("", _) -> Nothing
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)