Helpers
This commit is contained in:
parent
4ab776d51e
commit
046ae33fb4
@ -1 +1,17 @@
|
|||||||
module Data.Caesar where
|
module Data.Caesar where
|
||||||
|
|
||||||
|
type Caesar = Char
|
||||||
|
|
||||||
|
next :: Char -> Char
|
||||||
|
next c =
|
||||||
|
case c of
|
||||||
|
'.' -> '.'
|
||||||
|
' ' -> ' '
|
||||||
|
'ö' -> 'ä'
|
||||||
|
'ä' -> 'å'
|
||||||
|
'å' -> 'z'
|
||||||
|
'a' -> 'ö'
|
||||||
|
x -> pred x
|
||||||
|
|
||||||
|
caesar :: Int -> Caesar -> Caesar
|
||||||
|
caesar n x = foldr ($) x (replicate n next)
|
||||||
|
@ -1 +1,32 @@
|
|||||||
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
module Data.Language where
|
module Data.Language where
|
||||||
|
|
||||||
|
import Data.Foldable (fold)
|
||||||
|
import Data.Map.Strict (Map)
|
||||||
|
import qualified Data.Map.Strict as M
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Monoid (Sum (..))
|
||||||
|
import Data.Text (Text)
|
||||||
|
|
||||||
|
import Data.NGram
|
||||||
|
|
||||||
|
data Model
|
||||||
|
= Model { parts :: Map Text (Sum Int)
|
||||||
|
, total :: Sum Int }
|
||||||
|
|
||||||
|
ngram_size :: Int
|
||||||
|
ngram_size = 3
|
||||||
|
|
||||||
|
buildModel :: Text -> Model
|
||||||
|
buildModel str =
|
||||||
|
let parts = ngram ngram_size str
|
||||||
|
total = fold parts
|
||||||
|
in Model{..}
|
||||||
|
|
||||||
|
goodness :: Model -> Text -> Double
|
||||||
|
goodness Model{..} str =
|
||||||
|
let comparison = M.keys $ ngram ngram_size str
|
||||||
|
ranksum = getSum $ foldMap (Sum . log . elm) comparison
|
||||||
|
in ranksum / fromIntegral (length comparison)
|
||||||
|
where
|
||||||
|
elm x = fromIntegral (getSum (fromMaybe 1 $ M.lookup x parts)) / fromIntegral (getSum total)
|
||||||
|
@ -1 +1,18 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
module Data.NGram where
|
module Data.NGram where
|
||||||
|
|
||||||
|
import Data.List (unfoldr)
|
||||||
|
import Data.Map.Strict (Map)
|
||||||
|
import qualified Data.Map.Strict as M
|
||||||
|
import Data.Monoid (Sum (..))
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
|
||||||
|
ngram :: Int -> Text -> Map Text (Sum Int)
|
||||||
|
ngram n = M.unionsWith (<>) . unfoldr go
|
||||||
|
where
|
||||||
|
go :: Text -> Maybe (Map Text (Sum Int), Text)
|
||||||
|
go str =
|
||||||
|
case T.splitAt n str of
|
||||||
|
("", _) -> Nothing
|
||||||
|
(xs, ys) -> Just (M.singleton (T.toLower xs) 1, ys)
|
||||||
|
Loading…
Reference in New Issue
Block a user