Split to multiproject

This commit is contained in:
2022-05-16 21:42:11 +03:00
parent 0266d4b06b
commit 03b4cfb3bf
35 changed files with 70 additions and 8 deletions

5
reddit_pub/CHANGELOG.md Normal file
View File

@ -0,0 +1,5 @@
# Revision history for reddit-pub
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

9
reddit_pub/app/Main.hs Normal file
View File

@ -0,0 +1,9 @@
module Main where
import qualified MyLib (defaultMain)
import System.Environment (getArgs)
main :: IO ()
main = do
[path] <- getArgs
MyLib.defaultMain path

18
reddit_pub/config.dhall Normal file
View File

@ -0,0 +1,18 @@
let config = ./dhall/package.dhall
in { amqp = config.AMQP::{
, vhost = "reddit"
, username = env:AMQP_USER as Text ? "reddit_pub"
, password = config.Password.Type.Password (env:AMQP_PASS as Text ? "tester")
, host = env:AMQP_HOST as Text ? "127.0.0.1"
-- , host = "10.233.5.2"
}
, fetchers =
[ config.Fetcher::{ subreddit = "haskell" }
, config.Fetcher::{ subreddit = "scala" }
, config.Fetcher::{ subreddit = "all" }
, config.Fetcher::{ subreddit = "pics", entries = 150 }
]
, sqlite = "reddit.sqlite"
}
: config.Type

22
reddit_pub/default.nix Normal file
View File

@ -0,0 +1,22 @@
{ mkDerivation, aeson, amqp, base, bytestring, containers, dhall
, hedgehog, hspec, hspec-hedgehog, lens, lens-aeson, lib, mtl
, pipes, reddit-lib, sqlite-simple, text, wreq
}:
mkDerivation {
pname = "reddit-pub";
version = "0.1.0.0";
src = ./.;
isLibrary = true;
isExecutable = true;
enableSeparateDataOutput = true;
libraryHaskellDepends = [
aeson amqp base bytestring containers dhall lens lens-aeson mtl
pipes reddit-lib sqlite-simple text wreq
];
executableHaskellDepends = [ base ];
testHaskellDepends = [
base bytestring containers hedgehog hspec hspec-hedgehog mtl
];
license = "unknown";
hydraPlatforms = lib.platforms.none;
}

9
reddit_pub/dhall.nix Normal file
View File

@ -0,0 +1,9 @@
{ buildDhallDirectoryPackage }:
buildDhallDirectoryPackage {
name = "";
src = ./dhall;
file = "package.dhall";
source = false;
document = false;
dependencies = [];
}

View File

@ -0,0 +1,5 @@
{ vhost : Text
, username : Text
, password : ../Password/Type.dhall
, host : Text
}

View File

@ -0,0 +1,4 @@
{ vhost = "/"
, username = "guest"
, host = "localhost"
}

View File

@ -0,0 +1 @@
{ Type = ./Type.dhall, default = ./default.dhall }

View File

@ -0,0 +1,4 @@
{ subreddit : Text
, entries : Natural
, qualifier : Optional ../Qualifier/Type.dhall
}

View File

@ -0,0 +1,3 @@
{ entries = 50
, qualifier = None ../Qualifier/Type.dhall
}

View File

@ -0,0 +1 @@
{ Type = ./Type.dhall, default = ./default.dhall }

View File

@ -0,0 +1 @@
< Password : Text | File : Text >

View File

@ -0,0 +1 @@
{ Type = ./Type.dhall }

View File

@ -0,0 +1 @@
< Top | Controversial >

View File

@ -0,0 +1,4 @@
{ amqp : ./AMQP/Type.dhall
, fetchers : List ./Fetcher/Type.dhall
, sqlite : Text
}

View File

@ -0,0 +1 @@
{ amqp = ./AMQP/default.dhall }

View File

@ -0,0 +1,6 @@
{ Type = ./Type.dhall
, default = ./default.dhall
, AMQP = ./AMQP/package.dhall
, Fetcher = ./Fetcher/package.dhall
, Password = ./Password/package.dhall
}

View File

@ -0,0 +1,95 @@
cabal-version: 2.4
name: reddit-pub
version: 0.1.0.0
-- A short (one-line) description of the package.
-- synopsis:
-- A longer description of the package.
-- description:
-- A URL where users can report bugs.
-- bug-reports:
-- The license under which the package is released.
-- license:
author: Mats Rauhala
maintainer: mats.rauhala@iki.fi
-- A copyright notice.
-- copyright:
-- category:
extra-source-files: CHANGELOG.md
data-files: dhall/*.dhall
, dhall/AMQP/*.dhall
, dhall/Fetcher/*.dhall
, dhall/Qualifier/*.dhall
library
ghc-options: -Wall
exposed-modules: MyLib
Data.Config
Data.Deriving.Aeson
Network.Reddit
Data.SubReddit
Membership
-- Modules included in this library but not exported.
-- other-modules:
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends: base ^>=4.15.1.0
, amqp
, aeson
, lens
, lens-aeson
, mtl
, text
, bytestring
, dhall
, wreq
, pipes
, containers
, sqlite-simple
, reddit-lib
hs-source-dirs: src
default-language: Haskell2010
executable reddit-pub
main-is: Main.hs
-- Modules included in this executable, other than Main.
-- other-modules:
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends:
base ^>=4.15.1.0,
reddit-pub
hs-source-dirs: app
default-language: Haskell2010
test-suite reddit-tests
main-is: Spec.hs
type: exitcode-stdio-1.0
other-modules:
-- Modules included in this executable, other than Main.
-- other-modules:
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends:
base ^>=4.15.1.0,
mtl,
containers,
bytestring,
reddit-pub,
hedgehog,
hspec,
hspec-hedgehog
hs-source-dirs: test
default-language: Haskell2010

41
reddit_pub/shell.nix Normal file
View File

@ -0,0 +1,41 @@
{ nixpkgs ? import <nixpkgs> {} }:
with nixpkgs;
let
easy-dhall-nix-src = with builtins;
fetchgit { inherit (fromJSON (readFile ./easy-dhall-nix.json)) url rev sha256 fetchSubmodules; };
easy-hls-src = fetchFromGitHub {
owner = "ssbothwell";
repo = "easy-hls-nix";
inherit (builtins.fromJSON (builtins.readFile ./easy-hls-nix.json)) rev sha256;
};
easy-hls = callPackage easy-hls-src { ghcVersions = [ hp.ghc.version ]; };
easy-dhall-nix = import easy-dhall-nix-src {};
hp = haskellPackages.extend (self: super: {
reddit_pub = self.callPackage ./. {};
});
in
hp.shellFor {
packages = h: [h.reddit_pub];
withHoogle = true;
buildInputs = [
easy-dhall-nix.dhall-lsp-simple
entr
cabal-install
haskellPackages.hlint
stylish-haskell
ghcid
easy-hls
sqlite-interactive
rrdtool
jq
haskellPackages.graphmod
];
}

View File

@ -0,0 +1,80 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE TypeOperators #-}
module Data.Config where
import Control.Lens
import Data.Text (Text)
import Dhall (FromDhall, Generic, ToDhall, auto, inputFile)
import Dhall.Deriving
import Numeric.Natural (Natural)
import Data.SubReddit (SubReddit)
data Password
= Password Text
| File FilePath
deriving stock (Generic, Show)
deriving (FromDhall, ToDhall)
via (Codec AsIs Password)
data AMQP = AMQP
{ amqpVhost :: Text
, amqpUsername :: Text
, amqpPassword :: Password
, amqpHost :: Text
}
deriving stock (Generic, Show)
deriving (FromDhall, ToDhall)
via (Codec (Field (CamelCase <<< DropPrefix "amqp"))) AMQP
host :: Lens' AMQP Text
host = lens amqpHost (\ am txt -> am{amqpHost=txt})
vhost :: Lens' AMQP Text
vhost = lens amqpVhost (\ am txt -> am{amqpVhost=txt})
username :: Lens' AMQP Text
username = lens amqpUsername (\ am txt -> am{amqpUsername=txt})
password :: Lens' AMQP Password
password = lens amqpPassword (\ am txt -> am{amqpPassword=txt})
data Fetcher = Fetcher
{ fetcherSubreddit :: SubReddit
, fetcherEntries :: Natural
, fetcherQualifier :: Maybe Qualifier
}
deriving stock (Show, Generic)
deriving (FromDhall, ToDhall) via Codec (Field (CamelCase <<< DropPrefix "fetcher")) Fetcher
subreddit :: Lens' Fetcher SubReddit
subreddit = lens fetcherSubreddit (\ fe sr -> fe{fetcherSubreddit=sr})
entries :: Lens' Fetcher Natural
entries = lens fetcherEntries (\ fe nat -> fe{fetcherEntries=nat})
data Qualifier = Top | Controversial
deriving stock (Show, Generic)
deriving (FromDhall, ToDhall) via Codec (Constructor TitleCase) Qualifier
data Config = Config
{ configAmqp :: AMQP
, configFetchers :: [Fetcher]
, configSqlite :: FilePath
}
deriving stock (Generic, Show)
deriving (FromDhall, ToDhall)
via (Codec (Field (CamelCase <<< DropPrefix "config"))) Config
amqp :: Lens' Config AMQP
amqp = lens configAmqp (\ con am -> con{configAmqp=am})
fetchers :: Lens' Config [Fetcher]
fetchers = lens configFetchers (\ con fes -> con{configFetchers=fes})
sqlite :: Lens' Config FilePath
sqlite = lens configSqlite (\ con s -> con{configSqlite=s})
readConfig :: FilePath -> IO Config
readConfig = inputFile auto

View File

@ -0,0 +1,41 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module Data.Deriving.Aeson
( AesonCodec(..)
, type (<<<)
, Field
, CamelCase
, DropPrefix
)
where
import Control.Lens
import Data.Aeson
import qualified Data.Text.Strict.Lens as T
import Dhall.Deriving
import GHC.Generics (Generic, Rep)
newtype AesonCodec codec a = AesonCodec a
class ModifyAesonOptions a where
modifyAesonOptions :: Options -> Options
instance (ModifyAesonOptions f, ModifyAesonOptions g) => ModifyAesonOptions (f <<< g) where
modifyAesonOptions = modifyAesonOptions @f . modifyAesonOptions @g
instance TextFunction f => ModifyAesonOptions (Field f) where
modifyAesonOptions opts = opts{fieldLabelModifier = over T.packed (textFunction @f) }
instance (Generic a, ModifyAesonOptions codec, GToJSON' Value Zero (Rep a))
=> ToJSON (AesonCodec codec a) where
toJSON (AesonCodec a) =
genericToJSON (modifyAesonOptions @codec defaultOptions) a
instance (ModifyAesonOptions codec, Generic a, GFromJSON Zero (Rep a))
=> FromJSON (AesonCodec codec a) where
parseJSON va =
AesonCodec <$> genericParseJSON (modifyAesonOptions @codec defaultOptions) va

View File

@ -0,0 +1,9 @@
{-# LANGUAGE DerivingVia #-}
module Data.SubReddit where
import Dhall (FromDhall, ToDhall)
newtype SubReddit = SubReddit { getSubReddit :: String }
deriving Show
deriving (FromDhall, ToDhall) via String

View File

@ -0,0 +1,20 @@
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TypeApplications #-}
module Membership where
import Database.SQLite.Simple (Connection, Only (..))
import Network.Reddit (RedditId)
import qualified Database.SQLite.Simple as SQL
import Database.SQLite.Simple.QQ (sql)
import qualified Data.Foldable as F
import Data.Monoid (Any(..))
import Data.Text (Text)
recordSeen :: Connection -> RedditId -> IO ()
recordSeen conn rid = SQL.execute conn [sql|insert into membership (reddit_id) values (?) on conflict do nothing|] (Only rid)
isSeen :: Connection -> RedditId -> IO Bool
isSeen conn rid =
unwrap <$> SQL.query conn [sql|select reddit_id from membership where reddit_id = ?|] (Only rid)
where
unwrap = getAny . F.foldMap' (Any . const @_ @Text True . fromOnly)

131
reddit_pub/src/MyLib.hs Normal file
View File

@ -0,0 +1,131 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE LambdaCase #-}
module MyLib (defaultMain) where
import Control.Concurrent (threadDelay)
import Control.Exception (bracket)
import Control.Lens
import Control.Monad (forever, void)
import Data.Aeson (FromJSON, ToJSON, Value)
import qualified Data.Aeson as A
import Data.Aeson.Lens (_String, key)
import Data.Bool (bool)
import Data.Config
import Data.Deriving.Aeson
import Data.Foldable (for_)
import Data.Functor.Contravariant ((>$<))
import Data.Text (Text)
import qualified Data.Text.IO as TI
import qualified Data.Text.Strict.Lens as T
import qualified Database.SQLite.Simple as SQL
import GHC.Generics (Generic)
import qualified Membership
import Network.AMQP
( Channel
, DeliveryMode(Persistent)
, closeConnection
, declareExchange
, exchangeName
, exchangeType
, msgBody
, msgDeliveryMode
, newExchange
, newMsg
, openChannel
, openConnection
, publishMsg
)
import Network.Reddit (RedditId(RedditId), publishEntries)
import Network.Wreq.Session (newSession)
import Reddit.Publish (Publish(..))
import Text.Printf (printf)
import Data.ByteString.Lazy (ByteString)
data MessageType = Create | Update
deriving stock (Show, Eq, Generic)
deriving anyclass (ToJSON, FromJSON)
data Message = Message
{ messageType :: MessageType
, messageIdentifier :: RedditId
, messageContent :: Value
}
deriving stock (Show, Eq, Generic)
deriving (ToJSON, FromJSON)
via AesonCodec (Field (CamelCase <<< DropPrefix "message")) Message
toMessage :: SQL.Connection -> Publish IO (Maybe Message) -> Publish IO Value
toMessage sqlConn (Publish p) = Publish $ \entry -> do
case RedditId <$> (entry ^? key "id" . _String) of
Nothing -> p Nothing
Just redditId -> do
event <- bool Create Update <$> Membership.isSeen sqlConn redditId
p $ Just $ Message event redditId entry
sqlRecorder :: SQL.Connection -> Publish IO (Maybe RedditId)
sqlRecorder conn = Publish $ maybe (pure ()) (Membership.recordSeen conn)
amqpPublisher :: Channel -> Text -> Publish IO (Maybe ByteString)
amqpPublisher channel exchange = Publish $ \case
Nothing -> pure ()
Just lbs ->
void $ publishMsg channel exchange routingKey (message lbs)
where
routingKey = "doesn't matter on fanout"
message lbs = newMsg
{ msgBody = lbs
, msgDeliveryMode = Just Persistent
}
stdoutPublisher :: Publish IO String
stdoutPublisher = Publish putStrLn
data Fetch
= Fetch Fetcher
| PublishMessage Message
| ParseFailed
fetchToLog :: Fetch -> String
fetchToLog (Fetch fetcher) = printf "Refreshing %s" (show $ fetcherSubreddit fetcher)
fetchToLog ParseFailed = printf "Failed parsing"
fetchToLog (PublishMessage msg) = messageToLog msg
where
messageToLog :: Message -> String
messageToLog m = printf "Publishing %s as type %s" (show $ messageIdentifier m) (show $ messageType m)
defaultMain :: FilePath -> IO ()
defaultMain path = do
conf <- readConfig path
pass <- getPassword (conf ^. amqp . password)
let rabbitConnect = openConnection
(conf ^. amqp . host . T.unpacked)
(conf ^. amqp . vhost)
(conf ^. amqp . username)
pass
bracket rabbitConnect closeConnection $ \conn -> do
SQL.withConnection (conf ^. sqlite) $ \sqlConn -> do
SQL.execute_ sqlConn "create table if not exists membership (reddit_id primary key)"
chan <- openChannel conn
declareExchange chan newExchange { exchangeName = "reddit_posts", exchangeType = "fanout" }
sess <- newSession
let encoder = amqpPublisher chan "reddit_posts"
recorder = sqlRecorder sqlConn
publisher = (fmap A.encode >$< encoder) <> (fmap messageIdentifier >$< recorder) <> (maybe ParseFailed PublishMessage >$< logger)
logger = fetchToLog >$< stdoutPublisher
forever $ do
for_ (conf ^. fetchers) $ \fetcher -> do
publish logger (Fetch fetcher)
publishEntries (toMessage sqlConn publisher) sess fetcher
threadDelay (15 * 60_000_000)
getPassword :: Password -> IO Text
getPassword (Password p) = pure p
getPassword (File path) = TI.readFile path

View File

@ -0,0 +1,46 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module Network.Reddit where
import Control.Lens
import Data.Aeson (FromJSON, ToJSON, Value)
import Data.Aeson.Lens
import Data.Config
import Data.SubReddit
import Data.Text (Text)
import Network.Wreq hiding (getWith)
import Network.Wreq.Session (Session, getWith)
import Pipes (Producer, (>->), for, runEffect)
import qualified Pipes.Prelude as P
import Reddit.Publish
import Data.Maybe (maybeToList)
import Control.Monad.Trans (liftIO)
import Database.SQLite.Simple.ToField (ToField)
import Database.SQLite.Simple.FromField (FromField)
newtype RedditId = RedditId Text
deriving stock (Show, Eq)
deriving (ToJSON, FromJSON, ToField, FromField) via Text
messages :: Session -> SubReddit -> Producer Value IO ()
messages sess sre = P.unfoldr go Nothing >-> P.concat
where
go :: Maybe Text -> IO (Either () ([Value], Maybe Text))
go after = do
let opts = defaults & header "User-Agent" .~ ["reddit-pubsub"] & param "after" .~ maybeToList after
r <- getWith opts sess ("https://www.reddit.com/r/" <> getSubReddit sre <> ".json")
let msgs = r ^.. responseBody . key "data" . key "children" . _Array . traversed . key "data"
next = r ^? responseBody . key "data" . key "after" . _String
print next
pure $ Right (msgs, next)
publishEntries :: Publish IO Value -> Session -> Fetcher -> IO ()
publishEntries publisher sess fetcher =
runEffect $
for
(messages sess (fetcher ^. subreddit) >-> P.take (fromIntegral $ fetcher ^. entries))
(liftIO . publish publisher)

View File

6
reddit_pub/test/Spec.hs Normal file
View File

@ -0,0 +1,6 @@
module Main where
import Test.Hspec (hspec)
main :: IO ()
main = hspec $ pure ()