From 8233419e19a10e6609fa6513eb2aa238c89826e2 Mon Sep 17 00:00:00 2001 From: Mats Rauhala Date: Mon, 15 Nov 2021 20:49:51 +0200 Subject: [PATCH] Some debugging --- FeedMonad/default.nix | 6 ++-- FeedMonad/src/Feed/Parser.hs | 40 ++++++++++++---------- FeedMonad/src/FeedMonad.hs | 64 ++++++++++++++++++++++++++++-------- FeedMonad/src/Trace.hs | 2 +- 4 files changed, 77 insertions(+), 35 deletions(-) diff --git a/FeedMonad/default.nix b/FeedMonad/default.nix index 6d95921..4221699 100644 --- a/FeedMonad/default.nix +++ b/FeedMonad/default.nix @@ -1,6 +1,6 @@ { mkDerivation, acid-state, base, bytestring, containers, feed , free, http-client, http-client-tls, lens, lib, mtl, safecopy -, servant, servant-server, text, xdg-basedir +, servant, servant-server, text, time, xdg-basedir, xml-conduit }: mkDerivation { pname = "FeedMonad"; @@ -8,8 +8,8 @@ mkDerivation { src = ./.; libraryHaskellDepends = [ acid-state base bytestring containers feed free http-client - http-client-tls lens mtl safecopy servant servant-server text - xdg-basedir + http-client-tls lens mtl safecopy servant servant-server text time + xdg-basedir xml-conduit ]; license = "unknown"; hydraPlatforms = lib.platforms.none; diff --git a/FeedMonad/src/Feed/Parser.hs b/FeedMonad/src/Feed/Parser.hs index 5943572..1061896 100644 --- a/FeedMonad/src/Feed/Parser.hs +++ b/FeedMonad/src/Feed/Parser.hs @@ -5,7 +5,6 @@ import Control.Lens (from, lazy, re, view, _Just) import Data.ByteString.Lazy (ByteString) import Data.Entry (Entry(..), Tag(Tag)) import Data.Foldable (toList) -import Data.Maybe (mapMaybe) import qualified Data.Set as S import Data.Text.Strict.Lens (unpacked, utf8) import Data.URL (URL(URL), _URL) @@ -23,18 +22,20 @@ import Text.XML , renderLBS ) import Data.Time (parseTimeM, rfc822DateFormat, defaultTimeLocale, iso8601DateFormat) +import Data.Text (Text) +import qualified Data.Text as T -parseAtom :: Atom.Feed -> [Entry] -parseAtom Atom.Feed{Atom.feedEntries=es} = mapMaybe parseEntry es +parseAtom :: Atom.Feed -> [Either Text Entry] +parseAtom Atom.Feed{Atom.feedEntries=es} = map parseEntry es where - parseEntry :: Atom.Entry -> Maybe Entry + parseEntry :: Atom.Entry -> Either Text Entry parseEntry atomEntry = Entry - <$> view (unpacked . from _URL . re _Just) entryId - <*> title - <*> content + <$> note "Missing entry id" (view (unpacked . from _URL . re _Just) entryId) + <*> note "Missing title" title + <*> note "Missing content" content <*> pure 0 <*> pure mempty - <*> (parseTimeM True defaultTimeLocale (iso8601DateFormat (Just "%H:%M:%S%Q%EZ")) . view unpacked =<< entryPublished) + <*> note ("Missing time: " <> T.pack (show entryUpdated)) (parseTimeM True defaultTimeLocale (iso8601DateFormat (Just "%H:%M:%S%Q%EZ")) . view unpacked $ entryUpdated) where content = case entryContent of @@ -49,7 +50,7 @@ parseAtom Atom.Feed{Atom.feedEntries=es} = mapMaybe parseEntry es Atom.Entry{ Atom.entryId , Atom.entryTitle , Atom.entryContent - , Atom.entryPublished } = atomEntry + , Atom.entryUpdated } = atomEntry renderElement :: [Element] -> ByteString renderElement els = renderLBS def doc @@ -58,18 +59,23 @@ renderElement els = renderLBS def doc doc = Document {documentPrologue = prologue, documentRoot = el, documentEpilogue = []} prologue = Prologue {prologueBefore = [], prologueDoctype = Nothing, prologueAfter = []} -parseRSS :: RSS.RSS -> [Entry] +-- | Add context to a Maybe by converting it into an Either +note :: e -> Maybe a -> Either e a +note e Nothing = Left e +note _ (Just a) = Right a + +parseRSS :: RSS.RSS -> [Either Text Entry] parseRSS RSS.RSS{RSS.rssChannel=RSS.RSSChannel{RSS.rssItems = items}} = - mapMaybe parseItem items + map parseItem items where - parseItem :: RSS.RSSItem -> Maybe Entry + parseItem :: RSS.RSSItem -> Either Text Entry parseItem item = Entry - <$> (URL . view unpacked <$> rssItemLink) - <*> rssItemTitle - <*> (regularContent <> Just otherContent) + <$> note "Missing entry url" (URL . view unpacked <$> rssItemLink) + <*> note "Missing title" rssItemTitle + <*> note "Missing content" (regularContent <> Just otherContent) <*> pure 0 <*> pure (foldMap (S.singleton . Tag . RSS.rssCategoryValue) rssItemCategories) - <*> (parseTimeM True defaultTimeLocale rfc822DateFormat . view unpacked =<< rssItemPubDate) + <*> note ("Missing time: " <> T.pack (show rssItemPubDate)) (parseTimeM True defaultTimeLocale rfc822DateFormat . view unpacked =<< rssItemPubDate) where regularContent = view (re utf8 . lazy) <$> rssItemContent otherContent = renderElement (concatMap (toList . fromXMLElement) rssItemOther) @@ -81,7 +87,7 @@ parseRSS RSS.RSS{RSS.rssChannel=RSS.RSSChannel{RSS.rssItems = items}} = , RSS.rssItemPubDate } = item -parseEntries :: Feed -> [Entry] +parseEntries :: Feed -> [Either Text Entry] parseEntries (AtomFeed atom) = parseAtom atom parseEntries (RSSFeed rss) = parseRSS rss parseEntries (RSS1Feed _rss1) = trace "rss1" [] diff --git a/FeedMonad/src/FeedMonad.hs b/FeedMonad/src/FeedMonad.hs index 67febd5..98d04f7 100644 --- a/FeedMonad/src/FeedMonad.hs +++ b/FeedMonad/src/FeedMonad.hs @@ -2,17 +2,18 @@ {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE LambdaCase #-} module FeedMonad where import Control.Exception (bracket) import Control.Monad.App (App, runApp) import Control.Monad.HTTP (execute, fetch) import Control.Monad.Reader (asks) -import Control.Monad.Trans (liftIO) +import Control.Monad.Trans (liftIO, MonadIO) import Data.Acid (AcidState(closeAcidState), openLocalState, query, update) import Data.Category (Category) import Data.Environment -import Data.Foldable (for_, toList) +import Data.Foldable (for_, toList, traverse_) import Data.Text (Text) import Database ( CountEntries(CountEntries) @@ -27,6 +28,12 @@ import Numeric.Natural (Natural) import Text.Feed.Import (parseFeedSource) import Data.Entry (Entry(entryURL)) import qualified Data.Map.Strict as M +import qualified Data.Text.IO as TI +import Trace +import qualified Data.Text as T +import Data.Functor.Contravariant ((>$<)) +import Text.Printf (printf) +import Data.Either (partitionEithers) newtype Minutes = Minutes Natural @@ -52,17 +59,45 @@ defaultConfig = FeedMonad , secretToken = "i am a secret" } +data TraceMsg + = UpdateFeed FeedId UpdateMsg + | UpdateFeeds -updateFeeds :: FeedMonad -> App () -updateFeeds f = do - mgr <- asks environmentManager - st <- asks environmentAcidState - for_ (feeds f) $ \c -> for_ c $ \fid -> liftIO $ do - let FeedId u = fid - entries <- maybe [] parseEntries . parseFeedSource <$> liftIO (execute mgr (fetch u)) - finalEntries <- foldMap (\e -> M.singleton (EntryId $ entryURL e) e) <$> traverse (execute mgr . filters f pure) entries - newEntries <- query st (UnseenEntries fid (M.keysSet finalEntries)) - update st (SaveEntries fid (foldMap (\eid -> toList $ M.lookup eid finalEntries) newEntries)) +data UpdateMsg + = Start + | NewEntries Int + | Failures [Text] + +formatTraceMsg :: TraceMsg -> Maybe Text +formatTraceMsg (UpdateFeed fi Start) = Just $ T.pack $ printf "Updating feed %s" (show fi) +formatTraceMsg (UpdateFeed fi (NewEntries n)) = Just $ T.pack $ printf "Feed (%s) has %d new entries" (show fi) n +formatTraceMsg (UpdateFeed _ (Failures [])) = Nothing +formatTraceMsg (UpdateFeed fi (Failures failures)) = Just $ T.pack $ printf "Feed (%s) has %d failures: %s" (show fi) (length failures) (show failures) +formatTraceMsg UpdateFeeds = Just $ T.pack $ printf "Updating feeds" + +logTrace :: MonadIO m => Trace m (Maybe Text) +logTrace = Trace $ \case + Nothing -> pure () + Just msg -> liftIO . TI.putStrLn $ msg + +updateFeeds :: Trace App TraceMsg -> FeedMonad -> App () +updateFeeds trace f = do + runTrace trace UpdateFeeds + for_ (feeds f) $ + traverse_ (\fid -> updateFeed (UpdateFeed fid >$< trace) fid) + where + updateFeed :: Trace App UpdateMsg -> FeedId -> App () + updateFeed t fid = do + let FeedId u = fid + mgr <- asks environmentManager + st <- asks environmentAcidState + runTrace t Start + (failures, entries) <- liftIO (partitionEithers . maybe [] parseEntries . parseFeedSource <$> liftIO (execute mgr (fetch u))) + runTrace t (Failures failures) + finalEntries <- liftIO (foldMap (\e -> M.singleton (EntryId $ entryURL e) e) <$> traverse (execute mgr . filters f pure) entries) + newEntries <- liftIO (query st (UnseenEntries fid (M.keysSet finalEntries))) + runTrace t (NewEntries (length newEntries)) + liftIO (update st (SaveEntries fid (foldMap (\eid -> toList $ M.lookup eid finalEntries) newEntries))) queryCategory :: FeedMonad -> App [Category (FeedId, Int)] queryCategory = traverse (traverse q) . feeds @@ -73,10 +108,11 @@ queryCategory = traverse (traverse q) . feeds (fid, ) <$> liftIO (query st (CountEntries fid)) defaultMain :: FeedMonad -> IO () -defaultMain f = +defaultMain f = do + let trace = formatTraceMsg >$< logTrace bracket (openLocalState emptyFeedMonadState) closeAcidState $ \st -> do mgr <- newTlsManager runApp (Environment mgr st) $ do - updateFeeds f + updateFeeds trace f cat <- queryCategory f liftIO $ print cat diff --git a/FeedMonad/src/Trace.hs b/FeedMonad/src/Trace.hs index 823f9ff..c848dce 100644 --- a/FeedMonad/src/Trace.hs +++ b/FeedMonad/src/Trace.hs @@ -4,6 +4,6 @@ module Trace where import Data.Functor.Contravariant (Op(..), Contravariant) import Data.Monoid (Ap(..)) -newtype Trace m a = Trace { trace :: a -> m () } +newtype Trace m a = Trace { runTrace :: a -> m () } deriving Contravariant via Op (m ()) deriving (Semigroup, Monoid) via Op (Ap m ()) a