{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} module Feed.Parser where import Control.Lens (from, lazy, re, view, _Just) import Data.ByteString.Lazy (ByteString) import Data.Entry (Entry(..), Tag(Tag)) import Data.Foldable (toList) import qualified Data.Set as S import Data.Text.Strict.Lens (unpacked, utf8) import Data.URL (URL(URL), _URL) import Debug.Trace (trace) import qualified Text.Atom.Feed as Atom import Text.Feed.Types (Feed(..)) import qualified Text.RSS.Syntax as RSS import Text.XML ( Document(Document, documentEpilogue, documentPrologue, documentRoot) , Element(..) , Node(NodeElement) , Prologue(Prologue, prologueAfter, prologueBefore, prologueDoctype) , def , fromXMLElement , renderLBS ) import Data.Time (parseTimeM, rfc822DateFormat, defaultTimeLocale, iso8601DateFormat) import Data.Text (Text) import qualified Data.Text as T parseAtom :: Atom.Feed -> [Either Text Entry] parseAtom Atom.Feed{Atom.feedEntries=es} = map parseEntry es where parseEntry :: Atom.Entry -> Either Text Entry parseEntry atomEntry = Entry <$> note "Missing entry id" (view (unpacked . from _URL . re _Just) entryId) <*> note "Missing title" title <*> note "Missing content" content <*> pure 0 <*> pure mempty <*> note ("Missing time: " <> T.pack (show entryUpdated)) (parseTimeM True defaultTimeLocale (iso8601DateFormat (Just "%H:%M:%S%Q%EZ")) . view unpacked $ entryUpdated) where content = case entryContent of Just (Atom.TextContent txt) -> Just $ view (re utf8 . lazy) txt Just (Atom.HTMLContent txt) -> Just $ view (re utf8 . lazy) txt Just (Atom.XHTMLContent xml) -> Just $ renderElement (toList $ fromXMLElement xml) _ -> Nothing title = case entryTitle of Atom.TextString s -> Just s _ -> Nothing Atom.Entry{ Atom.entryId , Atom.entryTitle , Atom.entryContent , Atom.entryUpdated } = atomEntry renderElement :: [Element] -> ByteString renderElement els = renderLBS def doc where el = Element {elementName = "span", elementAttributes = mempty, elementNodes = map NodeElement els} doc = Document {documentPrologue = prologue, documentRoot = el, documentEpilogue = []} prologue = Prologue {prologueBefore = [], prologueDoctype = Nothing, prologueAfter = []} -- | Add context to a Maybe by converting it into an Either note :: e -> Maybe a -> Either e a note e Nothing = Left e note _ (Just a) = Right a parseRSS :: RSS.RSS -> [Either Text Entry] parseRSS RSS.RSS{RSS.rssChannel=RSS.RSSChannel{RSS.rssItems = items}} = map parseItem items where parseItem :: RSS.RSSItem -> Either Text Entry parseItem item = Entry <$> note "Missing entry url" (URL . view unpacked <$> rssItemLink) <*> note "Missing title" rssItemTitle <*> note "Missing content" (regularContent <> Just otherContent) <*> pure 0 <*> pure (foldMap (S.singleton . Tag . RSS.rssCategoryValue) rssItemCategories) <*> note ("Missing time: " <> T.pack (show rssItemPubDate)) (parseTimeM True defaultTimeLocale rfc822DateFormat . view unpacked =<< rssItemPubDate) where regularContent = view (re utf8 . lazy) <$> rssItemContent otherContent = renderElement (concatMap (toList . fromXMLElement) rssItemOther) RSS.RSSItem { RSS.rssItemTitle , RSS.rssItemLink , RSS.rssItemCategories , RSS.rssItemContent , RSS.rssItemOther , RSS.rssItemPubDate } = item parseEntries :: Feed -> [Either Text Entry] parseEntries (AtomFeed atom) = parseAtom atom parseEntries (RSSFeed rss) = parseRSS rss parseEntries (RSS1Feed _rss1) = trace "rss1" [] parseEntries (XMLFeed _xml) = trace "xml" []