FeedMonad/FeedMonad/src/Feed/Parser.hs

89 lines
3.4 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
module Feed.Parser where
import Control.Lens (from, lazy, re, view, _Just)
import Data.ByteString.Lazy (ByteString)
import Data.Entry (Entry(..), Tag(Tag))
import Data.Foldable (toList)
import Data.Maybe (mapMaybe)
import qualified Data.Set as S
import Data.Text.Strict.Lens (unpacked, utf8)
import Data.URL (URL(URL), _URL)
import Debug.Trace (trace)
import qualified Text.Atom.Feed as Atom
import Text.Feed.Types (Feed(..))
import qualified Text.RSS.Syntax as RSS
import Text.XML
( Document(Document, documentEpilogue, documentPrologue, documentRoot)
, Element(..)
, Node(NodeElement)
, Prologue(Prologue, prologueAfter, prologueBefore, prologueDoctype)
, def
, fromXMLElement
, renderLBS
)
import Data.Time (parseTimeM, rfc822DateFormat, defaultTimeLocale, iso8601DateFormat)
parseAtom :: Atom.Feed -> [Entry]
parseAtom Atom.Feed{Atom.feedEntries=es} = mapMaybe parseEntry es
where
parseEntry :: Atom.Entry -> Maybe Entry
parseEntry atomEntry = Entry
<$> view (unpacked . from _URL . re _Just) entryId
<*> title
<*> content
<*> pure 0
<*> pure mempty
<*> (parseTimeM True defaultTimeLocale (iso8601DateFormat (Just "%H:%M:%S%Q%EZ")) . view unpacked =<< entryPublished)
where
content =
case entryContent of
Just (Atom.TextContent txt) -> Just $ view (re utf8 . lazy) txt
Just (Atom.HTMLContent txt) -> Just $ view (re utf8 . lazy) txt
Just (Atom.XHTMLContent xml) -> Just $ renderElement (toList $ fromXMLElement xml)
_ -> Nothing
title =
case entryTitle of
Atom.TextString s -> Just s
_ -> Nothing
Atom.Entry{ Atom.entryId
, Atom.entryTitle
, Atom.entryContent
, Atom.entryPublished } = atomEntry
renderElement :: [Element] -> ByteString
renderElement els = renderLBS def doc
where
el = Element {elementName = "span", elementAttributes = mempty, elementNodes = map NodeElement els}
doc = Document {documentPrologue = prologue, documentRoot = el, documentEpilogue = []}
prologue = Prologue {prologueBefore = [], prologueDoctype = Nothing, prologueAfter = []}
parseRSS :: RSS.RSS -> [Entry]
parseRSS RSS.RSS{RSS.rssChannel=RSS.RSSChannel{RSS.rssItems = items}} =
mapMaybe parseItem items
where
parseItem :: RSS.RSSItem -> Maybe Entry
parseItem item = Entry
<$> (URL . view unpacked <$> rssItemLink)
<*> rssItemTitle
<*> (regularContent <> Just otherContent)
<*> pure 0
<*> pure (foldMap (S.singleton . Tag . RSS.rssCategoryValue) rssItemCategories)
<*> (parseTimeM True defaultTimeLocale rfc822DateFormat . view unpacked =<< rssItemPubDate)
where
regularContent = view (re utf8 . lazy) <$> rssItemContent
otherContent = renderElement (concatMap (toList . fromXMLElement) rssItemOther)
RSS.RSSItem { RSS.rssItemTitle
, RSS.rssItemLink
, RSS.rssItemCategories
, RSS.rssItemContent
, RSS.rssItemOther
, RSS.rssItemPubDate
} = item
parseEntries :: Feed -> [Entry]
parseEntries (AtomFeed atom) = parseAtom atom
parseEntries (RSSFeed rss) = parseRSS rss
parseEntries (RSS1Feed _rss1) = trace "rss1" []
parseEntries (XMLFeed _xml) = trace "xml" []