89 lines
3.4 KiB
Haskell
89 lines
3.4 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
module Feed.Parser where
|
|
import Control.Lens (from, lazy, re, view, _Just)
|
|
import Data.ByteString.Lazy (ByteString)
|
|
import Data.Entry (Entry(..), Tag(Tag))
|
|
import Data.Foldable (toList)
|
|
import Data.Maybe (mapMaybe)
|
|
import qualified Data.Set as S
|
|
import Data.Text.Strict.Lens (unpacked, utf8)
|
|
import Data.URL (URL(URL), _URL)
|
|
import Debug.Trace (trace)
|
|
import qualified Text.Atom.Feed as Atom
|
|
import Text.Feed.Types (Feed(..))
|
|
import qualified Text.RSS.Syntax as RSS
|
|
import Text.XML
|
|
( Document(Document, documentEpilogue, documentPrologue, documentRoot)
|
|
, Element(..)
|
|
, Node(NodeElement)
|
|
, Prologue(Prologue, prologueAfter, prologueBefore, prologueDoctype)
|
|
, def
|
|
, fromXMLElement
|
|
, renderLBS
|
|
)
|
|
import Data.Time (parseTimeM, rfc822DateFormat, defaultTimeLocale, iso8601DateFormat)
|
|
|
|
parseAtom :: Atom.Feed -> [Entry]
|
|
parseAtom Atom.Feed{Atom.feedEntries=es} = mapMaybe parseEntry es
|
|
where
|
|
parseEntry :: Atom.Entry -> Maybe Entry
|
|
parseEntry atomEntry = Entry
|
|
<$> view (unpacked . from _URL . re _Just) entryId
|
|
<*> title
|
|
<*> content
|
|
<*> pure 0
|
|
<*> pure mempty
|
|
<*> (parseTimeM True defaultTimeLocale (iso8601DateFormat (Just "%H:%M:%S%Q%EZ")) . view unpacked =<< entryPublished)
|
|
where
|
|
content =
|
|
case entryContent of
|
|
Just (Atom.TextContent txt) -> Just $ view (re utf8 . lazy) txt
|
|
Just (Atom.HTMLContent txt) -> Just $ view (re utf8 . lazy) txt
|
|
Just (Atom.XHTMLContent xml) -> Just $ renderElement (toList $ fromXMLElement xml)
|
|
_ -> Nothing
|
|
title =
|
|
case entryTitle of
|
|
Atom.TextString s -> Just s
|
|
_ -> Nothing
|
|
Atom.Entry{ Atom.entryId
|
|
, Atom.entryTitle
|
|
, Atom.entryContent
|
|
, Atom.entryPublished } = atomEntry
|
|
|
|
renderElement :: [Element] -> ByteString
|
|
renderElement els = renderLBS def doc
|
|
where
|
|
el = Element {elementName = "span", elementAttributes = mempty, elementNodes = map NodeElement els}
|
|
doc = Document {documentPrologue = prologue, documentRoot = el, documentEpilogue = []}
|
|
prologue = Prologue {prologueBefore = [], prologueDoctype = Nothing, prologueAfter = []}
|
|
|
|
parseRSS :: RSS.RSS -> [Entry]
|
|
parseRSS RSS.RSS{RSS.rssChannel=RSS.RSSChannel{RSS.rssItems = items}} =
|
|
mapMaybe parseItem items
|
|
where
|
|
parseItem :: RSS.RSSItem -> Maybe Entry
|
|
parseItem item = Entry
|
|
<$> (URL . view unpacked <$> rssItemLink)
|
|
<*> rssItemTitle
|
|
<*> (regularContent <> Just otherContent)
|
|
<*> pure 0
|
|
<*> pure (foldMap (S.singleton . Tag . RSS.rssCategoryValue) rssItemCategories)
|
|
<*> (parseTimeM True defaultTimeLocale rfc822DateFormat . view unpacked =<< rssItemPubDate)
|
|
where
|
|
regularContent = view (re utf8 . lazy) <$> rssItemContent
|
|
otherContent = renderElement (concatMap (toList . fromXMLElement) rssItemOther)
|
|
RSS.RSSItem { RSS.rssItemTitle
|
|
, RSS.rssItemLink
|
|
, RSS.rssItemCategories
|
|
, RSS.rssItemContent
|
|
, RSS.rssItemOther
|
|
, RSS.rssItemPubDate
|
|
} = item
|
|
|
|
parseEntries :: Feed -> [Entry]
|
|
parseEntries (AtomFeed atom) = parseAtom atom
|
|
parseEntries (RSSFeed rss) = parseRSS rss
|
|
parseEntries (RSS1Feed _rss1) = trace "rss1" []
|
|
parseEntries (XMLFeed _xml) = trace "xml" []
|