{-# LANGUAGE PatternGuards,BangPatterns #-}
--------------------------------------------------------------------
-- |
-- Module:     Watch an RSS/Atom feed and write it to an IRC channel
-- Copyright : (c) Don Stewart 2008-2009, Simon Michael 2009
-- License   : BSD3
-- More info : rss2irc.cabal
--
--------------------------------------------------------------------
{-
TODO:
rss2irc.hs:17:0:
    Warning: The import of `Data.Either' is redundant
               except perhaps to import instances from `Data.Either'
             To import instances alone, use: import Data.Either()

rss2irc.hs:53:0:
    Warning: The import of `hPrintf'
             from module `Text.Printf' is redundant

rss2irc.hs:245:2:
    Warning: A do-notation statement discarded a result of type GHC.Conc.ThreadId.
             Suppress this warning by saying "_ <- ($)
                                                     forkIO
                                                     ($)
                                                       forever
                                                       do { catch
                                                              feedReaderThread b f
                                                              \ e -> (($)
                                                                        unless
                                                                          (Main.Quiet `elem` opts)
                                                                        (>>=)
                                                                          getTimeStamp
                                                                          \ t -> ...) }",
             or by using the flag -fno-warn-unused-do-bind

rss2irc.hs:251:2:
    Warning: A do-notation statement discarded a result of type GHC.Conc.ThreadId.
             Suppress this warning by saying "_ <- ($)
                                                     forkIO ircWriterThread bv 0",
             or by using the flag -fno-warn-unused-do-bind

rss2irc.hs:292:4:
    Warning: A do-notation statement discarded a result of type ghc-prim:GHC.Prim.Any.
             Suppress this warning by saying "_ <- (>>=)
                                                     getTimeStamp
                                                     printf ((++) "\n%s: polled " (++) url "\n")",
             or by using the flag -fno-warn-unused-do-bind

rss2irc.hs:295:4:
    Warning: A do-notation statement discarded a result of type ghc-prim:GHC.Prim.Any.
             Suppress this warning by saying "_ <- printf
                                                     "total polls, failed polls, items announced: %10d %10d %10d\n"
                                                     polls
                                                     failedpolls
                                                     numannounced",
             or by using the flag -fno-warn-unused-do-bind

rss2irc.hs:332:8:
    Warning: A do-notation statement discarded a result of type ghc-prim:GHC.Prim.Any.
             Suppress this warning by saying "_ <- printf
                                                     "total polls, failed polls, items announced: %10d %10d %10d\n"
                                                     polls'
                                                     failedpolls'
                                                     numannounced'",
             or by using the flag -fno-warn-unused-do-bind

-}

module Main where
import Control.Concurrent (forkIO,threadDelay)
import Control.Concurrent.Chan (Chan,newChan,writeList2Chan,readChan,unGetChan)
import Control.Concurrent.SampleVar (SampleVar,newSampleVar,writeSampleVar,readSampleVar)
import Control.Monad (when,unless,forever)
import Data.List (isPrefixOf,foldl',stripPrefix,intercalate)
import Data.List.Split (splitOn)
import Data.Either (either)
import Data.Maybe (fromMaybe,fromJust,isJust,isNothing)
import Data.Time.Clock (UTCTime,getCurrentTime,diffUTCTime)
import Data.Time.Format (parseTime)
import Data.Time.LocalTime (LocalTime,getCurrentTimeZone,utcToLocalTime)
import Locale (defaultTimeLocale)
import Network (PortID(PortNumber), connectTo)
import Network.Browser (browse,request,setAllowRedirects,setOutHandler)
import Network.HTTP (Response,getRequest,rspBody,rspCode)
import Network.IRC (Message(Message),msg_command,msg_params,decode,encode,nick,user,joinChan,privmsg) -- part,quit)
import Network.URI (URI(URI),uriScheme,parseURI)
import qualified Control.Exception.Extensible as E (bracket,catch)
import Control.Exception.Extensible (fromException)
import Control.Parallel.Strategies (NFData) --(rnf),($|))
import System.Console.GetOpt (OptDescr(Option), ArgDescr(ReqArg,NoArg), ArgOrder(Permute), getOpt, usageInfo)
import System.Environment (getArgs)
import System.Exit (exitWith, ExitCode(ExitSuccess), exitFailure)
import System.IO (Handle, BufferMode(NoBuffering),stdout,hSetBuffering,hFlush,hClose,hGetLine,hPutStr)
import Text.Feed.Import (parseFeedString)
import Text.Feed.Constructor (withItemDescription)
import Text.Feed.Query (feedItems
                       ,getItemTitle
                       ,getItemLink
                       ,getItemPublishDate
                       ,getItemDate
                       ,getItemAuthor
                       ,getItemCommentLink
                       ,getItemEnclosure
                       ,getItemFeedLink
                       ,getItemId
                       ,getItemCategories
                       ,getItemRights
                       ,getItemSummary
                       ,getItemDescription
                       )
import Text.Feed.Types (Feed(XMLFeed),Item)
import Text.Printf (printf,hPrintf)
import Text.RegexPR (splitRegexPR,gsubRegexPR)
-- import Debug.Trace
-- strace :: Show a => a -> a
-- strace a = trace (show a) a

defaultport, defaultinterval, defaultidle, defaultmaxitems, maxmessagelength, maxtitlelength, maxdesclength, maxauthorlength, maxdatelength, maxlinklength :: Int
defaultport      = 6667         -- default irc port
defaultinterval  = 5            -- default polling interval in minutes
defaultidle      = 0            -- default required silent time before announcing
defaultmaxitems  = 5            -- default max items to announce per interval
maxmessagelength = 400          -- max characters per irc message
-- max field sizes. Max announcement length will be the sum of these
-- plus typically 15 due to prettification plus any length increase
-- due to --replace. The below should keep most announcements within
-- maxmessagelength and all announcements within maxmessagelength * 2 or so.
maxtitlelength   = 100
maxdesclength    = 300
maxauthorlength  = 50
maxdatelength    = 50
maxlinklength    = 200

-- XXX hmm, none of these work for announcing recent-but-not-newest items from a blog added to a planet
-- | Strategies available for detecting announceable items. See also `feedReaderThread`.
--
-- - topnew - unseen items at the top, newer than last announced. Best for most feeds, especially jittery ones.
--
-- - allnew - unseen items anywhere, newer than last announced. Good for feeds with unreliable item ordering,
--   or planets (feed aggregators) when a new feed is added.
--
-- - top - any items above the previous top item. Good for feeds not ordered by date, eg a darcs repo's.
--
announcestrategies :: [String]
announcestrategies = ["topnew","allnew","top"]

defaultannouncestrategy :: String
defaultannouncestrategy = head announcestrategies

options :: [OptDescr Opt]
options =
    [Option ['p'] ["port"]        (ReqArg Port "PORT")    "irc server port (default 6667)"
    ,Option []    ["ident"]       (ReqArg Ident "STR")    "set the bot's identity string (useful for contact info)"
    ,Option []    ["delay"]       (ReqArg Delay "N")      "wait for N minutes before starting (helps avoid mass joins)"
    ,Option ['i'] ["interval"]    (ReqArg Interval "N")   ("polling interval in minutes (default "++(show defaultinterval)++")")
    ,Option []    ["idle"]        (ReqArg Idle "N")       ("announce only when channel has been idle N minutes (default "++(show defaultidle)++")")
    ,Option ['m'] ["max-items"]   (ReqArg MaxItems "N")   ("announce at most N items per polling interval (default "++(show defaultmaxitems)++")")
    ,Option ['r'] ["recent"]      (ReqArg Recent "N")     "announce up to N recent items at startup (default 0)"
    ,Option []    ["announce"]    (ReqArg Announce $ intercalate "|" announcestrategies) ("which items to announce (default: "++defaultannouncestrategy++")")
    ,Option []    ["no-title"]    (NoArg NoTitle)         ("don't show title (title is announced by default, up to "++(show maxtitlelength)++" chars)")
    ,Option ['a'] ["author"]      (NoArg Author)          ("show author (up to "++(show maxauthorlength)++" chars)")
    ,Option ['d'] ["description"] (NoArg Description)     ("show description (up to "++(show maxdesclength)++" chars)")
    ,Option ['l'] ["link"]        (NoArg Link)            ("show link URL (up to "++(show maxlinklength)++" chars)")
    ,Option ['t'] ["time"]        (NoArg Time)            ("show timestamp (up to "++(show maxdatelength)++" chars)")
    ,Option ['e'] ["email"]       (NoArg Email)           "show email addresses (stripped by default)"
    ,Option ['h'] ["html"]        (NoArg Html)            "show HTML tags and entities (stripped by default)"
    ,Option []    ["action"]      (NoArg Action)          "use CTCP ACTIONs instead of normal IRC messages"
    ,Option []    ["dupe-descriptions"] (NoArg DupeDescriptions) "show identical consecutive descriptions (elided by default)"
    ,Option []    ["replace"]     (ReqArg Replace "\"OLD/NEW\"") "replace OLD with NEW (regexpr patterns)"
    ,Option ['n'] ["num-iterations"] (ReqArg NumIterations "N") "exit after N iterations"
    ,Option ['q'] ["quiet"]       (NoArg Quiet)           "silence normal console output"
    ,Option []    ["debug"]       (NoArg Debug)           "do not connect to irc (same as no irc argument)"
    ,Option []    ["debug-irc"]   (NoArg DebugIrc)        "show irc activity"
    ,Option []    ["debug-feed"]  (NoArg DebugFeed)       "show feed items and polling stats"
    ,Option []    ["debug-xml"]   (NoArg DebugXml)        "show feed content"
    ,Option []    ["debug-http"]  (NoArg DebugHttp)       "show feed fetching progress"
    ]

help :: IO a
help = do
  putStrLn "Usage: rss2irc FEEDURL [BOTNAME@IRCSERVER/#CHANNEL] [OPTS]"
  putStrLn "Options:"
  putStrLn (usageInfo "" options)
  exitWith ExitSuccess

data Opt =
          Port {value::String}
        | Ident {value::String}
        | Delay {value::String}
        | Interval {value::String}
        | MaxItems {value::String}
        | Recent {value::String}
        | Announce {value::String}
        | Idle {value::String}
        | Author
        | Description
        | Time
        | Link
        | NoTitle
        | Email
        | Html
        | Action
        | DupeDescriptions
        | Replace {value::String}
        | NumIterations {value::String}
        | Quiet
        | Debug
        | DebugHttp
        | DebugXml
        | DebugFeed
        | DebugIrc
    deriving (Eq, Show)

data Bot = Bot { socket  :: Handle
               , server  :: String
               , port    :: !Int
               , channel :: String
               , botnick    :: String
               , botopts :: ![Opt]
               , outputqueue :: Chan String
               , lastmsgtime :: UTCTime
               }

instance NFData Item

main :: IO ()
main = do
    (opts, args, errs) <- getOpt Permute options `fmap` getArgs
    let delay = optIntValue Delay 0 opts
        interval = optIntValue Interval defaultinterval opts
        announcestrategy = optValue Announce defaultannouncestrategy opts
        errs' = errs
                ++ if (interval > 0 || (isJust $ numIterations opts)) then [] else ["Eh.. no."]
                ++ if announcestrategy `elem` announcestrategies then [] else ["--announce should be one of "++intercalate ", " announcestrategies]
    when (not . null $ errs') $ mapM_ putStrLn errs' >> help
    -- force early failure if there is a bad regexp. XXX Don't know how to catch this yet.
    seq (applyReplacements opts "") $ return ()
    when (delay > 0) $ threadDelay $ delay * minutes

    q <- newChan
    t <- getCurrentTime
    (feed,bot) <- case args of
         [f,nsc] -> case map (splitOn "@") $ splitOn "/" $ maybe nsc id (stripPrefix "irc://" nsc) of
                      [[n,s],[c]] -> return (f, Bot{ socket  = stdout
                                                 , server  = s
                                                 , port    = optIntValue Port defaultport opts
                                                 , channel = c
                                                 , botnick    = n
                                                 , botopts = opts
                                                 , outputqueue = q
                                                 , lastmsgtime = t
                                                 })
                      _ -> help
         [f] -> return (f, Bot{ socket  = stdout
                              , server  = ""
                              , port    = 0
                              , channel = ""
                              , botnick    = ""
                              , botopts = Debug:opts
                              , outputqueue = q
                              , lastmsgtime = t
                              })
         _ -> help

    -- XXX error handling needs review. run tries to handle some errors by
    -- restarting its threads, but does not reconnect the bot. Some errors
    -- might be caught but not propagated here, in which case the thread
    -- exits. Errors which do propagate here cause a program exit.
    E.bracket
       (connect bot)
       (disconnect)
       (\b -> run b feed `E.catch` exit)
       where 
         exit e = case fromException e of
                    Just ExitSuccess -> exitWith ExitSuccess
                    _ -> getTimeStamp >>= \t -> printf "%s: rss2irc error: run died with: %s, exiting\n" t (show e) >> exitFailure

-- | Connect to the irc server.
connect :: Bot -> IO Bot
connect b | Debug `elem` (botopts b) = do
  unless (Quiet `elem` botopts b) $ printf "Skipping IRC connection due to --debug\n"
  return b
connect b@(Bot{server=s,port=p,channel=c,botnick=n,botopts=opts}) = do
  let ident = optValue Ident "rss2irc gateway" opts
  unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: %s connecting to %s, channel %s...\n" t n s c >> hFlush stdout
  h <- connectTo s (PortNumber $ fromIntegral p)
  hSetBuffering h NoBuffering
  let b' = b{socket=h}
  ircWrite b' $ encode $ nick n
  ircWrite b' $ encode $ user n "0" "*" ident
  (connected,err) <- ircWaitForServerResp b' -- some servers require this, eg quakenet
  if not connected
   then fail $ printf "rss2irc error: irc connection failed with %s\n" err
   else do
     ircWrite b' $ encode $ joinChan c
     unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: connected.\n" t >> hFlush stdout
     return b'

-- | Given a connected bot, start various threads to poll and announce,
-- restarting them if they fail.
run :: Bot -> String -> IO ()
run b@(Bot{botopts=opts}) f = do
  -- XXX do errors in forked threads propagate to the bracket above ? I think not.
  forkIO $ forever $ do
    feedReaderThread b f
      `catch` \e -> (unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: rss2irc error: feed reader thread died with %s, restarting\n" t (show e) >> hFlush stdout)
  bv <- newSampleVar b
  -- XXX reconnect bot when these fail (?)
  -- forkIO $ forever $ do
  forkIO $ ircWriterThread bv 0
  --    `E.catch` \e -> (unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: rss2irc error: irc writer thread died with %s, restarting\n" t (show $ fromException e) >> hFlush stdout)
  -- forever $ do
  ircResponderThread bv
  --    `E.catch` \e -> (unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: rss2irc error: irc responder thread died with %s, restarting\n" t (show $ fromException e) >> hFlush stdout)

-- | Disconnect from the irc server.
disconnect :: Bot -> IO ()
disconnect b | Debug `elem` (botopts b) = return ()
disconnect b = hClose $ socket b

-- feed stuff

-- | Poll the feed every interval minutes and send announceable items
-- to the announcer thread.
--
-- Some smartness is needed to be robust with real-world feeds, which
-- may have jitter due to http caching, unreliable feed order,
-- unpredictable or missing item dates, etc. The most robust approach
-- seems to be: assume that feeds provide items sorted newest
-- first. Then, announceable items are the new (newer pub date than
-- the last announced item) and unseen (id not among the last N ids
-- seen since startup) items which appear at the top of the feed. This
-- is the default \"topnew\" strategy. We support some other strategies
-- which may be useful in some cases: \"allnew\" (announce new unseen
-- items appearing anywhere in the feed) and \"top\" (announce items
-- appearing above the previous top item, new or not).
--
feedReaderThread :: Bot -> String -> IO ()
feedReaderThread b@(Bot{botopts=opts,outputqueue=q}) url = do
  when (iterations == Just 0) $ exitIterations
  unless (Quiet `elem` opts) $ printf "Polling %s every %s\n" url m >> hFlush stdout
  (is,polls,failedpolls) <- pollUntilFetchItems url delay opts 0 0 -- go no further until we have baseline items

  let seen = map itemId is
      recent = take (optIntValue Recent 0 opts) is
      announceable = reverse recent
      announceable' = (if DupeDescriptions `elem` opts then id else elideDuplicateDescriptions) announceable
      lastpubdate = if null announceable' then Nothing else getItemPublishDate $ last announceable'
      numannounced = fromIntegral $ length announceable'
  when (DebugFeed `elem` opts) $ do
    getTimeStamp >>= printf ("\n%s: polled " ++ url ++ "\n")
    printItemDetails "feed items, in feed order" is
    printItemDetails "announceable items, oldest first" announceable
    printf "total polls, failed polls, items announced: %10d %10d %10d\n" polls failedpolls numannounced
    hFlush stdout
  writeList2Chan q $ map (announcement b) announceable'

  go seen lastpubdate iterations numannounced polls failedpolls

  where
    go :: [String] -> Maybe String -> Maybe Int -> Integer -> Integer -> Integer -> IO ()
    go !seen !lastpubdate !iterationsleft !numannounced !polls !failedpolls = do
      when (iterationsleft == Just 0) $ exitIterations
      threadDelay delay
      when (DebugFeed `elem` opts) $ do
        getTimeStamp >>= \t -> printf "\n%s: polling %s\n" t url

      fetched <- fetchItems url opts
      let polls' = polls + 1
          failedpolls' | isLeft fetched = failedpolls+1
                       | otherwise      = failedpolls
          fetched' = either (const []) id fetched
          announcestrategy = optValue Announce defaultannouncestrategy opts
          isprevioustop = ((== head seen).itemId)
          isunseen = ((`notElem` seen).itemId)
          isnew = (`notOlderThan` lastpubdate)
          isnewunseen i = isnew i && isunseen i
          new = case announcestrategy of
                  "top"    -> takeWhile (not.isprevioustop) fetched'
                  "allnew" -> filter isnewunseen fetched'
                  _        -> takeWhile isnewunseen fetched' -- "topnew"
          seen' = take windowsize $ (map itemId new) ++ seen
          announceable = reverse new
          announceable' = (if DupeDescriptions `elem` opts then id else elideDuplicateDescriptions) announceable
          lastpubdate' = if null announceable' then lastpubdate else getItemPublishDate $ last announceable'
          numannounced' = numannounced + (fromIntegral $ length announceable')
          iterationsleft' = maybe Nothing (Just . pred) iterationsleft
      when (DebugFeed `elem` opts) $ do
        printItemDetails "feed items, in feed order" fetched'
        printItemDetails "announceable items, oldest first" announceable
        printf "total polls, failed polls, items announced: %10d %10d %10d\n" polls' failedpolls' numannounced'
        hFlush stdout
      writeList2Chan q $ map (announcement b) announceable'

      go seen' lastpubdate' iterationsleft' numannounced' polls' failedpolls'

    iterations = numIterations opts
    exitIterations = unless (Quiet `elem` opts) (printf "Exiting after %d iterations.\n" (fromJust iterations)) >> exitWith ExitSuccess
    interval = optIntValue Interval defaultinterval opts
    m = if interval==1 then "minute" else show interval ++ " minutes"
    delay = interval * minutes
    windowsize = 200

-- | Check if an item's publish date is older than another date.
-- Either date may be Nothing, a parseable date string or unparseable.
-- In the (likely) event we can't parse two dates, return True.
notOlderThan :: Item -> Maybe String -> Bool
notOlderThan _ Nothing = True
notOlderThan i (Just s1) =
    case getItemPublishDate i of
      Nothing -> True
      Just s2 -> case (parseDateTime s1, parseDateTime s2) of
                   (Just d1, Just d2) -> d2 >= d1
                   _ -> True

-- | Elide any identical consecutive item descriptions.
elideDuplicateDescriptions :: [Item] -> [Item]
elideDuplicateDescriptions = elidedupes Nothing
    where
      elidedupes :: Maybe String -> [Item] -> [Item]
      elidedupes _ [] = []
      elidedupes (Just lastdesc) (i:is)
          | getItemDescription i==Just lastdesc = [withItemDescription ditto i] ++ elidedupes (Just lastdesc) is
          where ditto = "''" -- or http://en.wikipedia.org/wiki/Ditto_mark : 〃
      elidedupes _ (i:is) = [i] ++ elidedupes (getItemDescription i) is

-- | Get the best available unique identifier for a feed item.
itemId :: Item -> String
itemId i = case getItemId i of 
             Just (_,s) -> s
             Nothing    -> case getItemTitle i of
                             Just s  -> s
                             Nothing -> case getItemDate i of
                                          Just s  -> s
                                          Nothing -> show i

-- | Like fetchItems, but if it fails with a transient error, keep
-- retrying at the specified interval. Returns a tuple of items, poll
-- and failed poll counts, or throws an IO error.
pollUntilFetchItems :: String -> Int -> [Opt] -> Integer -> Integer -> IO ([Item],Integer,Integer)
pollUntilFetchItems url delay opts polls failedpolls = do
  is <- fetchItems url opts
  case is of Right is' -> return (is',polls+1,failedpolls)
             Left _    -> do
               threadDelay delay
               pollUntilFetchItems url delay opts (polls+1) (failedpolls+1)

-- | Get the items from the feed at the specified url, with redirects
-- and authentication allowed, or an error string, or throw an IO
-- error if the error looks permanent.
fetchItems :: String -> [Opt] -> IO (Either String [Item])
fetchItems url opts = either Left (Right . feedItems) `fmap` readFeed url opts

-- | Fetch a feed, with redirects and authentication allowed, or an error string,
-- or throw an IO error if the error looks permanent. Also show the raw content
-- as debug output if that option is in effect.
readFeed :: String -> [Opt] -> IO (Either String Feed)
readFeed url opts = do
    s <- readUri url opts
    case s of Left  e  -> return $ Left e
              Right s' -> do
                   when (DebugXml `elem` opts) $ do
                            getTimeStamp >>= \t -> printf "\n%s: feed content:\n%s\n" t s'
                   case parseFeedString s' of
                     Nothing          -> noparse
                     Just (XMLFeed _) -> noparse
                     Just f           -> return $ Right f
                    where
                      noparse = return $ Left "could not parse feed"

-- | Fetch the contents of a uri, with redirects and authentication allowed, or an error string,
-- or throw an IO error if the error looks permanent. Also show the http transaction progress
-- as debug output if that option is in effect. "file:..." uris are also allowed.
readUri :: String -> [Opt] -> IO (Either String String)
readUri uri opts =
    case parseURI uri of
      Just URI{uriScheme="file:"} -> do
        Right `fmap` readFile (drop 5 uri)
                      `catch` \e -> return $ Left $ show e
      _ -> do
        (_uri',rsp) <- browse $ do
                         if (DebugHttp `elem` opts)
                          then setOutHandler (\s -> getTimeStamp >>= \t -> printf "\n%s: %s\n" t s)
                          else setOutHandler (const $ return ())
                         setAllowRedirects True
                         request $ getRequest uri
        case rspCode rsp of
          (2,_,_) -> return $ Right $ rspBody rsp
          code -> do
            getTimeStamp >>= \t -> printf "%s: rss2irc error fetching %s: %s\n" t uri (show code) >> hFlush stdout
            return $ Left $ show code

-- | Dump item details to the console for debugging.
printItemDetails :: String -> [Item] -> IO ()
printItemDetails hdr is = printf "%s: %d\n%s" hdr count items >> hFlush stdout
    where
      items = unlines [printf " %-29s%s  %-*s" d p twidth t | (d,p,t,_) <- fields]
      twidth = maximum $ map (length.fromMaybe "".getItemTitle) is
      -- subhdr = "(date, (publish date if different), title)\n"
      -- subhdr' = if null is then "" else subhdr
      count = length is
      fields = [(d, if p==d then "" else printf "  pubdate:%-29s" p, t, i) | item <- is
               ,let d = fromMaybe "" $ getItemDate item
               ,let p = fromMaybe "" $ getItemPublishDate item
               ,let t = fromMaybe "" $ getItemTitle item
               ,let i = maybe "" show $ getItemId item
               ]

-- deriving instance Eq Item
instance Eq Item where
    (==) a b = let match f = f a == f b in
               all match [getItemTitle
                         ,getItemLink
                         ,getItemPublishDate
                         ,getItemDate
                         ,getItemAuthor
                         ,getItemCommentLink
                         ,getItemFeedLink
                         ,getItemRights
                         ,getItemSummary
                         ,getItemDescription
                         ]
                 && match getItemCategories
                 && match getItemEnclosure
                 && match getItemId

-- | Convert a feed item to a string for the bot to announce on irc.
-- The announcement is likely but not guaranteed to fit within a
-- single irc message.
announcement:: Bot -> Item -> String
announcement (Bot{botopts=opts}) i = applyReplacements opts $ printf "%s%s%s%s%s" title desc author date link
    where
      title = if elem NoTitle opts then "" else maybe "" (truncateWordsAt maxtitlelength "..." . clean) (getItemTitle i)
      desc = ifopt Description $ maybe "" ((" - "++) . truncateWordsAt maxdesclength "..." . clean) (getItemDescription i)
      author = ifopt Author $ maybe "" ((" "++) . parenthesise . truncateWordsAt maxauthorlength "..." . clean) (getItemAuthor i)
      date = ifopt Time $ maybe "" ((" "++) . truncateAt maxdatelength "..." . clean) (getItemDate i)
      link = ifopt Link $ maybe "" (("  "++) . truncateAt maxlinklength "..." . clean) (getItemLink i)

      clean = oneline . trimwhitespace . striphtml . stripemail
      ifopt o = if elem o opts then id else const ""
      oneline = intercalate "  " . map strip . lines -- two spaces to hint at newlines & brs
      trimwhitespace = gsubRegexPR "[ \t][ \t]+" " "
      striphtml = if elem Html opts then id else stripHtml . brtonewline
      brtonewline = gsubRegexPR "(<|&lt;) *br */?(>|&gt;)" "\n"
      stripemail = if elem Email opts then id else stripEmails
      parenthesise = (++")").("("++)

-- irc stuff

-- | Wait for server connection confirmation.
ircWaitForServerResp :: Bot -> IO (Bool,String)
ircWaitForServerResp b@(Bot{socket=h,botopts=opts}) = do
  if (Debug `elem` opts)
   then return (True,"")
   else do
    s <- hGetLine h
    when (DebugIrc `elem` opts) $ getTimeStamp >>= \t -> printf "%s: <-%s\n" t s >> hFlush stdout
    if isping s then pong b s >> ircWaitForServerResp b else do
            if isResponseOK s then return (True, s) else
                if isNotice s then ircWaitForServerResp b else return (False, s) 
  where
    parseRespCode x = if length (words x) > 1 then (words x) !! 1 else "000" 
    isResponseOK x = (parseRespCode x) `elem` [ "001", "002", "003", "004" ]
    isNotice     x = (head $ parseRespCode x) `elem` ('0':['a'..'z']++['A'..'Z'])
  
-- | Print announcements appearing in the bot's announce queue to its irc channel,
-- complying with bot and irc server policies.
ircWriterThread :: SampleVar Bot -> Int -> IO ()
ircWriterThread bv batchindex = do
    b'@(Bot{outputqueue=q,botopts=opts}) <- readSampleVar bv
    writeSampleVar bv b'
    ann <- readChan q

    -- policy:
    -- if specified, wait for --idle minutes of silence before sending messages
    -- no more than 400 chars per message
    -- no more than one message per 2s
    --   XXX on freenode, 6 such messages still cause a flood. Try limiting chars-per-period, or do a ping-pong
    -- no more than --max-items items per polling interval
    -- ditto for messages, except a final multi-message item will be completed.

    -- reread the samplevar to get an accurate idle time
    b <- readSampleVar bv
    writeSampleVar bv b
    idle <- channelIdleTime b

    let maxitems     = optIntValue MaxItems defaultmaxitems opts
        requiredidle = optIntValue Idle defaultidle opts         -- minutes
        pollinterval = optIntValue Interval defaultinterval opts -- minutes
        sendinterval = if Debug `elem` opts then 0 else 2        -- seconds
        iscontinuation = continuationprefix `isPrefixOf` ann
        act | batchindex >= maxitems && not iscontinuation = (do
                                         when (DebugIrc `elem` opts) $
                                              getTimeStamp >>= \t -> printf "%s: sent %d messages in this batch, max is %d, sleeping for %dm\n" t batchindex maxitems pollinterval >> hFlush stdout
                                         unGetChan q ann
                                         threadDelay $ pollinterval * minutes
                                         ircWriterThread bv 0)
            | requiredidle > 0 && (idle < requiredidle) = (do
                                      let idleinterval = requiredidle-idle
                                      when (DebugIrc `elem` opts) $
                                           getTimeStamp >>= \t -> printf "%s: channel has been idle %dm, %dm required, sleeping for %dm\n" t idle requiredidle idleinterval >> hFlush stdout
                                      unGetChan q ann
                                      threadDelay $ idleinterval * minutes
                                      ircWriterThread bv batchindex)
            | otherwise = (do
                            when (DebugIrc `elem` opts) $
                                 getTimeStamp >>= \t -> printf "%s: sent %d messages in this batch%s, sending next\n" t batchindex (if requiredidle == 0 then "" else printf " and channel has been idle %dm" idle) >> hFlush stdout
                            let (msg,rest) = splitAnnouncement ann
                            unless (null rest) $ unGetChan q rest
                            ircPrivmsgH b msg
                            threadDelay $ sendinterval * seconds
                            ircWriterThread bv (batchindex+1))
    act

-- | The time in minutes since the last message on this bot's channel, or
-- otherwise since joining the channel. Leap seconds are ignored.
channelIdleTime :: Bot -> IO Int
channelIdleTime (Bot{lastmsgtime=t1}) = do
  t <- getCurrentTime
  return $ round (diffUTCTime t t1) `div` 60

-- | Handle any incoming commands from the bot's irc channel.
-- The following commands are supported: PING.
-- Also track the last message time.
ircResponderThread :: SampleVar Bot -> IO ()
ircResponderThread bv = do
  b@(Bot{socket=h,botopts=opts}) <- readSampleVar bv
  writeSampleVar bv b
  if (Debug `elem` opts)
   then threadDelay $ 1 * hours
   else do
    s <- hGetLine h
    let s' = init s
    when (DebugIrc `elem` opts) $ (getTimeStamp >>= \t -> printf "%s: <-%s\n" t s') >> hFlush stdout
    let respond | ismessage s = do t <- getCurrentTime
                                   writeSampleVar bv b{lastmsgtime=t}
                | isping s    = pong b s'
                | otherwise   = return ()
    respond
  ircResponderThread bv

ismessage :: String -> Bool
ismessage s = isprivmsg s && not ("VERSION" `elem` (msg_params $ fromJust $ decode s))

isprivmsg :: String -> Bool
isprivmsg s = case decode s of Just Message{msg_command="PRIVMSG"} -> True
                               _ -> False

isping :: String -> Bool
isping s = case decode s of Just Message{msg_command="PING"} -> True
                            _ -> False

pong :: Bot -> String -> IO ()
pong b x  = ircWrite b $ printf "PONG :%s" (drop 6 x)

-- | Send a privmsg to the bot's irc server & channel.
ircPrivmsgH :: Bot -> String -> IO ()
ircPrivmsgH b@(Bot{channel=c,botopts=opts}) msg = ircWrite b $ encode $ privmsg c msg'
    where
    msg' | Action `elem` opts = "\1ACTION " ++ msg ++ "\1"
         | otherwise = msg

-- | Send a string to the bot's irc server unless --debug is in effect,
-- and to the console if --debug-irc is in effect.
ircWrite :: Bot -> String -> IO ()
ircWrite (Bot{socket=h,botopts=opts}) s = do
  -- XXX may fail with isFullError if the device is full or
  -- isPermissionError if another system resource limit would be exceeded.
  when (not $ Debug `elem` opts) $ hPutStr h (s++"\r\n")
  debugoutput
  where
    debugoutput
        | DebugIrc `elem` opts = getTimeStamp >>= \t -> printf "%s: ->%s\n" t s >> hFlush stdout
        | not (Quiet `elem` opts) && ("PRIVMSG" `isPrefixOf` s) = (printf "%s\n" $ filter (/='\1') $ drop 1 $ dropWhile (/=':') s) >> hFlush stdout
        | otherwise = return ()

-- utils

-- | Split an announcement into one or more suitably truncated and
-- formatted irc messages. Each call returns the next message and
-- the remainder of the announcement.
-- XXX n must be > length continuationsuffix
splitAnnouncement :: String -> (String,String)
splitAnnouncement a
    | length a <= maxmessagelength = (a,"")
    | otherwise =
        case splitAtWordBefore n a of
          (m,rest@(_:_)) -> (m++continuationsuffix, continuationprefix++rest)
          (m,"")         -> (m, "")
    where
      n = maxmessagelength - length continuationsuffix

continuationprefix, continuationsuffix :: String
continuationprefix = "... "
continuationsuffix = " ..."

-- | Truncate a string, if possible at a word boundary, at or before
-- the specified position, and indicate truncation with the specified
-- suffix. The length of the returned string will be in the range
-- n, n+length suffix.
truncateWordsAt :: Int -> String -> String -> String
truncateWordsAt n suffix s
    | s' == s   = s
    | otherwise = s' ++ suffix
    where
      s' = fst $ splitAtWordBefore n s

-- | Truncate a string at the specified position, and indicate
-- truncation with the specified suffix. The length of the returned
-- string will be in the range n, n+length suffix.
truncateAt :: Int -> String -> String -> String
truncateAt n suffix s
    | s' == s   = s
    | otherwise = s' ++ suffix
    where
      s' = take n s

-- | Split a string at or before the specified position, on a word boundary if possible.
splitAtWordBefore :: Int -> String -> (String,String)
splitAtWordBefore n s
    | null a || (null b) = (rstrip a, lstrip b)
    | last a == ' ' || (head b == ' ') || (not $ ' ' `elem` a) = (rstrip a, lstrip b)
    | otherwise = (rstrip $ take (length a - length partialword) a, partialword ++ lstrip b)
    where (a,b) = splitAt n s
          partialword = reverse $ takeWhile (/= ' ') $ reverse a


-- | Apply all --replace substitutions to a string, in turn.
-- Warning, will fail at runtime if there is a bad regexp.
applyReplacements :: [Opt] -> String -> String
applyReplacements opts = foldl' (.) id (reverse substitutions)
    where substitutions = map make $ optValues Replace opts
          make s = case splitRegexPR "(?<!\\\\)/" s of
                     (pat:sub:[]) -> gsubRegexPR pat sub
                     _ -> id

-- | Replace any HTML tags or entities in a string with a single space.
stripHtml :: String -> String
stripHtml = gsubRegexPR "(&[^ \t]*?;|<.*?>)" " "

-- | Remove any email addresses from a string.
stripEmails :: String -> String
stripEmails = gsubRegexPR "(?i) ?(<|&lt;)?\\b[-._%+a-z0-9]+@[-.a-z0-9]+\\.[a-z]{2,4}\\b(>|&gt;)?" ""

optValue :: (String -> Opt) -> String -> [Opt] -> String
optValue oc def = head . optValues oc . (++[oc def])

optIntValue :: (String -> Opt) -> Int -> [Opt] -> Int
optIntValue oc def opts = fromMaybe def $ maybeRead $ optValue oc "" opts

optValues :: (String -> Opt) -> [Opt] -> [String]
optValues oc opts = concatMap getval opts
    where getval o = if oc v == o then [v] else [] where v = value o

numIterations :: [Opt] -> Maybe Int
numIterations opts = case optIntValue NumIterations (-1) opts of
                         (-1) -> Nothing
                         n    -> Just n

maybeRead :: Read a => String -> Maybe a
maybeRead s = case reads s of
    [(x, _)] -> Just x
    _        -> Nothing

-- | Parse a datetime string if possible, trying at least the formats
-- likely to be used in RSS/Atom feeds.
parseDateTime :: String -> Maybe UTCTime
parseDateTime s = firstJust [parseTime defaultTimeLocale f s' | f <- formats]
    where
      s' = adaptForParseTime s
      adaptForParseTime = gsubRegexPR "(....-..-..T..:..:..[\\+\\-]..):(..)" "\\1\\2" -- 2009-09-22T13:10:56+00:00
      formats = -- http://hackage.haskell.org/packages/archive/time/1.1.4/doc/html/Data-Time-Format.html#v%3AformatTime
          [
           "%a, %d %b %Y %T %z" -- Fri, 18 Sep 2009 12:42:07 -0400
          ,"%a, %d %b %Y %T %Z" -- Fri, 25 Sep 2009 11:01:23 UTC
          ,"%Y-%m-%dT%T%z"      -- 2009-09-22T13:10:56+0000
          ]

firstJust :: [Maybe a] -> Maybe a
firstJust ms = case dropWhile isNothing ms of (m:_) -> m
                                              _     -> Nothing

getCurrentLocalTime :: IO LocalTime
getCurrentLocalTime = do
  t <- getCurrentTime
  tz <- getCurrentTimeZone
  return $ utcToLocalTime tz t

getTimeStamp :: IO String
getTimeStamp = do
  t <- getCurrentLocalTime
  tz <- getCurrentTimeZone
  return $ printf "%s %s" (take 19 $ show t) (show tz)

isLeft :: Either a b -> Bool
isLeft (Left _) = True
isLeft _        = False

-- strict :: NFData a => a -> a
-- strict = id $| rnf

hours, minutes, seconds :: Int
hours = 60 * minutes
minutes = 60 * seconds
seconds = 10^(6::Int)

strip, lstrip, rstrip, dropws :: String -> String
strip = lstrip . rstrip
lstrip = dropws
rstrip = reverse . dropws . reverse
dropws = dropWhile (`elem` " \t")
