{-# LANGUAGE CPP #-}

{-|
A library for listing "files" in an http "directory".

@
import Network.HTTP.Directory
import qualified Data.Text as T

main = do
  mgr <- httpManager
  let url = "https://example.com/some/dir/"
  files <- httpDirectory mgr url
  mapM_ T.putStrLn files
  let file = url </> T.unpack (head files)
  httpFileSize mgr file >>= print
  httpLastModified mgr file >>= print
@
-}

module Network.HTTP.Directory
       ( httpDirectory,
         httpDirectory',
         httpRawDirectory,
         httpExists,
         httpFileSize,
         httpLastModified,
         httpManager,
         httpRedirect,
         httpRedirect',
         httpRedirects,
         isHttpUrl,
         trailingSlash,
         noTrailingSlash,
         Manager
       ) where

#if (defined(MIN_VERSION_base) && MIN_VERSION_base(4,8,0))
#else
import Control.Applicative ((<$>))
#endif
import Control.Monad (when)

import qualified Data.ByteString.Char8 as B
import qualified Data.List as L
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Clock (UTCTime)

import Network.HTTP.Client (hrRedirects, httpLbs, httpNoBody, Manager, method,
                            newManager, parseRequest, Request,
                            Response, responseBody, responseHeaders,
                            responseOpenHistory, responseStatus)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.HTTP.Date (httpDateToUTC, parseHTTPDate)
import Network.HTTP.Types (hContentLength, hLocation, methodHead, statusCode)
import Network.URI (parseURI, URI(..))

import Text.HTML.DOM (parseLBS)
import Text.XML.Cursor

-- | List the files (hrefs) in an http directory
--
-- It filters out absolute urls & paths, queries, '..', and '#' links.
--
-- Raises an error if the http request fails.
--
-- Note if the directory (webpage) url is redirected to a different path
-- you may need to use 'httpRedirect' to determine
-- the actual final url prefix for relative links
-- (files).
--
-- (before 0.1.4 it was just httpRawDirectory)
httpDirectory :: Manager -> String -> IO [Text]
httpDirectory :: Manager -> String -> IO [Text]
httpDirectory mgr :: Manager
mgr url :: String
url = do
  [Text]
hrefs <- Manager -> String -> IO [Text]
httpRawDirectory Manager
mgr String
url
  [Text] -> IO [Text]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Text] -> IO [Text]) -> [Text] -> IO [Text]
forall a b. (a -> b) -> a -> b
$ Maybe URI -> [Text] -> [Text]
defaultFilesFilter Maybe URI
uri [Text]
hrefs
  where
    uri :: Maybe URI
uri = String -> Maybe URI
parseURI String
url

defaultFilesFilter :: Maybe URI -> [Text] -> [Text]
defaultFilesFilter :: Maybe URI -> [Text] -> [Text]
defaultFilesFilter mUri :: Maybe URI
mUri =
  [Text] -> [Text]
forall a. Eq a => [a] -> [a]
L.nub ([Text] -> [Text]) -> ([Text] -> [Text]) -> [Text] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
or ([Bool] -> Bool) -> (Text -> [Bool]) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text -> Bool] -> Text -> [Bool]
forall a b. [a -> b] -> a -> [b]
flist ((Text -> Text -> Bool) -> [Text] -> [Text -> Bool]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text -> Bool
T.isInfixOf [":", "?", "#"] [Text -> Bool] -> [Text -> Bool] -> [Text -> Bool]
forall a. [a] -> [a] -> [a]
++ [Text -> Bool
nonTrailingSlash] [Text -> Bool] -> [Text -> Bool] -> [Text -> Bool]
forall a. [a] -> [a] -> [a]
++ [(Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["../", ".."])])) ([Text] -> [Text]) -> ([Text] -> [Text]) -> [Text] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
removePath
  where
    -- picked from swish
    flist :: [a->b] -> a -> [b]
    flist :: [a -> b] -> a -> [b]
flist fs :: [a -> b]
fs a :: a
a = ((a -> b) -> b) -> [a -> b] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ((a -> b) -> a -> b
forall a b. (a -> b) -> a -> b
$ a
a) [a -> b]
fs

    -- may return "" which nonTrailingSlash then removes
    removePath :: Text -> Text
    removePath :: Text -> Text
removePath t :: Text
t =
      case Maybe Text
murlPath of
        Nothing -> Text
t
        Just path :: Text
path ->
          Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
t (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Maybe Text
T.stripPrefix Text
path Text
t

    murlPath :: Maybe Text
    murlPath :: Maybe Text
murlPath = (URI -> Text) -> Maybe URI -> Maybe Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String -> Text
T.pack (String -> Text) -> (URI -> String) -> URI -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
trailingSlash (String -> String) -> (URI -> String) -> URI -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URI -> String
uriPath) Maybe URI
mUri

    -- True means remove
    nonTrailingSlash :: Text -> Bool
    nonTrailingSlash :: Text -> Bool
nonTrailingSlash "" = Bool
True     -- from removed uriPath
    nonTrailingSlash "/" = Bool
True
    nonTrailingSlash t :: Text
t =
      (Text -> Int
T.length Text
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 1) Bool -> Bool -> Bool
&& ("/" Text -> Text -> Bool
`T.isInfixOf` Text -> Text
T.init Text
t)

-- | Like httpDirectory but uses own Manager
--
-- @since 0.1.4
httpDirectory' :: String -> IO [Text]
httpDirectory' :: String -> IO [Text]
httpDirectory' url :: String
url = do
  Manager
mgr <- IO Manager
httpManager
  Manager -> String -> IO [Text]
httpDirectory Manager
mgr String
url

-- | List all the hrefs in an http directory html file.
--
-- Raises an error if the http request fails.
--
-- Note if the directory (webpage) url is redirected to a different path
-- you may need to use 'httpRedirect' to determine
-- the actual final url prefix for relative links
-- (files).
--
-- @since 0.1.4
httpRawDirectory :: Manager -> String -> IO [Text]
httpRawDirectory :: Manager -> String -> IO [Text]
httpRawDirectory mgr :: Manager
mgr url :: String
url = do
  Request
request <- String -> IO Request
forall (m :: * -> *). MonadThrow m => String -> m Request
parseRequest String
url
  Response ByteString
response <- Request -> Manager -> IO (Response ByteString)
httpLbs Request
request Manager
mgr
  String -> Response ByteString -> IO ()
forall r. String -> Response r -> IO ()
checkResponse String
url Response ByteString
response
  let body :: ByteString
body = Response ByteString -> ByteString
forall body. Response body -> body
responseBody Response ByteString
response
      doc :: Document
doc = ByteString -> Document
parseLBS ByteString
body
      cursor :: Cursor
cursor = Document -> Cursor
fromDocument Document
doc
  [Text] -> IO [Text]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Text] -> IO [Text]) -> [Text] -> IO [Text]
forall a b. (a -> b) -> a -> b
$ (Cursor -> [Text]) -> [Cursor] -> [Text]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Name -> Cursor -> [Text]
attribute "href") ([Cursor] -> [Text]) -> [Cursor] -> [Text]
forall a b. (a -> b) -> a -> b
$ Cursor
cursor Cursor -> (Cursor -> [Cursor]) -> [Cursor]
forall node a. Cursor node -> (Cursor node -> [a]) -> [a]
$// Name -> Cursor -> [Cursor]
element "a"

-- | Test if an file (url) exists
--
-- @since 0.1.3
httpExists :: Manager -> String -> IO Bool
httpExists :: Manager -> String -> IO Bool
httpExists mgr :: Manager
mgr url :: String
url = do
  Response ()
response <- Manager -> String -> IO (Response ())
httpHead Manager
mgr String
url
  Bool -> IO Bool
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> IO Bool) -> Bool -> IO Bool
forall a b. (a -> b) -> a -> b
$ Status -> Int
statusCode (Response () -> Status
forall body. Response body -> Status
responseStatus Response ()
response) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 200

-- | Try to get the filesize (Content-Length field) of an http file
--
-- Raises an error if the http request fails.
httpFileSize :: Manager -> String -> IO (Maybe Integer)
httpFileSize :: Manager -> String -> IO (Maybe Integer)
httpFileSize mgr :: Manager
mgr url :: String
url = do
  Response ()
response <- Manager -> String -> IO (Response ())
httpHead Manager
mgr String
url
  String -> Response () -> IO ()
forall r. String -> Response r -> IO ()
checkResponse String
url Response ()
response
  let headers :: ResponseHeaders
headers = Response () -> ResponseHeaders
forall body. Response body -> ResponseHeaders
responseHeaders Response ()
response
  Maybe Integer -> IO (Maybe Integer)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Integer -> IO (Maybe Integer))
-> Maybe Integer -> IO (Maybe Integer)
forall a b. (a -> b) -> a -> b
$ String -> Integer
forall a. Read a => String -> a
read (String -> Integer)
-> (ByteString -> String) -> ByteString -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
B.unpack (ByteString -> Integer) -> Maybe ByteString -> Maybe Integer
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup HeaderName
hContentLength ResponseHeaders
headers

-- | Try to get the modification time (Last-Modified field) of an http file
--
-- Raises an error if the http request fails.
--
-- @since 0.1.1
httpLastModified :: Manager -> String -> IO (Maybe UTCTime)
httpLastModified :: Manager -> String -> IO (Maybe UTCTime)
httpLastModified mgr :: Manager
mgr url :: String
url = do
  Response ()
response <- Manager -> String -> IO (Response ())
httpHead Manager
mgr String
url
  String -> Response () -> IO ()
forall r. String -> Response r -> IO ()
checkResponse String
url Response ()
response
  let headers :: ResponseHeaders
headers = Response () -> ResponseHeaders
forall body. Response body -> ResponseHeaders
responseHeaders Response ()
response
      mdate :: Maybe ByteString
mdate = HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "Last-Modified" ResponseHeaders
headers
  Maybe UTCTime -> IO (Maybe UTCTime)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe UTCTime -> IO (Maybe UTCTime))
-> Maybe UTCTime -> IO (Maybe UTCTime)
forall a b. (a -> b) -> a -> b
$ HTTPDate -> UTCTime
httpDateToUTC (HTTPDate -> UTCTime) -> Maybe HTTPDate -> Maybe UTCTime
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Maybe HTTPDate
parseHTTPDate (ByteString -> Maybe HTTPDate)
-> Maybe ByteString -> Maybe HTTPDate
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Maybe ByteString
mdate)

-- conflicts with Request
checkResponse :: String -> Response r -> IO ()
checkResponse :: String -> Response r -> IO ()
checkResponse url :: String
url response :: Response r
response =
  Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Status -> Int
statusCode (Response r -> Status
forall body. Response body -> Status
responseStatus Response r
response) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= 200) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
    String -> IO ()
putStrLn String
url
    String -> IO ()
forall a. String -> a
error' (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ Status -> String
forall a. Show a => a -> String
show (Status -> String) -> Status -> String
forall a b. (a -> b) -> a -> b
$ Response r -> Status
forall body. Response body -> Status
responseStatus Response r
response

-- | alias for 'newManager tlsManagerSettings'
-- so one does not need to import http-client etc
--
-- @since 0.1.2
httpManager :: IO Manager
httpManager :: IO Manager
httpManager =
  ManagerSettings -> IO Manager
newManager ManagerSettings
tlsManagerSettings

-- | Returns the list of http redirects for an url in reverse order
-- (ie last redirect is listed first)
httpRedirects :: Manager -> String -> IO [B.ByteString]
httpRedirects :: Manager -> String -> IO [ByteString]
httpRedirects mgr :: Manager
mgr url :: String
url = do
  Request
request <- String -> IO Request
parseRequestHead String
url
  HistoriedResponse BodyReader
respHist <- Request -> Manager -> IO (HistoriedResponse BodyReader)
responseOpenHistory Request
request Manager
mgr
  [ByteString] -> IO [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return ([ByteString] -> IO [ByteString])
-> [ByteString] -> IO [ByteString]
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ ((Request, Response ByteString) -> Maybe ByteString)
-> [(Request, Response ByteString)] -> [ByteString]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup HeaderName
hLocation (ResponseHeaders -> Maybe ByteString)
-> ((Request, Response ByteString) -> ResponseHeaders)
-> (Request, Response ByteString)
-> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Response ByteString -> ResponseHeaders
forall body. Response body -> ResponseHeaders
responseHeaders (Response ByteString -> ResponseHeaders)
-> ((Request, Response ByteString) -> Response ByteString)
-> (Request, Response ByteString)
-> ResponseHeaders
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Request, Response ByteString) -> Response ByteString
forall a b. (a, b) -> b
snd) ([(Request, Response ByteString)] -> [ByteString])
-> [(Request, Response ByteString)] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ HistoriedResponse BodyReader -> [(Request, Response ByteString)]
forall body.
HistoriedResponse body -> [(Request, Response ByteString)]
hrRedirects HistoriedResponse BodyReader
respHist

-- | Return final redirect for an url
httpRedirect :: Manager -> String -> IO (Maybe B.ByteString)
httpRedirect :: Manager -> String -> IO (Maybe ByteString)
httpRedirect mgr :: Manager
mgr url :: String
url =
  [ByteString] -> Maybe ByteString
forall a. [a] -> Maybe a
listToMaybe ([ByteString] -> Maybe ByteString)
-> IO [ByteString] -> IO (Maybe ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Manager -> String -> IO [ByteString]
httpRedirects Manager
mgr String
url

-- | Like httpRedirect but uses own Manager.
--
-- @since 0.1.4
httpRedirect' :: String -> IO (Maybe B.ByteString)
httpRedirect' :: String -> IO (Maybe ByteString)
httpRedirect' url :: String
url = do
  Manager
mgr <- IO Manager
httpManager
  [ByteString] -> Maybe ByteString
forall a. [a] -> Maybe a
listToMaybe ([ByteString] -> Maybe ByteString)
-> IO [ByteString] -> IO (Maybe ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Manager -> String -> IO [ByteString]
httpRedirects Manager
mgr String
url

parseRequestHead :: String -> IO Request
parseRequestHead :: String -> IO Request
parseRequestHead url :: String
url = do
  Request
request <- String -> IO Request
forall (m :: * -> *). MonadThrow m => String -> m Request
parseRequest String
url
  Request -> IO Request
forall (m :: * -> *) a. Monad m => a -> m a
return (Request -> IO Request) -> Request -> IO Request
forall a b. (a -> b) -> a -> b
$ Request
request {method :: ByteString
method = ByteString
methodHead}

httpHead :: Manager -> String -> IO (Response ())
httpHead :: Manager -> String -> IO (Response ())
httpHead mgr :: Manager
mgr url :: String
url = do
  Request
request <- String -> IO Request
parseRequestHead String
url
  Request -> Manager -> IO (Response ())
httpNoBody Request
request Manager
mgr

-- | Test if string starts with http[s]:
--
-- @since 0.1.5
isHttpUrl :: String -> Bool
isHttpUrl :: String -> Bool
isHttpUrl loc :: String
loc = "http:" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`L.isPrefixOf` String
loc Bool -> Bool -> Bool
|| "https:" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`L.isPrefixOf` String
loc

-- | Make sure an url ends with "/"
--
-- @
-- trailingSlash "url" == "url/"
-- trailingSlash "url/" == "url/"
-- @
--
-- @since 0.1.6
trailingSlash :: String -> String
trailingSlash :: String -> String
trailingSlash "" = ""
trailingSlash str :: String
str =
  if String -> Char
forall a. [a] -> a
last String
str Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '/' then String
str else String
str String -> String -> String
forall a. [a] -> [a] -> [a]
++ "/"

-- | Remove all trailing slashes from filename or url
--
-- @
-- noTrailingSlash "dir/" == "dir"
-- noTrailingSlash "dir//" == "dir"
-- @
--
-- @since 0.1.6
noTrailingSlash :: Text -> Text
noTrailingSlash :: Text -> Text
noTrailingSlash = (Char -> Bool) -> Text -> Text
T.dropWhileEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '/')

-- from simple-cmd
error' :: String -> a
#if (defined(MIN_VERSION_base) && MIN_VERSION_base(4,9,0))
error' :: String -> a
error' = String -> a
forall a. String -> a
errorWithoutStackTrace
#else
error' = error
#endif