291 lines
9.6 KiB
Haskell
291 lines
9.6 KiB
Haskell
{-# LANGUAGE BlockArguments #-}
|
|
{-# LANGUAGE DataKinds #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
{-# LANGUAGE OverloadedRecordDot #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE RecordWildCards #-}
|
|
|
|
module Main where
|
|
|
|
import Control.Exception
|
|
import Control.Lens (view, (&), (.~))
|
|
import Control.Monad (when)
|
|
import Control.Monad.IO.Class
|
|
import Control.Monad.Trans.Class (lift)
|
|
import Data.Aeson hiding (Options)
|
|
import Data.Aeson qualified as Aeson
|
|
import Data.ByteString.Lazy qualified as LBS
|
|
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
|
|
import Data.String (IsString (..))
|
|
import Data.Text qualified as Text
|
|
import Data.Text.Encoding qualified as Text
|
|
import Database.SQLite.Simple
|
|
import GHC.Generics
|
|
import Katip
|
|
import Network.HTTP.Client.OpenSSL
|
|
import Network.Wreq hiding (Options)
|
|
import Network.Wreq qualified as Wreq
|
|
import Options.Generic
|
|
import Servant.Client (mkClientEnv)
|
|
import System.Exit (exitFailure)
|
|
import System.IO
|
|
import Telegram.Bot.API
|
|
import Telegram.Bot.Simple
|
|
import Telegram.Bot.Simple.Debug
|
|
import Text.HTML.Scalpel
|
|
|
|
main :: IO ()
|
|
main = withOpenSSL $ do
|
|
opts :: Options Unwrapped <- unwrapRecord "Berlin Scrapper"
|
|
token <- getEnvToken "TELEGRAM_BOT_TOKEN"
|
|
httpMgr <- newOpenSSLManager
|
|
dbConn <- open opts.dbFile
|
|
createTable dbConn
|
|
|
|
handleScribe <- mkHandleScribe ColorIfTerminal stdout (permitItem InfoS) V2
|
|
let makeLogEnv = registerScribe "stdout" handleScribe defaultScribeSettings =<< initLogEnv "MyApp" "production"
|
|
bracket makeLogEnv closeScribes $ \logEnv -> do
|
|
let clientEnv = mkClientEnv httpMgr (botBaseUrl token)
|
|
wreqOpts = Wreq.defaults & Wreq.manager .~ Right httpMgr
|
|
env = Env {..}
|
|
liftIO $
|
|
startBot (traceBotDefault $ botApp env) clientEnv >>= \case
|
|
Left err -> do
|
|
runKatipContextT env.logEnv () "main" $ info $ "Bot failed with: " <> fromString (show err)
|
|
exitFailure
|
|
_ -> pure ()
|
|
|
|
-- * Types
|
|
|
|
newtype Options w = Options {dbFile :: w ::: FilePath <?> "Path to the sqlite database"}
|
|
deriving (Generic)
|
|
|
|
instance ParseRecord (Options Wrapped)
|
|
|
|
data Offer = Offer
|
|
{ id_ :: Text,
|
|
title :: Text,
|
|
address :: Maybe Text,
|
|
rooms :: Maybe Double,
|
|
area :: Maybe Double,
|
|
availableFrom :: Maybe Text,
|
|
link :: Text
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromRow Offer
|
|
|
|
instance ToRow Offer
|
|
|
|
data IBWResponse = IBWResponse
|
|
{ headline :: Text,
|
|
searchresults :: Text
|
|
}
|
|
deriving (Generic)
|
|
|
|
instance FromJSON IBWResponse
|
|
|
|
data Env = Env
|
|
{ dbConn :: Connection,
|
|
wreqOpts :: Wreq.Options,
|
|
logEnv :: LogEnv
|
|
}
|
|
|
|
-- * Parsing
|
|
|
|
queryIBW :: Wreq.Options -> WBS -> IO IBWResponse
|
|
queryIBW wreqOpts wbs = do
|
|
let wbsParam = case wbs of
|
|
WBS -> "all"
|
|
NoWBS -> "0"
|
|
let reqBody =
|
|
[ partText "q" "wf-save-srch",
|
|
partText "save" "false",
|
|
partText "heizung_zentral" "false",
|
|
partText "heizung_etage" "false",
|
|
partText "energy_fernwaerme" "false",
|
|
partText "heizung_nachtstrom" "false",
|
|
partText "heizung_ofen" "false",
|
|
partText "heizung_gas" "false",
|
|
partText "heizung_oel" "false",
|
|
partText "heizung_solar" "false",
|
|
partText "heizung_erdwaerme" "false",
|
|
partText "heizung_fussboden" "false",
|
|
partText "seniorenwohnung" "false",
|
|
partText "maisonette" "false",
|
|
partText "etagen_dg" "false",
|
|
partText "balkon_loggia_terrasse" "false",
|
|
partText "garten" "false",
|
|
partText "wbs" wbsParam,
|
|
partText "barrierefrei" "false",
|
|
partText "gaeste_wc" "false",
|
|
partText "aufzug" "false",
|
|
partText "stellplatz" "false",
|
|
partText "keller" "false",
|
|
partText "badewanne" "false",
|
|
partText "dusche" "false"
|
|
-- , partText "bez[]" "01_00",
|
|
-- partText "bez[]" "02_00",
|
|
-- partText "bez[]" "04_00",
|
|
-- partText "bez[]" "07_00",
|
|
-- partText "bez[]" "08_00",
|
|
-- partText "bez[]" "09_00",
|
|
-- partText "bez[]" "11_00"
|
|
]
|
|
let link = "https://inberlinwohnen.de/wp-content/themes/ibw/skript/wohnungsfinder.php"
|
|
resBS <- view responseBody <$> postWith wreqOpts link reqBody
|
|
pure $ fromJust $ decode @IBWResponse resBS
|
|
|
|
scrapeOffers :: Scraper Text [Offer]
|
|
scrapeOffers = do
|
|
let listItemsSelector = "div" @: [hasClass "result-list"] // "ul" // "li"
|
|
chroots listItemsSelector scrapeOffer
|
|
|
|
scrapeOffer :: Scraper Text Offer
|
|
scrapeOffer = do
|
|
title <- Text.strip <$> text "h3"
|
|
|
|
let tableSelector = "div" @: [hasClass "tb-merkdetails"] // "div" @: [hasClass "span_wflist_data"] // "table" @: [hasClass "fullw"] // "tbody"
|
|
rowsSelector = tableSelector // "tr"
|
|
tableDataParser = do
|
|
thVal <- text "th"
|
|
(Text.strip thVal,) <$> text "td"
|
|
allTableData <- chroots rowsSelector tableDataParser
|
|
let address = lookup "Adresse:" allTableData
|
|
roomsStr = lookup "Zimmeranzahl:" allTableData
|
|
rooms = readEuropeanNumber <$> roomsStr
|
|
areaStr = lookup "Wohnfläche:" allTableData
|
|
area = readEuropeanNumber <$> (Text.stripSuffix " m²" =<< areaStr)
|
|
availableFrom = lookup "Bezugsfertig ab:" allTableData
|
|
|
|
link <- attr "href" $ "a" @: [hasClass "org-but"]
|
|
id_ <- attr "id" "li"
|
|
pure Offer {..}
|
|
|
|
readEuropeanNumber :: Text -> Double
|
|
readEuropeanNumber x = do
|
|
read $ Text.unpack $ Text.replace "," "." x
|
|
|
|
-- * SQLite
|
|
|
|
data WBS = WBS | NoWBS
|
|
|
|
tableName :: WBS -> Text
|
|
tableName WBS = "offers"
|
|
tableName NoWBS = "offers_without_wbs"
|
|
|
|
insertOffer :: Connection -> WBS -> Offer -> IO ()
|
|
insertOffer conn wbs =
|
|
execute conn $ "INSERT INTO " <> Query (tableName wbs) <> " (id, title, address, rooms, area, availableFrom, link) VALUES (?, ?, ?, ?, ?, ?, ?)"
|
|
|
|
getOffer :: Connection -> WBS -> Text -> IO (Maybe Offer)
|
|
getOffer conn wbs offerId =
|
|
listToMaybe <$> query conn ("SELECT * from " <> Query (tableName wbs) <> " where id = ?") (Only offerId)
|
|
|
|
createTable :: Connection -> IO ()
|
|
createTable conn = do
|
|
execute_ conn "CREATE TABLE IF NOT EXISTS offers (id TEXT PRIMARY KEY, title TEXT, address TEXT, rooms REAL, area REAL, availableFrom TEXT, link TEXT)"
|
|
execute_ conn "CREATE TABLE IF NOT EXISTS offers_without_wbs (id TEXT PRIMARY KEY, title TEXT, address TEXT, rooms REAL, area REAL, availableFrom TEXT, link TEXT)"
|
|
|
|
saveOffer :: Connection -> WBS -> Offer -> IO Bool
|
|
saveOffer conn wbs offer = do
|
|
getOffer conn wbs offer.id_ >>= \case
|
|
Nothing -> do
|
|
insertOffer conn wbs offer
|
|
pure True
|
|
_ -> pure False
|
|
|
|
-- * Telegram
|
|
|
|
type Model = ()
|
|
|
|
newtype Action = StartChat Chat
|
|
deriving (Show)
|
|
|
|
botApp :: Env -> BotApp Model Action
|
|
botApp env =
|
|
BotApp
|
|
{ botInitialModel = (),
|
|
botAction = action,
|
|
botHandler = handler env,
|
|
botJobs = [scrapeJob env]
|
|
}
|
|
|
|
action :: Update -> Model -> Maybe Action
|
|
action update _ = do
|
|
msg <- update.updateMessage
|
|
txt <- msg.messageText
|
|
if txt == "/start"
|
|
then pure $ StartChat msg.messageChat
|
|
else Nothing
|
|
|
|
handler :: Env -> Action -> Model -> Eff Action Model
|
|
handler env (StartChat chat) model =
|
|
model <# do
|
|
runKatipContextT env.logEnv () "telegram.handler" $ info $ "Chat started! " <> fromString (ppAsJSON chat)
|
|
|
|
scrapeJob :: Env -> BotJob Model Action
|
|
scrapeJob env =
|
|
BotJob
|
|
{ botJobSchedule = "* * * * *",
|
|
botJobTask = \m -> do
|
|
scrapeJobTask env m WBS
|
|
scrapeJobTask env m NoWBS
|
|
}
|
|
|
|
wbsText :: WBS -> Text
|
|
wbsText WBS = "WBS"
|
|
wbsText NoWBS = "NoWBS"
|
|
|
|
scrapeJobTask :: Env -> Model -> WBS -> Eff Action Model
|
|
scrapeJobTask env m wbs =
|
|
m
|
|
<# runKatipContextT
|
|
env.logEnv
|
|
(sl "wbs" (wbsText wbs))
|
|
"scrapeJobTask"
|
|
( do
|
|
info "Starting scrape job"
|
|
res <- liftIO $ queryIBW env.wreqOpts wbs
|
|
let offers = fromJust $ scrapeStringLike res.searchresults scrapeOffers
|
|
info "Fetched offers"
|
|
mapM_
|
|
( \offer -> do
|
|
isNewOffer <- liftIO $ saveOffer env.dbConn wbs offer
|
|
when isNewOffer $ do
|
|
info "Found a new offer"
|
|
katipAddNamespace "notify" $ notify wbs offer
|
|
)
|
|
offers
|
|
)
|
|
|
|
info :: (Katip m) => LogStr -> m ()
|
|
info = logMsg mempty InfoS
|
|
|
|
notify :: WBS -> Offer -> KatipContextT BotM ()
|
|
notify wbs offer = do
|
|
let offerTitle = "<b><u>" <> offer.title <> "</u></b>"
|
|
offerAddress = "<b>Address:</b> " <> fromMaybe "N/A" offer.address
|
|
offerRooms = "<b>Rooms:</b> " <> maybe "N/A" (Text.pack . show) offer.rooms
|
|
offerArea = "<b>Area:</b> " <> maybe "N/A" ((<> " m²") . Text.pack . show) offer.area
|
|
offerLink = "<a href=\"https://inberlinwohnen.de" <> offer.link <> "\" >Apply Here</a>"
|
|
offerBody = offerAddress <> "\n" <> offerRooms <> "\n" <> offerArea <> "\n" <> offerLink
|
|
offerText = offerTitle <> "\n\n" <> offerBody
|
|
-- sendMsgReq1 = (defSendMessage (SomeChatId $ ChatId 952512153) offerText) {sendMessageParseMode = Just HTML}
|
|
-- sendMsgReq2 = (defSendMessage (SomeChatId $ ChatId 116981707) offerText) {sendMessageParseMode = Just HTML}
|
|
sendMsgReq3 = (defSendMessage (SomeChatId $ ChatId 5781922807) offerText) {sendMessageParseMode = Just HTML}
|
|
sendMsgReq4 = (defSendMessage (SomeChatId $ ChatId 7008484163) offerText) {sendMessageParseMode = Just HTML}
|
|
msgReq = case wbs of
|
|
WBS -> sendMsgReq3
|
|
NoWBS -> sendMsgReq4
|
|
sendMessageWithLogs offer msgReq
|
|
|
|
sendMessageWithLogs :: Offer -> SendMessageRequest -> KatipContextT BotM ()
|
|
sendMessageWithLogs offer sendMsgReq = do
|
|
res <- lift $ runTG sendMsgReq
|
|
if res.responseOk
|
|
then info "Notified successfully"
|
|
else do
|
|
info $ "Failed to notify the offer: " <> fromString (show offer)
|
|
info $ "Response: " <> fromString (Text.unpack $ Text.decodeUtf8 (LBS.toStrict $ Aeson.encode res))
|