wohnugs-suche/berlin-scraper/app/Main.hs
2024-04-22 23:21:44 +02:00

291 lines
9.6 KiB
Haskell

{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Main where
import Control.Exception
import Control.Lens (view, (&), (.~))
import Control.Monad (when)
import Control.Monad.IO.Class
import Control.Monad.Trans.Class (lift)
import Data.Aeson hiding (Options)
import Data.Aeson qualified as Aeson
import Data.ByteString.Lazy qualified as LBS
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
import Data.String (IsString (..))
import Data.Text qualified as Text
import Data.Text.Encoding qualified as Text
import Database.SQLite.Simple
import GHC.Generics
import Katip
import Network.HTTP.Client.OpenSSL
import Network.Wreq hiding (Options)
import Network.Wreq qualified as Wreq
import Options.Generic
import Servant.Client (mkClientEnv)
import System.Exit (exitFailure)
import System.IO
import Telegram.Bot.API
import Telegram.Bot.Simple
import Telegram.Bot.Simple.Debug
import Text.HTML.Scalpel
main :: IO ()
main = withOpenSSL $ do
opts :: Options Unwrapped <- unwrapRecord "Berlin Scrapper"
token <- getEnvToken "TELEGRAM_BOT_TOKEN"
httpMgr <- newOpenSSLManager
dbConn <- open opts.dbFile
createTable dbConn
handleScribe <- mkHandleScribe ColorIfTerminal stdout (permitItem InfoS) V2
let makeLogEnv = registerScribe "stdout" handleScribe defaultScribeSettings =<< initLogEnv "MyApp" "production"
bracket makeLogEnv closeScribes $ \logEnv -> do
let clientEnv = mkClientEnv httpMgr (botBaseUrl token)
wreqOpts = Wreq.defaults & Wreq.manager .~ Right httpMgr
env = Env {..}
liftIO $
startBot (traceBotDefault $ botApp env) clientEnv >>= \case
Left err -> do
runKatipContextT env.logEnv () "main" $ info $ "Bot failed with: " <> fromString (show err)
exitFailure
_ -> pure ()
-- * Types
newtype Options w = Options {dbFile :: w ::: FilePath <?> "Path to the sqlite database"}
deriving (Generic)
instance ParseRecord (Options Wrapped)
data Offer = Offer
{ id_ :: Text,
title :: Text,
address :: Maybe Text,
rooms :: Maybe Double,
area :: Maybe Double,
availableFrom :: Maybe Text,
link :: Text
}
deriving (Show, Generic)
instance FromRow Offer
instance ToRow Offer
data IBWResponse = IBWResponse
{ headline :: Text,
searchresults :: Text
}
deriving (Generic)
instance FromJSON IBWResponse
data Env = Env
{ dbConn :: Connection,
wreqOpts :: Wreq.Options,
logEnv :: LogEnv
}
-- * Parsing
queryIBW :: Wreq.Options -> WBS -> IO IBWResponse
queryIBW wreqOpts wbs = do
let wbsParam = case wbs of
WBS -> "all"
NoWBS -> "0"
let reqBody =
[ partText "q" "wf-save-srch",
partText "save" "false",
partText "heizung_zentral" "false",
partText "heizung_etage" "false",
partText "energy_fernwaerme" "false",
partText "heizung_nachtstrom" "false",
partText "heizung_ofen" "false",
partText "heizung_gas" "false",
partText "heizung_oel" "false",
partText "heizung_solar" "false",
partText "heizung_erdwaerme" "false",
partText "heizung_fussboden" "false",
partText "seniorenwohnung" "false",
partText "maisonette" "false",
partText "etagen_dg" "false",
partText "balkon_loggia_terrasse" "false",
partText "garten" "false",
partText "wbs" wbsParam,
partText "barrierefrei" "false",
partText "gaeste_wc" "false",
partText "aufzug" "false",
partText "stellplatz" "false",
partText "keller" "false",
partText "badewanne" "false",
partText "dusche" "false"
-- , partText "bez[]" "01_00",
-- partText "bez[]" "02_00",
-- partText "bez[]" "04_00",
-- partText "bez[]" "07_00",
-- partText "bez[]" "08_00",
-- partText "bez[]" "09_00",
-- partText "bez[]" "11_00"
]
let link = "https://inberlinwohnen.de/wp-content/themes/ibw/skript/wohnungsfinder.php"
resBS <- view responseBody <$> postWith wreqOpts link reqBody
pure $ fromJust $ decode @IBWResponse resBS
scrapeOffers :: Scraper Text [Offer]
scrapeOffers = do
let listItemsSelector = "div" @: [hasClass "result-list"] // "ul" // "li"
chroots listItemsSelector scrapeOffer
scrapeOffer :: Scraper Text Offer
scrapeOffer = do
title <- Text.strip <$> text "h3"
let tableSelector = "div" @: [hasClass "tb-merkdetails"] // "div" @: [hasClass "span_wflist_data"] // "table" @: [hasClass "fullw"] // "tbody"
rowsSelector = tableSelector // "tr"
tableDataParser = do
thVal <- text "th"
(Text.strip thVal,) <$> text "td"
allTableData <- chroots rowsSelector tableDataParser
let address = lookup "Adresse:" allTableData
roomsStr = lookup "Zimmeranzahl:" allTableData
rooms = readEuropeanNumber <$> roomsStr
areaStr = lookup "Wohnfläche:" allTableData
area = readEuropeanNumber <$> (Text.stripSuffix "" =<< areaStr)
availableFrom = lookup "Bezugsfertig ab:" allTableData
link <- attr "href" $ "a" @: [hasClass "org-but"]
id_ <- attr "id" "li"
pure Offer {..}
readEuropeanNumber :: Text -> Double
readEuropeanNumber x = do
read $ Text.unpack $ Text.replace "," "." x
-- * SQLite
data WBS = WBS | NoWBS
tableName :: WBS -> Text
tableName WBS = "offers"
tableName NoWBS = "offers_without_wbs"
insertOffer :: Connection -> WBS -> Offer -> IO ()
insertOffer conn wbs =
execute conn $ "INSERT INTO " <> Query (tableName wbs) <> " (id, title, address, rooms, area, availableFrom, link) VALUES (?, ?, ?, ?, ?, ?, ?)"
getOffer :: Connection -> WBS -> Text -> IO (Maybe Offer)
getOffer conn wbs offerId =
listToMaybe <$> query conn ("SELECT * from " <> Query (tableName wbs) <> " where id = ?") (Only offerId)
createTable :: Connection -> IO ()
createTable conn = do
execute_ conn "CREATE TABLE IF NOT EXISTS offers (id TEXT PRIMARY KEY, title TEXT, address TEXT, rooms REAL, area REAL, availableFrom TEXT, link TEXT)"
execute_ conn "CREATE TABLE IF NOT EXISTS offers_without_wbs (id TEXT PRIMARY KEY, title TEXT, address TEXT, rooms REAL, area REAL, availableFrom TEXT, link TEXT)"
saveOffer :: Connection -> WBS -> Offer -> IO Bool
saveOffer conn wbs offer = do
getOffer conn wbs offer.id_ >>= \case
Nothing -> do
insertOffer conn wbs offer
pure True
_ -> pure False
-- * Telegram
type Model = ()
newtype Action = StartChat Chat
deriving (Show)
botApp :: Env -> BotApp Model Action
botApp env =
BotApp
{ botInitialModel = (),
botAction = action,
botHandler = handler env,
botJobs = [scrapeJob env]
}
action :: Update -> Model -> Maybe Action
action update _ = do
msg <- update.updateMessage
txt <- msg.messageText
if txt == "/start"
then pure $ StartChat msg.messageChat
else Nothing
handler :: Env -> Action -> Model -> Eff Action Model
handler env (StartChat chat) model =
model <# do
runKatipContextT env.logEnv () "telegram.handler" $ info $ "Chat started! " <> fromString (ppAsJSON chat)
scrapeJob :: Env -> BotJob Model Action
scrapeJob env =
BotJob
{ botJobSchedule = "* * * * *",
botJobTask = \m -> do
scrapeJobTask env m WBS
scrapeJobTask env m NoWBS
}
wbsText :: WBS -> Text
wbsText WBS = "WBS"
wbsText NoWBS = "NoWBS"
scrapeJobTask :: Env -> Model -> WBS -> Eff Action Model
scrapeJobTask env m wbs =
m
<# runKatipContextT
env.logEnv
(sl "wbs" (wbsText wbs))
"scrapeJobTask"
( do
info "Starting scrape job"
res <- liftIO $ queryIBW env.wreqOpts wbs
let offers = fromJust $ scrapeStringLike res.searchresults scrapeOffers
info "Fetched offers"
mapM_
( \offer -> do
isNewOffer <- liftIO $ saveOffer env.dbConn wbs offer
when isNewOffer $ do
info "Found a new offer"
katipAddNamespace "notify" $ notify wbs offer
)
offers
)
info :: (Katip m) => LogStr -> m ()
info = logMsg mempty InfoS
notify :: WBS -> Offer -> KatipContextT BotM ()
notify wbs offer = do
let offerTitle = "<b><u>" <> offer.title <> "</u></b>"
offerAddress = "<b>Address:</b> " <> fromMaybe "N/A" offer.address
offerRooms = "<b>Rooms:</b> " <> maybe "N/A" (Text.pack . show) offer.rooms
offerArea = "<b>Area:</b> " <> maybe "N/A" ((<> "") . Text.pack . show) offer.area
offerLink = "<a href=\"https://inberlinwohnen.de" <> offer.link <> "\" >Apply Here</a>"
offerBody = offerAddress <> "\n" <> offerRooms <> "\n" <> offerArea <> "\n" <> offerLink
offerText = offerTitle <> "\n\n" <> offerBody
-- sendMsgReq1 = (defSendMessage (SomeChatId $ ChatId 952512153) offerText) {sendMessageParseMode = Just HTML}
-- sendMsgReq2 = (defSendMessage (SomeChatId $ ChatId 116981707) offerText) {sendMessageParseMode = Just HTML}
sendMsgReq3 = (defSendMessage (SomeChatId $ ChatId 5781922807) offerText) {sendMessageParseMode = Just HTML}
sendMsgReq4 = (defSendMessage (SomeChatId $ ChatId 7008484163) offerText) {sendMessageParseMode = Just HTML}
msgReq = case wbs of
WBS -> sendMsgReq3
NoWBS -> sendMsgReq4
sendMessageWithLogs offer msgReq
sendMessageWithLogs :: Offer -> SendMessageRequest -> KatipContextT BotM ()
sendMessageWithLogs offer sendMsgReq = do
res <- lift $ runTG sendMsgReq
if res.responseOk
then info "Notified successfully"
else do
info $ "Failed to notify the offer: " <> fromString (show offer)
info $ "Response: " <> fromString (Text.unpack $ Text.decodeUtf8 (LBS.toStrict $ Aeson.encode res))