236 lines
7.3 KiB
Haskell
236 lines
7.3 KiB
Haskell
{-# LANGUAGE BlockArguments #-}
|
|
{-# LANGUAGE DataKinds #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
{-# LANGUAGE OverloadedRecordDot #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE RecordWildCards #-}
|
|
|
|
module Main where
|
|
|
|
import Control.Lens (view)
|
|
import Control.Monad (when)
|
|
import Control.Monad.IO.Class
|
|
import Data.Aeson hiding (Options)
|
|
import Data.Aeson qualified as Aeson
|
|
import Data.ByteString.Lazy qualified as LBS
|
|
import Data.Maybe (fromJust, fromMaybe, listToMaybe)
|
|
import Data.Text qualified as Text
|
|
import Data.Text.Encoding qualified as Text
|
|
import Data.Text.IO qualified as Text
|
|
import Database.SQLite.Simple
|
|
import GHC.Generics
|
|
import Network.Wreq hiding (Options)
|
|
import Options.Generic
|
|
import System.Exit (exitFailure)
|
|
import Telegram.Bot.API
|
|
import Telegram.Bot.Simple
|
|
import Telegram.Bot.Simple.Debug
|
|
import Text.HTML.Scalpel
|
|
|
|
main :: IO ()
|
|
main = do
|
|
opts <- unwrapRecord "Berlin Scrapper"
|
|
token <- getEnvToken "TELEGRAM_BOT_TOKEN"
|
|
clientEnv <- defaultTelegramClientEnv token
|
|
startBot (traceBotDefault $ botApp opts) clientEnv >>= \case
|
|
Left err -> do
|
|
putStrLn $ "Bot failed with: " <> show err
|
|
exitFailure
|
|
_ -> pure ()
|
|
|
|
-- * Types
|
|
|
|
newtype Options w = Options {dbFile :: w ::: FilePath <?> "Path to the sqlite database"}
|
|
deriving (Generic)
|
|
|
|
instance ParseRecord (Options Wrapped)
|
|
|
|
data Offer = Offer
|
|
{ id_ :: Text,
|
|
title :: Text,
|
|
address :: Maybe Text,
|
|
rooms :: Maybe Double,
|
|
area :: Maybe Double,
|
|
availableFrom :: Maybe Text,
|
|
link :: Text
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromRow Offer
|
|
|
|
instance ToRow Offer
|
|
|
|
data IBWResponse = IBWResponse
|
|
{ headline :: Text,
|
|
searchresults :: Text
|
|
}
|
|
deriving (Generic)
|
|
|
|
instance FromJSON IBWResponse
|
|
|
|
-- * Parsing
|
|
|
|
queryIBW :: IO IBWResponse
|
|
queryIBW = do
|
|
let reqBody =
|
|
[ partText "q" "wf-save-srch",
|
|
partText "save" "false",
|
|
partText "heizung_zentral" "false",
|
|
partText "heizung_etage" "false",
|
|
partText "energy_fernwaerme" "false",
|
|
partText "heizung_nachtstrom" "false",
|
|
partText "heizung_ofen" "false",
|
|
partText "heizung_gas" "false",
|
|
partText "heizung_oel" "false",
|
|
partText "heizung_solar" "false",
|
|
partText "heizung_erdwaerme" "false",
|
|
partText "heizung_fussboden" "false",
|
|
partText "seniorenwohnung" "false",
|
|
partText "maisonette" "false",
|
|
partText "etagen_dg" "false",
|
|
partText "balkon_loggia_terrasse" "false",
|
|
partText "garten" "false",
|
|
partText "wbs" "0",
|
|
partText "barrierefrei" "false",
|
|
partText "gaeste_wc" "false",
|
|
partText "aufzug" "false",
|
|
partText "stellplatz" "false",
|
|
partText "keller" "false",
|
|
partText "badewanne" "false",
|
|
partText "dusche" "false",
|
|
partText "bez[]" "01_00",
|
|
partText "bez[]" "02_00",
|
|
partText "bez[]" "04_00",
|
|
partText "bez[]" "07_00",
|
|
partText "bez[]" "08_00",
|
|
partText "bez[]" "09_00",
|
|
partText "bez[]" "11_00"
|
|
]
|
|
let link = "https://inberlinwohnen.de/wp-content/themes/ibw/skript/wohnungsfinder.php"
|
|
resBS <- view responseBody <$> post link reqBody
|
|
pure $ fromJust $ decode @IBWResponse resBS
|
|
|
|
scrapeOffers :: Scraper Text [Offer]
|
|
scrapeOffers = do
|
|
let listItemsSelector = "div" @: [hasClass "result-list"] // "ul" // "li"
|
|
chroots listItemsSelector scrapeOffer
|
|
|
|
scrapeOffer :: Scraper Text Offer
|
|
scrapeOffer = do
|
|
title <- Text.strip <$> text "h3"
|
|
|
|
let tableSelector = "div" @: [hasClass "tb-merkdetails"] // "div" @: [hasClass "span_wflist_data"] // "table" @: [hasClass "fullw"] // "tbody"
|
|
rowsSelector = tableSelector // "tr"
|
|
tableDataParser = do
|
|
thVal <- text "th"
|
|
(Text.strip thVal,) <$> text "td"
|
|
allTableData <- chroots rowsSelector tableDataParser
|
|
let address = lookup "Adresse:" allTableData
|
|
roomsStr = lookup "Zimmeranzahl:" allTableData
|
|
rooms = readEuropeanNumber <$> roomsStr
|
|
areaStr = lookup "Wohnfläche:" allTableData
|
|
area = readEuropeanNumber <$> (Text.stripSuffix " m²" =<< areaStr)
|
|
availableFrom = lookup "Bezugsfertig ab:" allTableData
|
|
|
|
link <- attr "href" $ "a" @: [hasClass "org-but"]
|
|
id_ <- attr "id" "li"
|
|
pure Offer {..}
|
|
|
|
readEuropeanNumber :: Text -> Double
|
|
readEuropeanNumber x = do
|
|
read $ Text.unpack $ Text.replace "," "." x
|
|
|
|
-- * SQLite
|
|
|
|
insertOffer :: Connection -> Offer -> IO ()
|
|
insertOffer conn =
|
|
execute conn "INSERT INTO offers (id, title, address, rooms, area, availableFrom, link) VALUES (?, ?, ?, ?, ?, ?, ?)"
|
|
|
|
getOffer :: Connection -> Text -> IO (Maybe Offer)
|
|
getOffer conn offerId =
|
|
listToMaybe <$> query conn "SELECT * from offers where id = ?" (Only offerId)
|
|
|
|
createTable :: Connection -> IO ()
|
|
createTable conn =
|
|
execute_ conn "CREATE TABLE IF NOT EXISTS offers (id TEXT PRIMARY KEY, title TEXT, address TEXT, rooms REAL, area REAL, availableFrom TEXT, link TEXT)"
|
|
|
|
saveOffer :: Connection -> Offer -> IO Bool
|
|
saveOffer conn offer = do
|
|
getOffer conn offer.id_ >>= \case
|
|
Nothing -> do
|
|
insertOffer conn offer
|
|
pure True
|
|
_ -> pure False
|
|
|
|
-- * Telegram
|
|
|
|
type Model = ()
|
|
|
|
newtype Action = StartChat Chat
|
|
deriving (Show)
|
|
|
|
botApp :: Options Unwrapped -> BotApp Model Action
|
|
botApp opts =
|
|
BotApp
|
|
{ botInitialModel = (),
|
|
botAction = action,
|
|
botHandler = handler,
|
|
botJobs = [scrapeJob opts]
|
|
}
|
|
|
|
action :: Update -> Model -> Maybe Action
|
|
action update _ = do
|
|
msg <- update.updateMessage
|
|
txt <- msg.messageText
|
|
if txt == "/start"
|
|
then pure $ StartChat msg.messageChat
|
|
else Nothing
|
|
|
|
handler :: Action -> Model -> Eff Action Model
|
|
handler (StartChat chat) model =
|
|
model <# do
|
|
liftIO $ putStrLn $ "Chat started! " <> ppAsJSON chat
|
|
|
|
scrapeJob :: Options Unwrapped -> BotJob Model Action
|
|
scrapeJob opts =
|
|
BotJob
|
|
{ botJobSchedule = "* * * * *",
|
|
botJobTask = scrapeJobTask opts
|
|
}
|
|
|
|
scrapeJobTask :: Options Unwrapped -> Model -> Eff Action Model
|
|
scrapeJobTask opts m =
|
|
m <# do
|
|
liftIO $ putStrLn "Starting scrape job"
|
|
res <- liftIO queryIBW
|
|
let offers = fromJust $ scrapeStringLike res.searchresults scrapeOffers
|
|
liftIO $ putStrLn "Fetched offers"
|
|
conn <- liftIO $ open (dbFile opts)
|
|
liftIO $ createTable conn
|
|
mapM_
|
|
( \offer -> do
|
|
isNewOffer <- liftIO $ saveOffer conn offer
|
|
when isNewOffer $ do
|
|
liftIO $ putStrLn "Found a new offer"
|
|
notify offer
|
|
)
|
|
offers
|
|
|
|
notify :: Offer -> BotM ()
|
|
notify offer = do
|
|
let offerTitle = "<b><u>" <> offer.title <> "</u></b>"
|
|
offerAddress = "<b>Address:</b> " <> fromMaybe "N/A" offer.address
|
|
offerRooms = "<b>Rooms:</b> " <> maybe "N/A" (Text.pack . show) offer.rooms
|
|
offerArea = "<b>Area:</b> " <> maybe "N/A" ((<> " m²") . Text.pack . show) offer.area
|
|
offerLink = "<a href=\"https://inberlinwohnen.de" <> offer.link <> "\" >Apply Here</a>"
|
|
offerBody = offerAddress <> "\n" <> offerRooms <> "\n" <> offerArea <> "\n" <> offerLink
|
|
offerText = offerTitle <> "\n\n" <> offerBody
|
|
sendMsgReq = (defSendMessage (SomeChatId $ ChatId 952512153) offerText) {sendMessageParseMode = Just HTML}
|
|
res <- runTG sendMsgReq
|
|
liftIO $
|
|
if res.responseOk
|
|
then putStrLn "Notified successfully"
|
|
else do
|
|
putStrLn $ "Failed to notify the offer: " <> show offer
|
|
Text.putStrLn $ "Response: " <> Text.decodeUtf8 (LBS.toStrict $ Aeson.encode res)
|