{-# LANGUAGE BlockArguments #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedRecordDot #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} module Main where import Control.Lens (view, (&), (.~)) import Control.Monad (when) import Control.Monad.IO.Class import Data.Aeson hiding (Options) import Data.Aeson qualified as Aeson import Data.ByteString.Lazy qualified as LBS import Data.Maybe (fromJust, fromMaybe, listToMaybe) import Data.Text qualified as Text import Data.Text.Encoding qualified as Text import Data.Text.IO qualified as Text import Database.SQLite.Simple import GHC.Generics import Network.HTTP.Client.OpenSSL import Network.Wreq hiding (Options) import Options.Generic import Servant.Client (mkClientEnv) import System.Exit (exitFailure) import Telegram.Bot.API import Telegram.Bot.Simple import Telegram.Bot.Simple.Debug import Text.HTML.Scalpel import qualified Network.Wreq as Wreq main :: IO () main = withOpenSSL $ do opts <- unwrapRecord "Berlin Scrapper" token <- getEnvToken "TELEGRAM_BOT_TOKEN" httpMgr <- newOpenSSLManager let clientEnv = mkClientEnv httpMgr (botBaseUrl token) wreqOpts = Wreq.defaults & Wreq.manager .~ Right httpMgr startBot (traceBotDefault $ botApp opts wreqOpts) clientEnv >>= \case Left err -> do putStrLn $ "Bot failed with: " <> show err exitFailure _ -> pure () -- * Types newtype Options w = Options {dbFile :: w ::: FilePath "Path to the sqlite database"} deriving (Generic) instance ParseRecord (Options Wrapped) data Offer = Offer { id_ :: Text, title :: Text, address :: Maybe Text, rooms :: Maybe Double, area :: Maybe Double, availableFrom :: Maybe Text, link :: Text } deriving (Show, Generic) instance FromRow Offer instance ToRow Offer data IBWResponse = IBWResponse { headline :: Text, searchresults :: Text } deriving (Generic) instance FromJSON IBWResponse -- * Parsing queryIBW :: Wreq.Options -> IO IBWResponse queryIBW wreqOpts = do let reqBody = [ partText "q" "wf-save-srch", partText "save" "false", partText "heizung_zentral" "false", partText "heizung_etage" "false", partText "energy_fernwaerme" "false", partText "heizung_nachtstrom" "false", partText "heizung_ofen" "false", partText "heizung_gas" "false", partText "heizung_oel" "false", partText "heizung_solar" "false", partText "heizung_erdwaerme" "false", partText "heizung_fussboden" "false", partText "seniorenwohnung" "false", partText "maisonette" "false", partText "etagen_dg" "false", partText "balkon_loggia_terrasse" "false", partText "garten" "false", partText "wbs" "0", partText "barrierefrei" "false", partText "gaeste_wc" "false", partText "aufzug" "false", partText "stellplatz" "false", partText "keller" "false", partText "badewanne" "false", partText "dusche" "false", partText "bez[]" "01_00", partText "bez[]" "02_00", partText "bez[]" "04_00", partText "bez[]" "07_00", partText "bez[]" "08_00", partText "bez[]" "09_00", partText "bez[]" "11_00" ] let link = "https://inberlinwohnen.de/wp-content/themes/ibw/skript/wohnungsfinder.php" resBS <- view responseBody <$> postWith wreqOpts link reqBody pure $ fromJust $ decode @IBWResponse resBS scrapeOffers :: Scraper Text [Offer] scrapeOffers = do let listItemsSelector = "div" @: [hasClass "result-list"] // "ul" // "li" chroots listItemsSelector scrapeOffer scrapeOffer :: Scraper Text Offer scrapeOffer = do title <- Text.strip <$> text "h3" let tableSelector = "div" @: [hasClass "tb-merkdetails"] // "div" @: [hasClass "span_wflist_data"] // "table" @: [hasClass "fullw"] // "tbody" rowsSelector = tableSelector // "tr" tableDataParser = do thVal <- text "th" (Text.strip thVal,) <$> text "td" allTableData <- chroots rowsSelector tableDataParser let address = lookup "Adresse:" allTableData roomsStr = lookup "Zimmeranzahl:" allTableData rooms = readEuropeanNumber <$> roomsStr areaStr = lookup "Wohnfläche:" allTableData area = readEuropeanNumber <$> (Text.stripSuffix " m²" =<< areaStr) availableFrom = lookup "Bezugsfertig ab:" allTableData link <- attr "href" $ "a" @: [hasClass "org-but"] id_ <- attr "id" "li" pure Offer {..} readEuropeanNumber :: Text -> Double readEuropeanNumber x = do read $ Text.unpack $ Text.replace "," "." x -- * SQLite insertOffer :: Connection -> Offer -> IO () insertOffer conn = execute conn "INSERT INTO offers (id, title, address, rooms, area, availableFrom, link) VALUES (?, ?, ?, ?, ?, ?, ?)" getOffer :: Connection -> Text -> IO (Maybe Offer) getOffer conn offerId = listToMaybe <$> query conn "SELECT * from offers where id = ?" (Only offerId) createTable :: Connection -> IO () createTable conn = execute_ conn "CREATE TABLE IF NOT EXISTS offers (id TEXT PRIMARY KEY, title TEXT, address TEXT, rooms REAL, area REAL, availableFrom TEXT, link TEXT)" saveOffer :: Connection -> Offer -> IO Bool saveOffer conn offer = do getOffer conn offer.id_ >>= \case Nothing -> do insertOffer conn offer pure True _ -> pure False -- * Telegram type Model = () newtype Action = StartChat Chat deriving (Show) botApp :: Options Unwrapped -> Wreq.Options -> BotApp Model Action botApp opts wreqOpts = BotApp { botInitialModel = (), botAction = action, botHandler = handler, botJobs = [scrapeJob opts wreqOpts] } action :: Update -> Model -> Maybe Action action update _ = do msg <- update.updateMessage txt <- msg.messageText if txt == "/start" then pure $ StartChat msg.messageChat else Nothing handler :: Action -> Model -> Eff Action Model handler (StartChat chat) model = model <# do liftIO $ putStrLn $ "Chat started! " <> ppAsJSON chat scrapeJob :: Options Unwrapped -> Wreq.Options -> BotJob Model Action scrapeJob opts wreqOpts = BotJob { botJobSchedule = "* * * * *", botJobTask = scrapeJobTask opts wreqOpts } scrapeJobTask :: Options Unwrapped -> Wreq.Options -> Model -> Eff Action Model scrapeJobTask opts wreqOpts m = m <# do liftIO $ putStrLn "Starting scrape job" res <- liftIO $ queryIBW wreqOpts let offers = fromJust $ scrapeStringLike res.searchresults scrapeOffers liftIO $ putStrLn "Fetched offers" conn <- liftIO $ open (dbFile opts) liftIO $ createTable conn mapM_ ( \offer -> do isNewOffer <- liftIO $ saveOffer conn offer when isNewOffer $ do liftIO $ putStrLn "Found a new offer" notify offer ) offers notify :: Offer -> BotM () notify offer = do let offerTitle = "" <> offer.title <> "" offerAddress = "Address: " <> fromMaybe "N/A" offer.address offerRooms = "Rooms: " <> maybe "N/A" (Text.pack . show) offer.rooms offerArea = "Area: " <> maybe "N/A" ((<> " m²") . Text.pack . show) offer.area offerLink = " offer.link <> "\" >Apply Here" offerBody = offerAddress <> "\n" <> offerRooms <> "\n" <> offerArea <> "\n" <> offerLink offerText = offerTitle <> "\n\n" <> offerBody sendMsgReq1 = (defSendMessage (SomeChatId $ ChatId 952512153) offerText) {sendMessageParseMode = Just HTML} sendMsgReq2 = (defSendMessage (SomeChatId $ ChatId 116981707) offerText) {sendMessageParseMode = Just HTML} res1 <- runTG sendMsgReq1 liftIO $ if res1.responseOk then putStrLn "Notified successfully" else do putStrLn $ "Failed to notify the offer: " <> show offer Text.putStrLn $ "Response: " <> Text.decodeUtf8 (LBS.toStrict $ Aeson.encode res1) res2 <- runTG sendMsgReq2 liftIO $ if res2.responseOk then putStrLn "Notified successfully" else do putStrLn $ "Failed to notify the offer: " <> show offer Text.putStrLn $ "Response: " <> Text.decodeUtf8 (LBS.toStrict $ Aeson.encode res2)