From 52449d367e13ebffe926860e01bba25f5f4a194e Mon Sep 17 00:00:00 2001 From: Marc Koch Date: Sun, 15 Sep 2024 23:16:37 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20Initial=20commit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 290 ++++++++++++++++ .idea/.gitignore | 8 + .idea/.name | 1 + .idea/cgn-appointments.iml | 11 + .idea/inspectionProfiles/Project_Default.xml | 69 ++++ .../inspectionProfiles/profiles_settings.xml | 6 + .idea/material_theme_project_new.xml | 13 + .idea/misc.xml | 10 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + README.md | 1 + pyproject.toml | 86 +++++ src/cgnappointments/__about__.py | 4 + src/cgnappointments/__init__.py | 0 src/cgnappointments/__main__.py | 314 ++++++++++++++++++ src/cgnappointments/config_template.yaml | 76 +++++ src/cgnappointments/logger.py | 119 +++++++ 17 files changed, 1022 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/.name create mode 100644 .idea/cgn-appointments.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/material_theme_project_new.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 src/cgnappointments/__about__.py create mode 100644 src/cgnappointments/__init__.py create mode 100644 src/cgnappointments/__main__.py create mode 100644 src/cgnappointments/config_template.yaml create mode 100644 src/cgnappointments/logger.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b0831e7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,290 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python,pycharm +# Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij +.idea/**/azureSettings.xml + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python,pycharm + diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..808224a --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +cgn-appointments \ No newline at end of file diff --git a/.idea/cgn-appointments.iml b/.idea/cgn-appointments.iml new file mode 100644 index 0000000..891ace9 --- /dev/null +++ b/.idea/cgn-appointments.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..223075d --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,69 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/material_theme_project_new.xml b/.idea/material_theme_project_new.xml new file mode 100644 index 0000000..2209535 --- /dev/null +++ b/.idea/material_theme_project_new.xml @@ -0,0 +1,13 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..678a045 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,10 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..2942a23 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..576bd8a --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# cgn-appointments diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..da9931a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,86 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "cgn-appointments" +dynamic = ["version"] +description = 'Scrapes appointments from termine.stadt-koeln.de an sends a message to a ntfy server.' +readme = "README.md" +requires-python = ">=3.8" +license = "MIT" +keywords = ["selenim", "cologne", "scraper"] +authors = [ + { name = "Marc Koch", email = "marc-koch@posteo.de" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "attrs==24.2.0", + "certifi==2024.8.30", + "charset-normalizer==3.3.2", + "h11==0.14.0", + "idna==3.8", + "outcome==1.3.0.post0", + "platformdirs==4.3.2", + "PySocks==1.7.1", + "PyYAML==6.0.2", + "requests==2.32.3", + "selenium==4.24.0", + "sniffio==1.3.1", + "sortedcontainers==2.4.0", + "trio==0.26.2", + "trio-websocket==0.11.1", + "typing_extensions==4.12.2", + "urllib3==2.2.3", + "validators==0.34.0", + "websocket-client==1.8.0", + "wsproto==1.2.0", +] + +[project.urls] +Homepage = "https://git.extrasolar.space/marc/cgn-appointments.git" + +[project.scripts] +cgn-appointments = "cgnappointments.__main__:main" + +[tool.hatch.version] +path = "src/cgnappointments/__about__.py" + +[tool.hatch.envs.types] +extra-dependencies = [ + "mypy>=1.0.0", +] +[tool.hatch.envs.types.scripts] +check = "mypy --install-types --non-interactive {args:src/cgn_appointments tests}" + +[tool.coverage.run] +source_pkgs = ["cgnappointments"] +branch = true +parallel = true +omit = [ + "src/cgnappointments/__about__.py", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/cgnappointments"] +include = ["cgnappointments/config_template.yaml"] + +[tool.coverage.paths] +cgn_appointments = ["src/cgnappointments"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] diff --git a/src/cgnappointments/__about__.py b/src/cgnappointments/__about__.py new file mode 100644 index 0000000..f854876 --- /dev/null +++ b/src/cgnappointments/__about__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2024-present Marc Koch +# +# SPDX-License-Identifier: MIT +__version__ = "1.0.0rc0" \ No newline at end of file diff --git a/src/cgnappointments/__init__.py b/src/cgnappointments/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/cgnappointments/__main__.py b/src/cgnappointments/__main__.py new file mode 100644 index 0000000..4ac9f2d --- /dev/null +++ b/src/cgnappointments/__main__.py @@ -0,0 +1,314 @@ +import csv +import json +import logging +import re +from datetime import datetime +from pathlib import Path +from time import sleep + +import requests +import yaml +from platformdirs import user_config_dir, user_data_dir +from requests import HTTPError +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support.select import Select +from validators import url as validate_url + +from .logger import setup_logging + +PROJECT_ROOT = Path(__file__).parent.parent + +logger = logging.getLogger(__package__) + + +def get_config() -> dict: + """ + Get the configuration from the config.yaml file. + :return: + """ + config_yaml = Path(user_config_dir()) / "cgn-appointments" / "config.yaml" + + if not config_yaml.exists(): + logger.info(f"""config.yaml not found. + Creating a new one under '{config_yaml}'. + Please fill in the required information.""") + config_yaml.parent.mkdir(parents=True, exist_ok=True) + Path("./config_template.yaml").write_text(config_yaml.read_text()) + exit(0) + + try: + with open(config_yaml, "r") as file: + return dict(yaml.safe_load(file)) + except FileNotFoundError: + print("config.yaml not found") + exit(1) + + +def define_csv_path(csv_path: str|None, csv_name: str|None) -> Path: + """ + Define the path to the csv file. + :param csv_path: + :param csv_name: + :return: + """ + csv_path = Path(csv_path) if csv_path else None + csv_name = Path(csv_name) if csv_name else None + + if csv_path is not None and csv_path.is_file(): + return csv_path + elif csv_path is not None and csv_path.is_dir() and csv_name is not None: + return csv_path / csv_name + elif csv_path is not None and csv_path.is_dir() and csv_name is None: + return csv_path / "cgn-appointments.csv" + elif csv_name is not None: + return Path(user_data_dir()) / csv_name + else: + return Path(user_data_dir()) / "cgn-appointments.csv" + + +def select_options(services, selects): + """ + Selects the first option of dropdown elements. + :param services: + :param selects: + :return: + """ + for select in selects: + parents = select.find_element(By.XPATH, "../..").text + available_services = [service for service in services if service in parents] + if available_services: + logger.debug(f"Selecting option for {', '.join(available_services)}") + for attr in select.get_property('attributes'): + if attr['name'] == 'data-testid' and attr['value'].startswith("service--"): + Select(select).select_by_index(1) + + +def ntfy(server: str, + topic: str, + title: str, + message: str, + click_url: str, + tags: list = None, + priority: int = 3) -> None: + """ + Sends a notification to the ntfy server. + :param title: + :param click_url: + :param topic: + :param server: + :param message: + :param tags: + :param priority: + :return: + """ + data = { + "topic": topic, + "title": title, + "message": message, + "tags": tags, + "click": click_url, + "priority": priority + } + + logger.debug(f"Sending notification to '{server}'.", + extra={"ntfy": {"server": server} | data}) + + try: + response = requests.post( + server, + data=json.dumps(data) + ) + response.raise_for_status() + except HTTPError as e: + logger.error(f"HTTP error occurred: {e}", + extra={"ntfy": {"server": server} | data }) + + +def write_csv(csv_path, lines): + """ + Writes the lines to a csv file. + :param csv_path: + :param lines: + :return: + """ + logger.debug(f"Writing to csv file.", + extra={"csv_path": csv_path, "lines": lines}) + try: + with open(csv_path, mode="w") as file: + csv_writer = csv.writer(file) + for line in lines: + csv_writer.writerow(line) + except Exception as e: + logger.error(f"Error writing to csv file: {e}", + exc_info=True, + extra={"csv_path": csv_path, "lines": lines}) + + +def main(): + """ + Main function. + :return: + """ + + # Get the configuration + config = get_config() + + # Set up logging + setup_logging(config.get("logging")) + + # Set the variables + url = config.get("url") + services = config.get("services") + check_locations = config.get("locations") + csv_name = config.get("csv_name", "cgn-appointments.csv") + csv_path = define_csv_path(config.get("csv_path"), csv_name) + date_regex = config.get("date_regex") + date_format = config.get("date_format") + ntfy_server = config.get("ntfy").get("server") + ntfy_topic = config.get("ntfy").get("topic") + ntfy_title = config.get("ntfy").get("title") + ntfy_message = config.get("ntfy").get("message") + ntfy_tags = config.get("ntfy").get("tags") + ntfy_priority = config.get("ntfy").get("priority") + + # Validate data + if not validate_url(url): + logger.error(f"Invalid URL '{url}'.") + exit(1) + if not services: + logger.error("No services defined.") + exit(1) + if not check_locations: + logger.error("No locations defined.") + exit(1) + if not date_regex: + logger.error("No date regex defined.") + exit(1) + try: + re.compile(date_regex) + except re.error: + logger.error(f"Invalid date regex '{date_regex}'.") + exit(1) + if not date_format: + logger.error("No date format defined.") + exit(1) + if not validate_url(ntfy_server): + logger.error(f"Invalid ntfy server '{ntfy_server}'.") + exit(1) + if not ntfy_topic: + logger.error("No ntfy topic defined.") + exit(1) + + locations = {} + + options = Options() + options.add_argument("--headless=new") + driver = webdriver.Chrome(options=options) + + # Open the website + logger.debug(f"Opening website: {url}") + driver.get(url) + + # Start a new session if the previous one expired + session_expired = driver.find_element(By.ID, "page_title").text == "Ihre Sitzung ist abgelaufen." + if session_expired: + logger.debug("Session expired. Starting a new session.") + new_session_button = driver.find_element(By.CLASS_NAME, "button") + new_session_button.click() + + # Select services + selects = driver.find_elements(By.TAG_NAME, 'select') + select_options(services, selects) + + # Click next button + next_button = driver.find_element(By.TAG_NAME, "button") + next_button.click() + + # Wait for the page to load + logger.debug("Waiting 10 seconds for the page to load...") + sleep(10) + + # Get location containers + location_containers = driver.find_elements(By.CLASS_NAME, "location-container") + for location_container in location_containers: + loc_title = location_container.find_element(By.CLASS_NAME, "location_title") + for loc in check_locations: + if loc in loc_title.text: + locations.update({loc: {"location_container": location_container}}) + if len(locations) > 0: + logger.debug(f"Location containers found", + extra={"locations": locations}) + else: + logger.warning("No location containers found.") + + # Get earliest date for each location + for loc in locations.keys(): + location_container = locations[loc]["location_container"] + date_text = location_container.find_element(By.CLASS_NAME, "earliest").text + found_date = re.search(date_regex, date_text) + if found_date: + locations[loc]["earliest"] = datetime.strptime(found_date.group(), date_format) + elif "kein Termin gefunden" in date_text: + locations[loc]["earliest"] = None + logger.info(f"No appointment found for {loc}.") + else: + logger.warning(f"Could not find date or 'kein Termin gefunden' for {loc}.", + extra={"location": loc, "date_text": date_text}) + + # Store session URL and close browser + session_url = driver.current_url + driver.quit() + logger.debug("Browser closed.") + + # Read previous data from file + csv_path.touch() + with open(csv_path, mode="r") as file: + csv_file = list(csv.reader(file)) + logger.debug(f"Read csv file", + extra={"csv_path": csv_path, "csv_file": csv_file}) + + # Append previous data to locations + if len(csv_file) > 0: + for n, line in enumerate(csv_file): + if len(line) == 2 and line[0] in check_locations: + locations[line[0]]["previous"] = datetime.strptime(line[1], date_format) + elif len(line) != 2: + logger.debug("Invalid line in csv file.", + extra={"line_number": n, "line": line}) + exit(1) + + # Compare previous and new dates and send notification if new date is different + lines = [] + for name, data in locations.items(): + previous_date = data.get("previous") + new_date = data.get("earliest") + if new_date and new_date != previous_date: + logger.info(f"New appointment found for {name}: {new_date}", + extra={"location": name, "previous_date": previous_date, + "new_date": new_date}) + lines.append((name, new_date.strftime(date_format))) + ntfy( + ntfy_server, + ntfy_topic, + ntfy_title, + ntfy_message % (name, new_date), + session_url, + ntfy_tags, + ntfy_priority, + ) + elif previous_date is not None: + lines.append((name, previous_date.strftime(date_format))) + + # Write new data to file + write_csv(csv_path, lines) + + + +if __name__ == "__main__": + try: + main() + except Exception as e: + logger.error(f"An error occurred: {e}", exc_info=True) + exit(1) \ No newline at end of file diff --git a/src/cgnappointments/config_template.yaml b/src/cgnappointments/config_template.yaml new file mode 100644 index 0000000..601be69 --- /dev/null +++ b/src/cgnappointments/config_template.yaml @@ -0,0 +1,76 @@ +--- +url: 'https://termine.stadt-koeln.de/m/kundenzentren/extern/calendar/?uid=b5a5a394-ec33-4130-9af3-490f99517071&wsid=e570a1ea-7b3d-43f6-bf43-3e60b3d7d888&lang=de&set_lang_ui=de&rev=rfOtF#top' + +# Which services should be checked for free appointments? +services: + - 'Personalausweis - Antrag' + - 'Reisepass - Antrag (seit 01.01.2024 auch für Kinder unter 12 Jahren)' + +# In which locations should the services be checked? +locations: + - Ehrenfeld + - Kalk + +# Path to the CSV file to store the scraped appointments +# csv_path: ~/Termine.csv + +# Name of the CSV file to store the scraped appointments +csv_name: 'appointments.csv' + +# Regex to extract the date from the website +date_regex: '(\\d{2}\\.\\d{2}\\.\\d{4}\\s\\d{2}:\\d{2})' + +# Date format to store the date in the CSV file (should match the date_regex) +date_format: '%d.%m.%Y %H:%M' + +# ntfy configuration +ntfy: + server: https://ntfy.sh/ + topic: public_cgn_appintments_83e0c8db1f51a7044b6431ddb2814c11 + title: 'A new appointment is available!' + message: 'A new appointment is available in %s: %s' + tags: + - tada + priority: 3 # 1-5 + +# Configure logging +logging: + version: 1 + disable_existing_loggers: false + formatters: + simple: + format: '[%(levelname)s|%(module)s|L%(lineno)d] %(asctime)s: %(message)s' + datefmt: '%Y-%m-%dT%H:%M:%S%z' + json: + fmt_keys: + level: levelname + message: message + timestamp: timestamp + logger: name + module: module + function: funcName + line: lineno + thread_name: threadName + handlers: + stderr: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stderr + level: DEBUG + file: + class: logging.handlers.RotatingFileHandler + formatter: json + level: INFO + maxBytes: 10000000 + backupCount: 3 + queue_handler: + class: logging.handlers.QueueHandler + handlers: + - stderr + - file + respect_handler_level: true + loggers: + root: + handlers: + - queue_handler + level: DEBUG diff --git a/src/cgnappointments/logger.py b/src/cgnappointments/logger.py new file mode 100644 index 0000000..a842773 --- /dev/null +++ b/src/cgnappointments/logger.py @@ -0,0 +1,119 @@ +import atexit +import datetime as dt +import json +import logging +import logging.config +from pathlib import Path +from typing import override + +from platformdirs import user_log_dir + +PROJECT_ROOT = Path(__file__).parent.parent + +LOG_RECORD_BUILTIN_ATTRS = { + "args", + "asctime", + "created", + "exc_info", + "exc_text", + "filename", + "funcName", + "levelname", + "levelno", + "lineno", + "module", + "msecs", + "message", + "msg", + "name", + "pathname", + "process", + "processName", + "relativeCreated", + "stack_info", + "thread", + "threadName", + "taskName", +} + + +def setup_logging(logging_config: dict, loglevel: int | str | None = None): + """ + Setup logging configuration + :return: + """ + # Override log level if provided + if loglevel: + level_str = logging.getLevelName(loglevel) + if not isinstance(level_str, str): + level_str = logging.getLevelName(level_str) + for handler in logging_config["handlers"].values(): + handler["level"] = level_str + + # Set log file path to user log directory if it does not exist + if "file" in logging_config["handlers"].keys(): + if "filename" not in logging_config["handlers"]["file"].keys(): + logging_config["handlers"]["file"]["filename"] = ( + Path(user_log_dir()) / "cgn-appointments.log.jsonl") + + # Create path and file if it does not exist + if "file" in logging_config["handlers"].keys(): + Path(logging_config["handlers"]["file"]["filename"]).parent.mkdir( + parents=True, exist_ok=True) + Path(logging_config["handlers"]["file"]["filename"]).touch() + + # If json formatter is configured, add JSONFormatter class to config + if "json" in logging_config["formatters"].keys(): + logging_config["formatters"]["json"]["()"] = f"{JSONFormatter.__module__}.{JSONFormatter.__qualname__}" + + logging.config.dictConfig(logging_config) + queue_handler = logging.getHandlerByName("queue_handler") + if queue_handler is not None: + queue_handler.listener.start() + atexit.register(queue_handler.listener.stop) + + +class JSONFormatter(logging.Formatter): + """ + A custom JSON formatter for logging + """ + def __init__( + self, + *, + fmt_keys: dict[str, str] | None = None, + ): + super().__init__() + self.fmt_keys = fmt_keys if fmt_keys is not None else {} + + @override + def format(self, record: logging.LogRecord) -> str: + message = self._prepare_log_dict(record) + return json.dumps(message, default=str) + + def _prepare_log_dict(self, record: logging.LogRecord) -> dict: + always_fields = { + "message": record.getMessage(), + "timestamp": dt.datetime.fromtimestamp( + record.created, tz=dt.timezone.utc + ).isoformat() + } + if record.exc_info is not None: + always_fields["exc_info"] = self.formatException(record.exc_info) + + if record.stack_info is not None: + always_fields["stack_info"] = self.formatStack(record.stack_info) + + message = { + key: msg_val + if (msg_val := always_fields.pop(val, None)) is not None + else getattr(record, val) + for key, val in self.fmt_keys.items() + } + message.update(always_fields) + + # Include all other attributes + for key, val, in record.__dict__.items(): + if key not in LOG_RECORD_BUILTIN_ATTRS: + message[key] = val + + return message