🎉 Initial commit

This commit is contained in:
Marc 2024-09-15 23:16:37 +02:00
commit 52449d367e
17 changed files with 1022 additions and 0 deletions

290
.gitignore vendored Normal file
View file

@ -0,0 +1,290 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,pycharm
# Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm
### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### PyCharm Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr
# Sonarlint plugin
# https://plugins.jetbrains.com/plugin/7973-sonarlint
.idea/**/sonarlint/
# SonarQube Plugin
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
.idea/**/sonarIssues.xml
# Markdown Navigator plugin
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
.idea/**/markdown-navigator.xml
.idea/**/markdown-navigator-enh.xml
.idea/**/markdown-navigator/
# Cache file creation bug
# See https://youtrack.jetbrains.com/issue/JBR-2257
.idea/$CACHE_FILE$
# CodeStream plugin
# https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml
# Azure Toolkit for IntelliJ plugin
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
.idea/**/azureSettings.xml
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
# End of https://www.toptal.com/developers/gitignore/api/python,pycharm

8
.idea/.gitignore vendored Normal file
View file

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

1
.idea/.name Normal file
View file

@ -0,0 +1 @@
cgn-appointments

View file

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.12 (.venv)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View file

@ -0,0 +1,69 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="HttpUrlsUsage" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredUrls">
<list>
<option value="http://localhost" />
<option value="http://127.0.0.1" />
<option value="http://0.0.0.0" />
<option value="http://www.w3.org/" />
<option value="http://json-schema.org/draft" />
<option value="http://java.sun.com/" />
<option value="http://xmlns.jcp.org/" />
<option value="http://javafx.com/javafx/" />
<option value="http://javafx.com/fxml" />
<option value="http://maven.apache.org/xsd/" />
<option value="http://maven.apache.org/POM/" />
<option value="http://www.springframework.org/schema/" />
<option value="http://www.springframework.org/tags" />
<option value="http://www.springframework.org/security/tags" />
<option value="http://www.thymeleaf.org" />
<option value="http://www.jboss.org/j2ee/schema/" />
<option value="http://www.jboss.com/xml/ns/" />
<option value="http://www.ibm.com/webservices/xsd" />
<option value="http://activemq.apache.org/schema/" />
<option value="http://schema.cloudfoundry.org/spring/" />
<option value="http://schemas.xmlsoap.org/" />
<option value="http://cxf.apache.org/schemas/" />
<option value="http://primefaces.org/ui" />
<option value="http://tiles.apache.org/" />
<option value="http://" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyCompatibilityInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ourVersions">
<value>
<list size="3">
<item index="0" class="java.lang.String" itemvalue="3.12" />
<item index="1" class="java.lang.String" itemvalue="3.11" />
<item index="2" class="java.lang.String" itemvalue="3.10" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="13">
<item index="0" class="java.lang.String" itemvalue="pandas" />
<item index="1" class="java.lang.String" itemvalue="PyPDF2" />
<item index="2" class="java.lang.String" itemvalue="drf-spectacular" />
<item index="3" class="java.lang.String" itemvalue="django-crispy-forms" />
<item index="4" class="java.lang.String" itemvalue="Django" />
<item index="5" class="java.lang.String" itemvalue="qrcode" />
<item index="6" class="java.lang.String" itemvalue="psycopg2" />
<item index="7" class="java.lang.String" itemvalue="django-active-link" />
<item index="8" class="java.lang.String" itemvalue="shortuuid" />
<item index="9" class="java.lang.String" itemvalue="uwsgi" />
<item index="10" class="java.lang.String" itemvalue="djangorestframework" />
<item index="11" class="java.lang.String" itemvalue="Pillow" />
<item index="12" class="java.lang.String" itemvalue="pyproject_hooks" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="ReassignedToPlainText" enabled="false" level="WARNING" enabled_by_default="false" />
</profile>
</component>

View file

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View file

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="MaterialThemeProjectNewConfig">
<option name="metadata">
<MTProjectMetadataState>
<option name="migrated" value="true" />
<option name="pristineConfig" value="false" />
<option name="userId" value="2be6031c:184ebfe94a2:-8000" />
<option name="version" value="8.13.2" />
</MTProjectMetadataState>
</option>
</component>
</project>

10
.idea/misc.xml Normal file
View file

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.12 (Termin_Scraper_Koeln)" />
</component>
<component name="MarkdownSettingsMigration">
<option name="stateVersion" value="1" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (Termin_Scraper_Koeln)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Termin_Scraper_Koeln.iml" filepath="$PROJECT_DIR$/.idea/Termin_Scraper_Koeln.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

1
README.md Normal file
View file

@ -0,0 +1 @@
# cgn-appointments

86
pyproject.toml Normal file
View file

@ -0,0 +1,86 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "cgn-appointments"
dynamic = ["version"]
description = 'Scrapes appointments from termine.stadt-koeln.de and sends a message to an ntfy server.'
readme = "README.md"
requires-python = ">=3.8"
license = "MIT"
keywords = ["selenium", "cologne", "scraper"]
authors = [
{ name = "Marc Koch", email = "marc-koch@posteo.de" },
]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"attrs==24.2.0",
"certifi==2024.8.30",
"charset-normalizer==3.3.2",
"h11==0.14.0",
"idna==3.8",
"outcome==1.3.0.post0",
"platformdirs==4.3.2",
"PySocks==1.7.1",
"PyYAML==6.0.2",
"requests==2.32.3",
"selenium==4.24.0",
"sniffio==1.3.1",
"sortedcontainers==2.4.0",
"trio==0.26.2",
"trio-websocket==0.11.1",
"typing_extensions==4.12.2",
"urllib3==2.2.3",
"validators==0.34.0",
"websocket-client==1.8.0",
"wsproto==1.2.0",
]
[project.urls]
Homepage = "https://git.extrasolar.space/marc/cgn-appointments.git"
[project.scripts]
cgn-appointments = "cgnappointments.__main__:main"
[tool.hatch.version]
path = "src/cgnappointments/__about__.py"
[tool.hatch.envs.types]
extra-dependencies = [
"mypy>=1.0.0",
]
[tool.hatch.envs.types.scripts]
check = "mypy --install-types --non-interactive {args:src/cgn_appointments tests}"
[tool.coverage.run]
source_pkgs = ["cgnappointments"]
branch = true
parallel = true
omit = [
"src/cgnappointments/__about__.py",
]
[tool.hatch.build.targets.wheel]
packages = ["src/cgnappointments"]
include = ["cgnappointments/config_template.yaml"]
[tool.coverage.paths]
cgn_appointments = ["src/cgnappointments"]
[tool.coverage.report]
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

View file

@ -0,0 +1,4 @@
# SPDX-FileCopyrightText: 2024-present Marc Koch <marc-koch@posteo.de>
#
# SPDX-License-Identifier: MIT
__version__ = "1.0.0rc0"

View file

View file

@ -0,0 +1,314 @@
import csv
import json
import logging
import re
from datetime import datetime
from pathlib import Path
from time import sleep
import requests
import yaml
from platformdirs import user_config_dir, user_data_dir
from requests import HTTPError
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from validators import url as validate_url
from .logger import setup_logging
# Directory one level above the directory that contains this module.
PROJECT_ROOT = Path(__file__).parent.parent
# Logger shared by every function in this module; it is named after the
# package this module is imported from (``__package__``).
logger = logging.getLogger(__package__)
def get_config() -> dict:
    """
    Load the configuration from the user's config.yaml file.

    If the file does not exist yet, it is created as a copy of the
    config_template.yaml located next to this module and the program
    terminates with exit status 0 so the user can fill it in first.

    :return: the parsed configuration (an empty dict if the file is empty)
    :raises SystemExit: with status 0 after a new config file was created,
        with status 1 if the file vanished before it could be read
    """
    config_yaml = Path(user_config_dir()) / "cgn-appointments" / "config.yaml"
    if not config_yaml.exists():
        # main() configures logging only after this function has returned,
        # so the hint is printed instead of logged to make sure it is seen.
        print(
            "config.yaml not found.\n"
            f"Creating a new one under '{config_yaml}'.\n"
            "Please fill in the required information."
        )
        config_yaml.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): assumes config_template.yaml is shipped in the same
        # directory as this module (see the wheel settings in pyproject.toml).
        template = Path(__file__).parent / "config_template.yaml"
        config_yaml.write_text(template.read_text(encoding="utf-8"),
                               encoding="utf-8")
        raise SystemExit(0)
    try:
        with open(config_yaml, "r", encoding="utf-8") as file:
            # safe_load returns None for an empty document.
            return dict(yaml.safe_load(file) or {})
    except FileNotFoundError:
        print("config.yaml not found")
        raise SystemExit(1) from None
def define_csv_path(csv_path: str|None, csv_name: str|None) -> Path:
"""
Define the path to the csv file.
:param csv_path:
:param csv_name:
:return:
"""
csv_path = Path(csv_path) if csv_path else None
csv_name = Path(csv_name) if csv_name else None
if csv_path is not None and csv_path.is_file():
return csv_path
elif csv_path is not None and csv_path.is_dir() and csv_name is not None:
return csv_path / csv_name
elif csv_path is not None and csv_path.is_dir() and csv_name is None:
return csv_path / "cgn-appointments.csv"
elif csv_name is not None:
return Path(user_data_dir()) / csv_name
else:
return Path(user_data_dir()) / "cgn-appointments.csv"
def select_options(services, selects):
    """
    Choose the option at index 1 in the dropdowns of the wanted services.

    A dropdown is handled only if the text of its grandparent element
    contains at least one of the given service names; the option is selected
    for each of its ``data-testid`` attributes whose value starts with
    ``service--``.
    :param services: names of the services to look for
    :param selects: the select elements to inspect
    :return: None
    """
    for dropdown in selects:
        surrounding_text = dropdown.find_element(By.XPATH, "../..").text
        matching = [name for name in services if name in surrounding_text]
        if not matching:
            continue
        logger.debug(f"Selecting option for {', '.join(matching)}")
        for attribute in dropdown.get_property('attributes'):
            if attribute['name'] != 'data-testid':
                continue
            if attribute['value'].startswith("service--"):
                Select(dropdown).select_by_index(1)
def ntfy(server: str,
         topic: str,
         title: str,
         message: str,
         click_url: str,
         tags: list | None = None,
         priority: int = 3,
         timeout: float = 10) -> None:
    """
    Sends a notification to the ntfy server.

    The notification is posted as a JSON document to ``server``. Failures
    are logged and not raised, so a notification problem does not abort the
    caller.
    :param server: URL the JSON document is posted to
    :param topic: ntfy topic to publish to
    :param title: title of the notification
    :param message: body of the notification
    :param click_url: URL sent as the notification's ``click`` value
    :param tags: optional list of tags
    :param priority: notification priority
    :param timeout: seconds to wait for the server before giving up
    :return: None
    """
    data = {
        "topic": topic,
        "title": title,
        "message": message,
        "tags": tags,
        "click": click_url,
        "priority": priority,
    }
    log_extra = {"ntfy": {"server": server} | data}
    logger.debug(f"Sending notification to '{server}'.", extra=log_extra)
    try:
        # Without a timeout requests waits indefinitely for a stalled server.
        response = requests.post(
            server,
            data=json.dumps(data),
            timeout=timeout,
        )
        response.raise_for_status()
    except HTTPError as e:
        logger.error(f"HTTP error occurred: {e}", extra=log_extra)
    except requests.RequestException as e:
        # Connection errors, timeouts, invalid URLs, ...
        logger.error(f"Could not send notification: {e}", extra=log_extra)
def write_csv(csv_path, lines):
    """
    Writes the lines to a csv file, replacing its previous content.

    Errors are logged and not raised (best effort).
    :param csv_path: path of the csv file to write
    :param lines: iterable of rows, each row an iterable of values
    :return: None
    """
    logger.debug("Writing to csv file.",
                 extra={"csv_path": csv_path, "lines": lines})
    try:
        # newline="" lets the csv module control line endings itself;
        # without it an extra blank row appears on platforms that use \r\n.
        with open(csv_path, mode="w", newline="") as file:
            csv.writer(file).writerows(lines)
    except Exception as e:
        logger.error(f"Error writing to csv file: {e}",
                     exc_info=True,
                     extra={"csv_path": csv_path, "lines": lines})
def _abort(message):
    """Log ``message`` as an error and terminate with exit status 1."""
    logger.error(message)
    raise SystemExit(1)


def _scrape_earliest_dates(url, services, check_locations, date_regex, date_format):
    """
    Read the earliest appointment of every wanted location from the website.

    :return: tuple ``(earliest, session_url)``; ``earliest`` maps each location
        found on the page to a datetime, or to None if no date could be read
    """
    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    try:
        # Open the website
        logger.debug(f"Opening website: {url}")
        driver.get(url)

        # Start a new session if the previous one expired
        page_title = driver.find_element(By.ID, "page_title").text
        if page_title == "Ihre Sitzung ist abgelaufen.":
            logger.debug("Session expired. Starting a new session.")
            driver.find_element(By.CLASS_NAME, "button").click()

        # Select services
        select_options(services, driver.find_elements(By.TAG_NAME, 'select'))

        # Click next button
        driver.find_element(By.TAG_NAME, "button").click()

        # Wait for the page to load
        logger.debug("Waiting 10 seconds for the page to load...")
        sleep(10)

        # Get location containers
        containers = {}
        for container in driver.find_elements(By.CLASS_NAME, "location-container"):
            title_text = container.find_element(By.CLASS_NAME, "location_title").text
            for loc in check_locations:
                if loc in title_text:
                    containers[loc] = container
        if containers:
            logger.debug("Location containers found",
                         extra={"locations": containers})
        else:
            logger.warning("No location containers found.")

        # Get earliest date for each location
        earliest = {}
        for loc, container in containers.items():
            date_text = container.find_element(By.CLASS_NAME, "earliest").text
            found_date = re.search(date_regex, date_text)
            earliest[loc] = None
            if found_date:
                earliest[loc] = datetime.strptime(found_date.group(), date_format)
            elif "kein Termin gefunden" in date_text:
                logger.info(f"No appointment found for {loc}.")
            else:
                logger.warning(f"Could not find date or 'kein Termin gefunden' for {loc}.",
                               extra={"location": loc, "date_text": date_text})

        return earliest, driver.current_url
    finally:
        # Always shut the browser down, even if scraping failed half-way,
        # so no headless Chrome process is left behind.
        driver.quit()
        logger.debug("Browser closed.")


def _read_previous_dates(csv_path, check_locations, date_format):
    """
    Read the dates stored by the previous run from the csv file.

    The file (and its parent directory) is created if it does not exist.
    Malformed lines are logged and skipped.

    :return: dict mapping location name to the previously stored datetime
    """
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    csv_path.touch()
    with open(csv_path, mode="r", newline="") as file:
        csv_file = list(csv.reader(file))
    logger.debug("Read csv file",
                 extra={"csv_path": csv_path, "csv_file": csv_file})

    previous = {}
    for n, line in enumerate(csv_file):
        if len(line) != 2:
            logger.warning("Invalid line in csv file.",
                           extra={"line_number": n, "line": line})
            continue
        name, stored_date = line
        if name not in check_locations:
            continue
        try:
            previous[name] = datetime.strptime(stored_date, date_format)
        except ValueError:
            logger.warning("Invalid date in csv file.",
                           extra={"line_number": n, "line": line})
    return previous


def main():
    """
    Main function.

    Loads and validates the configuration, scrapes the earliest appointment
    per configured location, compares it with the date stored by the previous
    run, sends an ntfy notification for every changed date and finally
    rewrites the csv file with the current state.
    :return: None
    """
    # Get the configuration
    config = get_config()

    # Set up logging
    setup_logging(config.get("logging"))

    # Set the variables
    url = config.get("url")
    services = config.get("services")
    check_locations = config.get("locations")
    csv_name = config.get("csv_name", "cgn-appointments.csv")
    csv_path = define_csv_path(config.get("csv_path"), csv_name)
    date_regex = config.get("date_regex")
    date_format = config.get("date_format")
    # A missing ntfy section must lead to the validation errors below,
    # not to an AttributeError on None.
    ntfy_config = config.get("ntfy") or {}
    ntfy_server = ntfy_config.get("server")
    ntfy_topic = ntfy_config.get("topic")
    ntfy_title = ntfy_config.get("title")
    ntfy_message = ntfy_config.get("message")
    ntfy_tags = ntfy_config.get("tags")
    ntfy_priority = ntfy_config.get("priority", 3)

    # Validate data
    if not validate_url(url):
        _abort(f"Invalid URL '{url}'.")
    if not services:
        _abort("No services defined.")
    if not check_locations:
        _abort("No locations defined.")
    if not date_regex:
        _abort("No date regex defined.")
    try:
        re.compile(date_regex)
    except re.error:
        _abort(f"Invalid date regex '{date_regex}'.")
    if not date_format:
        _abort("No date format defined.")
    if not validate_url(ntfy_server):
        _abort(f"Invalid ntfy server '{ntfy_server}'.")
    if not ntfy_topic:
        _abort("No ntfy topic defined.")
    if not ntfy_message:
        _abort("No ntfy message defined.")

    earliest, session_url = _scrape_earliest_dates(
        url, services, check_locations, date_regex, date_format)
    previous = _read_previous_dates(csv_path, check_locations, date_format)

    # Compare previous and new dates and send notification if new date is different.
    # Locations known only from the csv file are kept so their stored date
    # survives a run in which the location was not found on the page.
    names = list(earliest) + [name for name in previous if name not in earliest]
    lines = []
    for name in names:
        previous_date = previous.get(name)
        new_date = earliest.get(name)
        if new_date and new_date != previous_date:
            logger.info(f"New appointment found for {name}: {new_date}",
                        extra={"location": name, "previous_date": previous_date,
                               "new_date": new_date})
            lines.append((name, new_date.strftime(date_format)))
            ntfy(
                ntfy_server,
                ntfy_topic,
                ntfy_title,
                ntfy_message % (name, new_date),
                session_url,
                ntfy_tags,
                ntfy_priority,
            )
        elif previous_date is not None:
            lines.append((name, previous_date.strftime(date_format)))

    # Write new data to file
    write_csv(csv_path, lines)
if __name__ == "__main__":
    # Script entry point: run main() and report any unexpected error,
    # including its traceback, before exiting with status 1.
    try:
        main()
    except Exception as error:
        logger.exception(f"An error occurred: {error}")
        exit(1)

View file

@ -0,0 +1,76 @@
---
url: 'https://termine.stadt-koeln.de/m/kundenzentren/extern/calendar/?uid=b5a5a394-ec33-4130-9af3-490f99517071&wsid=e570a1ea-7b3d-43f6-bf43-3e60b3d7d888&lang=de&set_lang_ui=de&rev=rfOtF#top'
# Which services should be checked for free appointments?
services:
- 'Personalausweis - Antrag'
- 'Reisepass - Antrag (seit 01.01.2024 auch für Kinder unter 12 Jahren)'
# In which locations should the services be checked?
locations:
- Ehrenfeld
- Kalk
# Path to the CSV file to store the scraped appointments
# csv_path: ~/Termine.csv
# Name of the CSV file to store the scraped appointments
csv_name: 'appointments.csv'
# Regex to extract the date from the website
date_regex: '(\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})'
# Date format to store the date in the CSV file (should match the date_regex)
date_format: '%d.%m.%Y %H:%M'
# ntfy configuration
ntfy:
server: https://ntfy.sh/
topic: public_cgn_appintments_83e0c8db1f51a7044b6431ddb2814c11
title: 'A new appointment is available!'
message: 'A new appointment is available in %s: %s'
tags:
- tada
priority: 3 # 1-5
# Configure logging
logging:
version: 1
disable_existing_loggers: false
formatters:
simple:
format: '[%(levelname)s|%(module)s|L%(lineno)d] %(asctime)s: %(message)s'
datefmt: '%Y-%m-%dT%H:%M:%S%z'
json:
fmt_keys:
level: levelname
message: message
timestamp: timestamp
logger: name
module: module
function: funcName
line: lineno
thread_name: threadName
handlers:
stderr:
class: logging.StreamHandler
formatter: simple
stream: ext://sys.stderr
level: DEBUG
file:
class: logging.handlers.RotatingFileHandler
formatter: json
level: INFO
maxBytes: 10000000
backupCount: 3
queue_handler:
class: logging.handlers.QueueHandler
handlers:
- stderr
- file
respect_handler_level: true
loggers:
root:
handlers:
- queue_handler
level: DEBUG

View file

@ -0,0 +1,119 @@
import atexit
import datetime as dt
import json
import logging
import logging.config
from pathlib import Path
from typing import override
from platformdirs import user_log_dir
# Directory one level above the directory that contains this module.
PROJECT_ROOT = Path(__file__).parent.parent
# Names of log record attributes that JSONFormatter does NOT copy when it adds
# a record's remaining attributes to the JSON output; anything on a record
# that is not listed here is included in the output.
LOG_RECORD_BUILTIN_ATTRS = {
    "args",
    "asctime",
    "created",
    "exc_info",
    "exc_text",
    "filename",
    "funcName",
    "levelname",
    "levelno",
    "lineno",
    "module",
    "msecs",
    "message",
    "msg",
    "name",
    "pathname",
    "process",
    "processName",
    "relativeCreated",
    "stack_info",
    "thread",
    "threadName",
    "taskName",
}
def setup_logging(logging_config: dict, loglevel: int | str | None = None):
"""
Setup logging configuration
:return:
"""
# Override log level if provided
if loglevel:
level_str = logging.getLevelName(loglevel)
if not isinstance(level_str, str):
level_str = logging.getLevelName(level_str)
for handler in logging_config["handlers"].values():
handler["level"] = level_str
# Set log file path to user log directory if it does not exist
if "file" in logging_config["handlers"].keys():
if "filename" not in logging_config["handlers"]["file"].keys():
logging_config["handlers"]["file"]["filename"] = (
Path(user_log_dir()) / "cgn-appointments.log.jsonl")
# Create path and file if it does not exist
if "file" in logging_config["handlers"].keys():
Path(logging_config["handlers"]["file"]["filename"]).parent.mkdir(
parents=True, exist_ok=True)
Path(logging_config["handlers"]["file"]["filename"]).touch()
# If json formatter is configured, add JSONFormatter class to config
if "json" in logging_config["formatters"].keys():
logging_config["formatters"]["json"]["()"] = f"{JSONFormatter.__module__}.{JSONFormatter.__qualname__}"
logging.config.dictConfig(logging_config)
queue_handler = logging.getHandlerByName("queue_handler")
if queue_handler is not None:
queue_handler.listener.start()
atexit.register(queue_handler.listener.stop)
class JSONFormatter(logging.Formatter):
"""
A custom JSON formatter for logging
"""
def __init__(
self,
*,
fmt_keys: dict[str, str] | None = None,
):
super().__init__()
self.fmt_keys = fmt_keys if fmt_keys is not None else {}
@override
def format(self, record: logging.LogRecord) -> str:
message = self._prepare_log_dict(record)
return json.dumps(message, default=str)
def _prepare_log_dict(self, record: logging.LogRecord) -> dict:
always_fields = {
"message": record.getMessage(),
"timestamp": dt.datetime.fromtimestamp(
record.created, tz=dt.timezone.utc
).isoformat()
}
if record.exc_info is not None:
always_fields["exc_info"] = self.formatException(record.exc_info)
if record.stack_info is not None:
always_fields["stack_info"] = self.formatStack(record.stack_info)
message = {
key: msg_val
if (msg_val := always_fields.pop(val, None)) is not None
else getattr(record, val)
for key, val in self.fmt_keys.items()
}
message.update(always_fields)
# Include all other attributes
for key, val, in record.__dict__.items():
if key not in LOG_RECORD_BUILTIN_ATTRS:
message[key] = val
return message