From a9e2607d80e612339e143bc59f817ee6e9d3b104 Mon Sep 17 00:00:00 2001 From: Slavi Pantaleev Date: Sat, 5 Nov 2022 07:29:47 +0200 Subject: [PATCH] Fix yaml[comments-indentation] in workers config and remove automation --- .../files/workers-doc-to-yaml.awk | 145 ------- .../files/workers-doc-to-yaml.sh | 7 - roles/custom/matrix-synapse/vars/main.yml | 361 +----------------- 3 files changed, 4 insertions(+), 509 deletions(-) delete mode 100755 roles/custom/matrix-synapse/files/workers-doc-to-yaml.awk delete mode 100755 roles/custom/matrix-synapse/files/workers-doc-to-yaml.sh diff --git a/roles/custom/matrix-synapse/files/workers-doc-to-yaml.awk b/roles/custom/matrix-synapse/files/workers-doc-to-yaml.awk deleted file mode 100755 index 1911690f..00000000 --- a/roles/custom/matrix-synapse/files/workers-doc-to-yaml.awk +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/awk -# Hackish approach to get a machine-readable list of current matrix -# synapse REST API endpoints from the official documentation at -# https://github.com/matrix-org/synapse/raw/master/docs/workers.md -# -# invoke in shell with: -# URL=https://github.com/matrix-org/synapse/raw/master/docs/workers.md -# curl -L ${URL} | awk -f workers-doc-to-yaml.awk - - -function worker_stanza_append(string) { - worker_stanza = worker_stanza string -} - -function line_is_endpoint_url(line) { - # probably API endpoint if it starts with white-space and ^ or / - return (line ~ /^ +[\^\/].*\//) -} - -# Put YAML marker at beginning of file. -BEGIN { - endpoint_conditional_comment = " # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually\n" -} - -# Enable further processing after the introductory text. -# Read each synapse worker section as record and its lines as fields. -/Available worker applications/ { - enable_parsing = 1 - # set record separator to markdown section header - RS = "\n### " - # set field separator to newline - FS = "\n" -} - -# Once parsing is active, this will process each section as record. -enable_parsing { - # Each worker section starts with a synapse.app.X headline - if ($1 ~ /synapse\.app\./) { - - # get rid of the backticks and extract worker type from headline - gsub("`", "", $1) - gsub("synapse.app.", "", $1) - worker_type = $1 - - # initialize empty worker stanza - worker_stanza = "" - - # track if any endpoints are mentioned in a specific section - worker_has_urls = 0 - - # some endpoint descriptions contain flag terms - endpoints_seem_conditional = 0 - - # also, collect a list of available workers - workers = (workers ? workers "\n" : "") " - " worker_type - - # loop through the lines (2 - number of fields in record) - for (i = 2; i < NF + 1; i++) { - # copy line for gsub replacements - line = $i - - # end all lines but the last with a linefeed - linefeed = (i < NF - 1) ? "\n" : "" - - # line starts with white-space and a hash: endpoint block headline - if (line ~ /^ +#/) { - - # copy to output verbatim, normalizing white-space - gsub(/^ +/, "", line) - worker_stanza_append(" " line linefeed) - - } else if (line_is_endpoint_url(line)) { - - # mark section for special output formatting - worker_has_urls = 1 - - # remove leading white-space - gsub(/^ +/, "", line) - api_endpoint_regex = line - - # FIXME: https://github.com/matrix-org/synapse/issues/new - # munge inconsistent media_repository endpoint notation - if (api_endpoint_regex == "/_matrix/media/") { - api_endpoint_regex = "^" line - } - - # FIXME: https://github.com/matrix-org/synapse/issues/7530 - # https://github.com/spantaleev/matrix-docker-ansible-deploy/pull/456#issuecomment-719015911 - if (api_endpoint_regex == "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$") { - worker_stanza_append(" # FIXME: possible bug with SSO and multiple generic workers\n") - worker_stanza_append(" # see https://github.com/matrix-org/synapse/issues/7530\n") - worker_stanza_append(" # " api_endpoint_regex linefeed) - continue - } - - # disable endpoints which specify complications - if (endpoints_seem_conditional) { - # only add notice if previous line didn't match - if (!line_is_endpoint_url($(i - 1))) { - worker_stanza_append(endpoint_conditional_comment) - } - worker_stanza_append(" # " api_endpoint_regex linefeed) - } else { - # output endpoint regex - worker_stanza_append(" - " api_endpoint_regex linefeed) - } - - # white-space only line? - } else if (line ~ /^ *$/) { - - if (i > 3 && i < NF) { - # print white-space lines unless 1st or last line in section - worker_stanza_append(line linefeed) - } - - # nothing of the above: the line is regular documentation text - } else { - - # include this text line as comment - worker_stanza_append(" # " line linefeed) - - # and take note of words hinting at additional conditions to be met - if (line ~ /(^[Ii]f|care must be taken|can be handled for)/) { - endpoints_seem_conditional = 1 - } - } - } - - if (worker_has_urls) { - print "\nmatrix_synapse_workers_" worker_type "_endpoints:" - print worker_stanza - } else { - # include workers without endpoints as well for reference - print "\n# " worker_type " worker (no API endpoints) [" - print worker_stanza - print "# ]" - } - } -} - -END { - print "\nmatrix_synapse_workers_avail_list:" - print workers | "sort" -} - -# vim: tabstop=4 shiftwidth=4 expandtab autoindent diff --git a/roles/custom/matrix-synapse/files/workers-doc-to-yaml.sh b/roles/custom/matrix-synapse/files/workers-doc-to-yaml.sh deleted file mode 100755 index 50a526bc..00000000 --- a/roles/custom/matrix-synapse/files/workers-doc-to-yaml.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -# Fetch the synapse worker documentation and extract endpoint URLs -# matrix-org/synapse master branch points to current stable release -# and put it between `workers:start` and `workers:end` tokens in ../vars/main.yml - -snippet="$(curl -L https://github.com/matrix-org/synapse/raw/master/docs/workers.md | awk -f workers-doc-to-yaml.awk)" -awk -v snippet="$snippet" -i inplace '/workers:start/{f=1;print;print snippet}/workers:end/{f=0}!f' ../vars/main.yml diff --git a/roles/custom/matrix-synapse/vars/main.yml b/roles/custom/matrix-synapse/vars/main.yml index d2f45126..69c0ce46 100644 --- a/roles/custom/matrix-synapse/vars/main.yml +++ b/roles/custom/matrix-synapse/vars/main.yml @@ -111,16 +111,12 @@ matrix_synapse_known_worker_types: | matrix_synapse_known_instance_map_eligible_worker_types: - stream_writer -# the following section contains semi-automatic generated content +# The following section contains content that had previously been generated by a script (`workers-doc-to-yaml.awk`) processing https://github.com/matrix-org/synapse/raw/master/docs/workers.md, +# but is now maintained manually due to: +# - the script being tripped up by the content and generating somewhat inaccurate definitions, which had to be fixed up manually. +# - the script being complicated and unmaintainable ### workers:start - matrix_synapse_workers_generic_worker_endpoints: - # This worker can handle API requests matching the following regular expressions. - # These endpoints can be routed to any worker. If a worker is set up to handle a - # stream then, for maximum efficiency, additional endpoints should be routed to that - # worker: refer to the [stream writers](#stream-writers) section below for further - # information. - # Sync requests - ^/_matrix/client/(r0|v3)/sync$ - ^/_matrix/client/(api/v1|r0|v3)/events$ @@ -191,317 +187,6 @@ matrix_synapse_workers_generic_worker_endpoints: - ^/_matrix/client/(api/v1|r0|v3|unstable)/join/ - ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/ -# These appear to be conditional and should not be enabled by default. -# We need to fix up our workers-doc-to-yaml.awk parsing script to exclude them. -# For now, they've been commented out manually. -# # Account data requests -# - ^/_matrix/client/(r0|v3|unstable)/.*/tags -# - ^/_matrix/client/(r0|v3|unstable)/.*/account_data -# -# # Receipts requests -# - ^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt -# - ^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers -# -# # Presence requests -# - ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/ - - # User directory search requests - # Any worker can handle these, but we have a dedicated user_dir worker for this, - # so we'd like for other generic workers to not try and capture these requests. - # - ^/_matrix/client/(r0|v3|unstable)/user_directory/search$ - - # Additionally, the following REST endpoints can be handled for GET requests: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(api/v1|r0|v3|unstable)/pushrules/ - - # Pagination requests can also be handled, but all requests for a given - # room must be routed to the same instance. Additionally, care must be taken to - # ensure that the purge history admin API is not used while pagination requests - # for the room are in flight: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$ - - # Additionally, the following endpoints should be included if Synapse is configured - # to use SSO (you only need to include the ones for whichever SSO provider you're - # using): - - # for all SSO providers - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(api/v1|r0|v3|unstable)/login/sso/redirect - # ^/_synapse/client/pick_idp$ - # ^/_synapse/client/pick_username - # ^/_synapse/client/new_user_consent$ - # ^/_synapse/client/sso_register$ - - # OpenID Connect requests. - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_synapse/client/oidc/callback$ - - # SAML requests. - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_synapse/client/saml2/authn_response$ - - # CAS requests. - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(api/v1|r0|v3|unstable)/login/cas/ticket$ - - # Ensure that all SSO logins go to a single process. - # For multiple workers not handling the SSO endpoints properly, see - # [#7530](https://github.com/matrix-org/synapse/issues/7530) and - # [#9427](https://github.com/matrix-org/synapse/issues/9427). - - # Note that a [HTTP listener](usage/configuration/config_documentation.md#listeners) - # with `client` and `federation` `resources` must be configured in the `worker_listeners` - # option in the worker config. - - # #### Load balancing - - # It is possible to run multiple instances of this worker app, with incoming requests - # being load-balanced between them by the reverse-proxy. However, different endpoints - # have different characteristics and so admins - # may wish to run multiple groups of workers handling different endpoints so that - # load balancing can be done in different ways. - - # For `/sync` and `/initialSync` requests it will be more efficient if all - # requests from a particular user are routed to a single instance. Extracting a - # user ID from the access token or `Authorization` header is currently left as an - # exercise for the reader. Admins may additionally wish to separate out `/sync` - # requests that have a `since` query parameter from those that don't (and - # `/initialSync`), as requests that don't are known as "initial sync" that happens - # when a user logs in on a new device and can be *very* resource intensive, so - # isolating these requests will stop them from interfering with other users ongoing - # syncs. - - # Federation and client requests can be balanced via simple round robin. - - # The inbound federation transaction request `^/_matrix/federation/v1/send/` - # should be balanced by source IP so that transactions from the same remote server - # go to the same process. - - # Registration/login requests can be handled separately purely to help ensure that - # unexpected load doesn't affect new logins and sign ups. - - # Finally, event sending requests can be balanced by the room ID in the URI (or - # the full URI, or even just round robin), the room ID is the path component after - # `/rooms/`. If there is a large bridge connected that is sending or may send lots - # of events, then a dedicated set of workers can be provisioned to limit the - # effects of bursts of events from that bridge on events sent by normal users. - - # #### Stream writers - - # Additionally, the writing of specific streams (such as events) can be moved off - # of the main process to a particular worker. - - # To enable this, the worker must have a - # [HTTP `replication` listener](usage/configuration/config_documentation.md#listeners) configured, - # have a `worker_name` and be listed in the `instance_map` config. The same worker - # can handle multiple streams, but unless otherwise documented, each stream can only - # have a single writer. - - # For example, to move event persistence off to a dedicated worker, the shared - # configuration would include: - - # ```yaml - # instance_map: - # event_persister1: - # host: localhost - # port: 8034 - - # stream_writers: - # events: event_persister1 - # ``` - - # An example for a stream writer instance: - - # ```yaml - # {{#include systemd-with-workers/workers/event_persister.yaml}} - # ``` - - # Some of the streams have associated endpoints which, for maximum efficiency, should - # be routed to the workers handling that stream. See below for the currently supported - # streams and the endpoints associated with them: - - # ##### The `events` stream - - # The `events` stream experimentally supports having multiple writers, where work - # is sharded between them by room ID. Note that you *must* restart all worker - # instances when adding or removing event persisters. An example `stream_writers` - # configuration with multiple writers: - - # ```yaml - # stream_writers: - # events: - # - event_persister1 - # - event_persister2 - # ``` - - # ##### The `typing` stream - - # The following endpoints should be routed directly to the worker configured as - # the stream writer for the `typing` stream: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/typing - - # ##### The `to_device` stream - - # The following endpoints should be routed directly to the worker configured as - # the stream writer for the `to_device` stream: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(r0|v3|unstable)/sendToDevice/ - - # ##### The `account_data` stream - - # The following endpoints should be routed directly to the worker configured as - # the stream writer for the `account_data` stream: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(r0|v3|unstable)/.*/tags - # ^/_matrix/client/(r0|v3|unstable)/.*/account_data - - # ##### The `receipts` stream - - # The following endpoints should be routed directly to the worker configured as - # the stream writer for the `receipts` stream: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt - # ^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers - - # ##### The `presence` stream - - # The following endpoints should be routed directly to the worker configured as - # the stream writer for the `presence` stream: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/ - - # #### Background tasks - - # There is also support for moving background tasks to a separate - # worker. Background tasks are run periodically or started via replication. Exactly - # which tasks are configured to run depends on your Synapse configuration (e.g. if - # stats is enabled). This worker doesn't handle any REST endpoints itself. - - # To enable this, the worker must have a `worker_name` and can be configured to run - # background tasks. For example, to move background tasks to a dedicated worker, - # the shared configuration would include: - - # ```yaml - # run_background_tasks_on: background_worker - # ``` - - # You might also wish to investigate the `update_user_directory_from_worker` and - # `media_instance_running_background_jobs` settings. - - # An example for a dedicated background worker instance: - - # ```yaml - # {{#include systemd-with-workers/workers/background_worker.yaml}} - # ``` - - # #### Updating the User Directory - - # You can designate one generic worker to update the user directory. - - # Specify its name in the shared configuration as follows: - - # ```yaml - # update_user_directory_from_worker: worker_name - # ``` - - # This work cannot be load-balanced; please ensure the main process is restarted - # after setting this option in the shared configuration! - - # User directory updates allow REST endpoints matching the following regular - # expressions to work: - - # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually - # ^/_matrix/client/(r0|v3|unstable)/user_directory/search$ - - # The above endpoints can be routed to any worker, though you may choose to route - # it to the chosen user directory worker. - - # This style of configuration supersedes the legacy `synapse.app.user_dir` - # worker application type. - - - # #### Notifying Application Services - - # You can designate one generic worker to send output traffic to Application Services. - # Doesn't handle any REST endpoints itself, but you should specify its name in the - # shared configuration as follows: - - # ```yaml - # notify_appservices_from_worker: worker_name - # ``` - - # This work cannot be load-balanced; please ensure the main process is restarted - # after setting this option in the shared configuration! - - # This style of configuration supersedes the legacy `synapse.app.appservice` - # worker application type. - - -# pusher worker (no API endpoints) [ - # Handles sending push notifications to sygnal and email. Doesn't handle any - # REST endpoints itself, but you should set `start_pushers: False` in the - # shared configuration file to stop the main synapse sending push notifications. - - # To run multiple instances at once the `pusher_instances` option should list all - # pusher instances by their worker name, e.g.: - - # ```yaml - # pusher_instances: - # - pusher_worker1 - # - pusher_worker2 - # ``` - - # An example for a pusher instance: - - # ```yaml - # {{#include systemd-with-workers/workers/pusher_worker.yaml}} - # ``` - -# ] - -# appservice worker (no API endpoints) [ - # **Deprecated as of Synapse v1.59.** [Use `synapse.app.generic_worker` with the - # `notify_appservices_from_worker` option instead.](#notifying-application-services) - - # Handles sending output traffic to Application Services. Doesn't handle any - # REST endpoints itself, but you should set `notify_appservices: False` in the - # shared configuration file to stop the main synapse sending appservice notifications. - - # Note this worker cannot be load-balanced: only one instance should be active. - -# ] - -# federation_sender worker (no API endpoints) [ - # Handles sending federation traffic to other servers. Doesn't handle any - # REST endpoints itself, but you should set `send_federation: False` in the - # shared configuration file to stop the main synapse sending this traffic. - - # If running multiple federation senders then you must list each - # instance in the `federation_sender_instances` option by their `worker_name`. - # All instances must be stopped and started when adding or removing instances. - # For example: - - # ```yaml - # federation_sender_instances: - # - federation_sender1 - # - federation_sender2 - # ``` - - # An example for a federation sender instance: - - # ```yaml - # {{#include systemd-with-workers/workers/federation_sender.yaml}} - # ``` -# ] matrix_synapse_workers_media_repository_endpoints: # Handles the media repository. It can handle all endpoints starting with: @@ -517,50 +202,12 @@ matrix_synapse_workers_media_repository_endpoints: - ^/_synapse/admin/v1/quarantine_media/.*$ - ^/_synapse/admin/v1/users/.*/media$ - # You should also set `enable_media_repo: False` in the shared configuration - # file to stop the main synapse running background jobs related to managing the - # media repository. Note that doing so will prevent the main process from being - # able to handle the above endpoints. - - # In the `media_repository` worker configuration file, configure the - # [HTTP listener](usage/configuration/config_documentation.md#listeners) to - # expose the `media` resource. For example: - - # ```yaml - # {{#include systemd-with-workers/workers/media_worker.yaml}} - # ``` - - # Note that if running multiple media repositories they must be on the same server - # and you must configure a single instance to run the background tasks, e.g.: - - # ```yaml - # media_instance_running_background_jobs: "media-repository-1" - # ``` - - # Note that if a reverse proxy is used , then `/_matrix/media/` must be routed for both inbound client and federation requests (if they are handled separately). - matrix_synapse_workers_user_dir_endpoints: - # **Deprecated as of Synapse v1.59.** [Use `synapse.app.generic_worker` with the - # `update_user_directory_from_worker` option instead.](#updating-the-user-directory) - # Handles searches in the user directory. It can handle REST endpoints matching # the following regular expressions: - ^/_matrix/client/(r0|v3|unstable)/user_directory/search$ - # When using this worker you must also set `update_user_directory: false` in the - # shared configuration file to stop the main synapse running background - # jobs related to updating the user directory. - - # Above endpoint is not *required* to be routed to this worker. By default, - # `update_user_directory` is set to `true`, which means the main process - # will handle updates. All workers configured with `client` can handle the above - # endpoint as long as either this worker or the main process are configured to - # handle it, and are online. - - # If `update_user_directory` is set to `false`, and this worker is not running, - # the above endpoint may give outdated results. - matrix_synapse_workers_avail_list: - appservice - federation_sender