diff --git a/api/requirements.txt b/api/requirements.txt index 1f266a8..a26309f 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -12,3 +12,8 @@ asyncpg~=0.27.0 pyshp~=2.3.1 alembic~=1.9.4 stream-zip~=0.0.50 +msgpack~=1.0.5 +osmium~=3.6.0 +psycopg~=3.1.8 +shapely~=2.0.1 +pyproj~=3.4.1 diff --git a/api/tools/convert_osm.py b/api/tools/convert_osm.py new file mode 100755 index 0000000..cf53351 --- /dev/null +++ b/api/tools/convert_osm.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 + +import sys +import re +import msgpack + +import osmium +import shapely.wkb as wkb +from shapely.ops import transform + +HIGHWAY_TYPES = { + "trunk", + "primary", + "secondary", + "tertiary", + "unclassified", + "residential", + "trunk_link", + "primary_link", + "secondary_link", + "tertiary_link", + "living_street", + "service", + "track", + "road", +} +ZONE_TYPES = { + "urban", + "rural", + "motorway", +} +URBAN_TYPES = { + "residential", + "living_street", + "road", +} +MOTORWAY_TYPES = { + "motorway", + "motorway_link", +} + +ADMIN_LEVEL_MIN = 2 +ADMIN_LEVEL_MAX = 8 +MINSPEED_RURAL = 60 + +ONEWAY_YES = {"yes", "true", "1"} +ONEWAY_REVERSE = {"reverse", "-1"} + + +def parse_number(tag): + if not tag: + return None + + match = re.search(r"[0-9]+", tag) + if not match: + return None + + digits = match.group(0) + try: + return int(digits) + except ValueError: + return None + + +def determine_zone(tags): + highway = tags.get("highway") + zone = tags.get("zone:traffic") + + if zone is not None: + if "rural" in zone: + return "rural" + + if "motorway" in zone: + return "motorway" + + return "urban" + + # From here on we are guessing based on other tags + + if highway in URBAN_TYPES: + return "urban" + + if highway in MOTORWAY_TYPES: + return "motorway" + + maxspeed_source = tags.get("source:maxspeed") + if maxspeed_source and "rural" in maxspeed_source: + return "rural" + if maxspeed_source and "urban" in maxspeed_source: + return "urban" + + for key in ["maxspeed", "maxspeed:forward", "maxspeed:backward"]: + maxspeed = parse_number(tags.get(key)) + if maxspeed is not None and maxspeed > MINSPEED_RURAL: + return "rural" + + # default to urban if we have no idea + return "urban" + + +def determine_direction(tags, zone): + if ( + tags.get("oneway") in ONEWAY_YES + or tags.get("junction") == "roundabout" + or zone == "motorway" + ): + return 1, True + + if tags.get("oneway") in ONEWAY_REVERSE: + return -1, True + + return 0, False + + +class StreamPacker: + def __init__(self, stream, *args, **kwargs): + self.stream = stream + self.packer = msgpack.Packer(*args, autoreset=False, **kwargs) + + def _write_out(self): + if hasattr(self.packer, "getbuffer"): + chunk = self.packer.getbuffer() + else: + chunk = self.packer.bytes() + + self.stream.write(chunk) + self.packer.reset() + + def pack(self, *args, **kwargs): + self.packer.pack(*args, **kwargs) + self._write_out() + + def pack_array_header(self, *args, **kwargs): + self.packer.pack_array_header(*args, **kwargs) + self._write_out() + + def pack_map_header(self, *args, **kwargs): + self.packer.pack_map_header(*args, **kwargs) + self._write_out() + + def pack_map_pairs(self, *args, **kwargs): + self.packer.pack_map_pairs(*args, **kwargs) + self._write_out() + + +# A global factory that creates WKB from a osmium geometry +wkbfab = osmium.geom.WKBFactory() + +from pyproj import Transformer + +project = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True).transform + + +class OSMHandler(osmium.SimpleHandler): + def __init__(self, packer): + self.packer = packer + super().__init__() + + def way(self, way): + tags = way.tags + + highway = tags.get("highway") + if not highway or highway not in HIGHWAY_TYPES: + return + + zone = determine_zone(tags) + directionality, oneway = determine_direction(tags, zone) + name = tags.get("name") + + geometry = wkb.loads(wkbfab.create_linestring(way), hex=True) + geometry = transform(project, geometry) + geometry = wkb.dumps(geometry) + self.packer.pack(b"\x01") + self.packer.pack([way.id, name, zone, directionality, oneway, geometry]) + + def area(self, area): + tags = area.tags + if tags.get("boundary") != "administrative": + return + + admin_level = parse_number(tags.get("admin_level")) + if not admin_level: + return + + if admin_level < ADMIN_LEVEL_MIN or admin_level > ADMIN_LEVEL_MAX: + return + + name = tags.get("name") + geometry = bytes.fromhex(wkbfab.create_multipolygon(area)) + + self.packer.pack(b"\x02") + self.packer.pack( + [ + area.id, + name, + admin_level, + geometry, + ] + ) + + +with open(sys.argv[2], "wb") as fout: + packer = StreamPacker(fout) + osmhandler = OSMHandler(packer) + osmhandler.apply_file(sys.argv[1], locations=True) diff --git a/api/tools/import_osm.py b/api/tools/import_osm.py new file mode 100755 index 0000000..9172cb0 --- /dev/null +++ b/api/tools/import_osm.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 + +from dataclasses import asdict, dataclass +import asyncio +from os.path import basename, splitext +import sys +import logging + +import msgpack +import psycopg +from psycopg.types import TypeInfo +from psycopg.types.shapely import register_shapely +from shapely.wkb import loads + +from obs.api.app import app +from obs.api.db import ZoneType, connect_db, make_session +from obs.api.utils import chunk + +log = logging.getLogger(__name__) + + +ROAD_BUFFER = 1000 +AREA_BUFFER = 100 + + +@dataclass +class Region: + relation_id: int + name: str + admin_level: int + geometry: bytes + + +@dataclass +class Road: + way_id: int + name: str + zone: str + directionality: int + oneway: int + geometry: bytes + + +def read_file(filename, type_only=False): + """ + Reads a file iteratively, yielding Road and Region objects as they + appear. Those may be mixed. + """ + with open(filename, "rb") as f: + unpacker = msgpack.Unpacker(f) + try: + while True: + type_id = unpacker.unpack() + data = unpacker.unpack() + + if type_id == b"\x01": + yield Road(*data) + + # elif type_id == b"\x02": + # yield Region(*data) + except msgpack.OutOfData: + pass + + +async def import_osm(connection, filename, import_group=None): + if import_group is None: + import_group = splitext(basename(filename))[0] + + # Pass 1: Find IDs only + road_ids = [] + for item in read_file(filename): + road_ids.append(item.way_id) + + async with connection.cursor() as cursor: + print(f"Pass 1: Delete previous") + + print(f"Deleting import group {import_group}") + await cursor.execute( + "DELETE FROM road WHERE import_group = %s", (import_group,) + ) + + print(f"Deleting by ID") + for ids in chunk(road_ids, 10000): + await cursor.execute("DELETE FROM road WHERE way_id = ANY(%s)", (ids,)) + + # Pass 2: Import + print(f"Pass 2: Import") + + async with cursor.copy( + "COPY road (way_id, name, zone, directionality, oneway, geometry, import_group) FROM STDIN" + ) as copy: + for item in read_file(filename): + await copy.write_row( + ( + item.way_id, + item.name, + item.zone, + item.directionality, + item.oneway, + bytes.hex(item.geometry), + import_group, + ) + ) + + +async def main(): + url = app.config.POSTGRES_URL + url = url.replace("+asyncpg", "") + + async with await psycopg.AsyncConnection.connect(url) as connection: + for filename in sys.argv[1:]: + print("Loading file", filename) + await import_osm(connection, filename) + + +if __name__ == "__main__": + asyncio.run(main())