"""Import a routable walk/drive graph from an OSM PBF extract into PostgreSQL.

Highway ways are split into one edge row per consecutive node pair, buffered
in memory, and written in batches. After the import, routing-node geometries
and the routing GiST indexes are rebuilt and summary counts are stored in the
dataset metadata.
"""

from __future__ import annotations

import json
import math
from dataclasses import dataclass
from pathlib import Path
from typing import Callable

import osmium
from sqlalchemy import delete, func, select, text
from sqlalchemy.dialects.postgresql import insert as postgresql_insert
from sqlalchemy.orm import Session

from app.config import settings
from app.models import Dataset, RoutingEdge, RoutingNode
from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries

# (event_type, message, progress_current, progress_total, metadata)
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, object] | None], None]

ROUTING_LAYER_VERSION = "routing_layer_v2_osm_highway_segments_service_tags"

# Highway classes considered for driving edges.
DRIVE_HIGHWAYS = {
    "motorway",
    "motorway_link",
    "trunk",
    "trunk_link",
    "primary",
    "primary_link",
    "secondary",
    "secondary_link",
    "tertiary",
    "tertiary_link",
    "unclassified",
    "residential",
    "living_street",
    "service",
    "road",
    "track",
}

# Highway classes considered for walking edges.
WALK_HIGHWAYS = {
    "pedestrian",
    "footway",
    "path",
    "steps",
    "cycleway",
    "bridleway",
    "living_street",
    "residential",
    "service",
    "track",
    "unclassified",
    "tertiary",
    "tertiary_link",
    "secondary",
    "secondary_link",
    "primary",
    "primary_link",
    "road",
}

# Highway values that are never imported.
EXCLUDED_HIGHWAYS = {"construction", "proposed", "abandoned", "platform", "raceway"}

# Access-tag values treated as "not allowed" / "explicitly allowed".
NO_VALUES = {"no", "private", "agricultural", "forestry", "delivery", "customers"}
YES_VALUES = {"yes", "designated", "permissive", "destination"}

ONEWAY_FORWARD = {"yes", "true", "1"}
ONEWAY_REVERSE = {"-1", "reverse"}
# Explicit "two-way" values; these override the one-way default implied by highway=motorway.
ONEWAY_EXPLICIT_NO = {"no", "false", "0"}

# Fallback driving speed per highway class, used when maxspeed is absent or unparsable.
DEFAULT_DRIVE_SPEED_KMH = {
    "motorway": 110,
    "motorway_link": 50,
    "trunk": 90,
    "trunk_link": 45,
    "primary": 70,
    "primary_link": 40,
    "secondary": 60,
    "secondary_link": 35,
    "tertiary": 50,
    "tertiary_link": 30,
    "unclassified": 40,
    "residential": 30,
    "living_street": 10,
    "service": 15,
    "road": 30,
    "track": 15,
}

DEFAULT_WALK_SPEED_MPS = 1.35
STEP_WALK_SPEED_MPS = 0.65

# Tag keys copied into each edge's ``tags_json`` column.
_ROUTING_TAG_KEYS = frozenset(
    {
        "access",
        "bicycle",
        "bridge",
        "foot",
        "highway",
        "junction",
        "maxspeed",
        "motor_vehicle",
        "motorcar",
        "name",
        "oneway",
        "service",
        "surface",
        "tunnel",
        "vehicle",
    }
)


@dataclass
class RoutingImportResult:
    """Summary counts produced by a routing layer import."""

    dataset_id: int
    input_path: str
    nodes: int
    edges: int
    walk_edges: int
    drive_edges: int
    skipped_ways: int
    version: str = ROUTING_LAYER_VERSION

    def as_dict(self) -> dict[str, object]:
        """Return the result as a plain dict, with ``version`` as the first key."""
        return {
            "version": self.version,
            "dataset_id": self.dataset_id,
            "input_path": self.input_path,
            "nodes": self.nodes,
            "edges": self.edges,
            "walk_edges": self.walk_edges,
            "drive_edges": self.drive_edges,
            "skipped_ways": self.skipped_ways,
        }


def active_routing_dataset(session: Session) -> Dataset | None:
    """Pick the dataset whose PBF file should feed the routing import.

    Preference order:

    1. The raw dataset referenced by ``raw_dataset_id`` in the metadata of the
       newest active ``osm_geojson`` dataset, provided its file exists on disk.
    2. Otherwise the first ``osm_pbf_raw`` dataset, ordered by ``is_active``
       descending and then ``id`` descending.

    Returns ``None`` when neither lookup finds a dataset.
    """
    active_osm = session.scalar(
        select(Dataset)
        .where(Dataset.kind == "osm_geojson", Dataset.is_active.is_(True))
        .order_by(Dataset.id.desc())
    )
    if active_osm is not None:
        raw_dataset_id = _metadata(active_osm).get("raw_dataset_id")
        if raw_dataset_id is not None:
            # Metadata is free-form JSON: a non-numeric id must not abort the
            # lookup, it just means we fall through to the fallback query.
            try:
                raw_id = int(raw_dataset_id)
            except (TypeError, ValueError):
                raw_id = None
            raw = session.get(Dataset, raw_id) if raw_id is not None else None
            # Guard against an unset local_path; Path(None) raises TypeError.
            if raw is not None and raw.local_path and Path(raw.local_path).exists():
                return raw
    return session.scalar(
        select(Dataset)
        .where(Dataset.kind == "osm_pbf_raw")
        .order_by(Dataset.is_active.desc(), Dataset.id.desc())
    )


def rebuild_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    reset: bool = True,
    batch_size: int = 5000,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Import the routing graph for a dataset and finalize it.

    Args:
        session: SQLAlchemy session; this function commits on it.
        dataset_id: Dataset to import for. When ``None`` the dataset is chosen
            by :func:`active_routing_dataset`.
        input_path: PBF file to read. Defaults to the dataset's ``local_path``.
        reset: When true, delete the dataset's existing routing edges and
            nodes before importing.
        batch_size: Number of buffered edges that triggers a flush (the
            handler enforces a minimum of 500).
        progress_callback: Optional callback receiving progress events.

    Returns:
        The summary dict returned by :func:`finalize_routing_layer`.

    Raises:
        RuntimeError: If the configured database is not PostgreSQL.
        ValueError: If no dataset can be resolved.
        FileNotFoundError: If the PBF path does not exist.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer importer requires PostgreSQL/PostGIS.")
    dataset = session.get(Dataset, dataset_id) if dataset_id is not None else active_routing_dataset(session)
    if dataset is None:
        raise ValueError("No OSM PBF dataset is available for routing import.")
    path = Path(input_path or dataset.local_path)
    if not path.exists():
        raise FileNotFoundError(f"Routing import PBF does not exist: {path}")

    if reset:
        _emit(
            progress_callback,
            "routing_layer_clear_started",
            "Clearing existing routing graph.",
            None,
            None,
            {"dataset_id": dataset.id},
        )
        # Edges are removed before the nodes they reference.
        session.execute(delete(RoutingEdge).where(RoutingEdge.dataset_id == dataset.id))
        session.execute(delete(RoutingNode).where(RoutingNode.dataset_id == dataset.id))
        session.commit()

    _emit(
        progress_callback,
        "routing_layer_import_started",
        "Importing routable OSM highway graph.",
        None,
        None,
        {"dataset_id": dataset.id, "path": str(path)},
    )
    handler = _RoutingGraphHandler(
        session=session,
        dataset_id=dataset.id,
        batch_size=batch_size,
        progress_callback=progress_callback,
    )
    # locations=True makes osmium attach coordinates to the node refs of each way.
    handler.apply_file(str(path), locations=True)
    # Write whatever is still buffered after the last full batch.
    handler.flush()
    return finalize_routing_layer(
        session,
        dataset_id=dataset.id,
        input_path=str(path),
        skipped_way_count=handler.skipped_way_count,
        progress_callback=progress_callback,
    )


def finalize_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    skipped_way_count: int = 0,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Refresh geometries and indexes, then record routing layer statistics.

    Drops the routing GiST indexes, refreshes the ``routing_nodes``
    geometries, recreates the indexes, analyzes both routing tables, counts
    nodes/edges for the dataset, and stores the counts under the
    ``routing_layer`` key of the dataset's metadata JSON.

    Args:
        session: SQLAlchemy session; this function commits on it.
        dataset_id: Dataset to finalize. When ``None`` the dataset is chosen
            by :func:`active_routing_dataset`.
        input_path: Path recorded in the result; defaults to the dataset's
            ``local_path``.
        skipped_way_count: Number of skipped ways to report in the result.
        progress_callback: Optional callback receiving progress events.

    Returns:
        ``RoutingImportResult.as_dict()`` for the finalized dataset.

    Raises:
        RuntimeError: If the configured database is not PostgreSQL.
        ValueError: If no dataset can be resolved.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer finalizer requires PostgreSQL/PostGIS.")
    dataset = session.get(Dataset, dataset_id) if dataset_id is not None else active_routing_dataset(session)
    if dataset is None:
        raise ValueError("No routing dataset is available to finalize.")
    path = Path(input_path or dataset.local_path)

    _emit(
        progress_callback,
        "routing_layer_geometry_indexes_dropped",
        "Dropping routing geometry indexes before bulk refresh.",
        None,
        None,
        {"dataset_id": dataset.id},
    )
    _drop_routing_geometry_indexes(session)
    session.commit()

    _emit(
        progress_callback,
        "routing_layer_geometry_started",
        "Refreshing routing node PostGIS geometries.",
        None,
        None,
        {"dataset_id": dataset.id},
    )
    refresh_postgis_geometries(session, dataset_id=dataset.id, tables=["routing_nodes"], only_missing=False)
    session.commit()

    _emit(
        progress_callback,
        "routing_layer_geometry_indexes_started",
        "Rebuilding routing geometry indexes.",
        None,
        None,
        {"dataset_id": dataset.id},
    )
    _create_routing_geometry_indexes(session)
    session.commit()
    analyze_postgresql_tables(session, ["routing_nodes", "routing_edges"])

    edges_for_dataset = RoutingEdge.dataset_id == dataset.id
    node_count = int(
        session.scalar(
            select(func.count()).select_from(RoutingNode).where(RoutingNode.dataset_id == dataset.id)
        )
        or 0
    )
    edge_count = int(
        session.scalar(select(func.count()).select_from(RoutingEdge).where(edges_for_dataset)) or 0
    )
    walk_edge_count = int(
        session.scalar(
            select(func.count())
            .select_from(RoutingEdge)
            .where(edges_for_dataset, RoutingEdge.walk_cost_s.is_not(None))
        )
        or 0
    )
    drive_edge_count = int(
        session.scalar(
            select(func.count())
            .select_from(RoutingEdge)
            .where(edges_for_dataset, RoutingEdge.drive_cost_s.is_not(None))
        )
        or 0
    )

    dataset_metadata = _metadata(dataset)
    dataset_metadata["routing_layer"] = {
        "version": ROUTING_LAYER_VERSION,
        "nodes": node_count,
        "edges": edge_count,
        "walk_edges": walk_edge_count,
        "drive_edges": drive_edge_count,
        "input_path": str(path),
    }
    dataset.metadata_json = json.dumps(dataset_metadata, indent=2)
    session.commit()

    result = RoutingImportResult(
        dataset_id=dataset.id,
        input_path=str(path),
        nodes=node_count,
        edges=edge_count,
        walk_edges=walk_edge_count,
        drive_edges=drive_edge_count,
        skipped_ways=skipped_way_count,
    ).as_dict()
    _emit(
        progress_callback,
        "routing_layer_import_completed",
        "Routing graph import completed.",
        edge_count,
        edge_count,
        result,
    )
    return result


def _drop_routing_geometry_indexes(session: Session) -> None:
    """Drop the routing GiST indexes if they exist (does not commit)."""
    session.execute(text("DROP INDEX IF EXISTS ix_routing_nodes_geom_gist"))
    # NOTE(review): this index is dropped here but not recreated by
    # _create_routing_geometry_indexes; presumably legacy cleanup. Confirm.
    session.execute(text("DROP INDEX IF EXISTS ix_routing_edges_geom_gist"))
    session.execute(text("DROP INDEX IF EXISTS ix_routing_edges_bbox_box_gist"))


def _create_routing_geometry_indexes(session: Session) -> None:
    """Create the node geometry index and the edge bounding-box index (does not commit)."""
    session.execute(
        text("CREATE INDEX IF NOT EXISTS ix_routing_nodes_geom_gist ON routing_nodes USING GIST (geom)")
    )
    session.execute(
        text(
            "CREATE INDEX IF NOT EXISTS ix_routing_edges_bbox_box_gist ON routing_edges USING GIST (box(point(max_lon, max_lat), point(min_lon, min_lat)))"
        )
    )


class _RoutingGraphHandler(osmium.SimpleHandler):
    """osmium handler that turns highway ways into routing node and edge rows.

    Rows are buffered in memory and written through :meth:`flush` once at
    least ``batch_size`` edges are pending. The caller must call
    :meth:`flush` once more after ``apply_file`` to write the remainder.
    """

    def __init__(
        self,
        *,
        session: Session,
        dataset_id: int,
        batch_size: int,
        progress_callback: ProgressCallback | None,
    ) -> None:
        super().__init__()
        self.session = session
        self.dataset_id = dataset_id
        # Batches smaller than 500 edges are not allowed.
        self.batch_size = max(500, int(batch_size))
        self.progress_callback = progress_callback
        # Pending nodes keyed by OSM node id, so a node shared by several
        # segments in the same batch is written once.
        self.nodes: dict[int, dict[str, object]] = {}
        self.edges: list[dict[str, object]] = []
        # Counters start from the rows already stored for this dataset.
        self.node_count = int(
            session.scalar(
                select(func.count()).select_from(RoutingNode).where(RoutingNode.dataset_id == dataset_id)
            )
            or 0
        )
        self.edge_count = int(
            session.scalar(
                select(func.count()).select_from(RoutingEdge).where(RoutingEdge.dataset_id == dataset_id)
            )
            or 0
        )
        self.walk_edge_count = 0
        self.drive_edge_count = 0
        self.skipped_way_count = 0
        self.processed_way_count = 0

    def way(self, way) -> None:
        """Convert one OSM way into per-segment edges, or count it as skipped.

        A way is skipped when it has no usable ``highway`` tag, is neither
        walkable nor drivable, or has fewer than two nodes with valid
        locations.
        """
        tags = {tag.k: tag.v for tag in way.tags}
        highway = tags.get("highway")
        if not highway or highway in EXCLUDED_HIGHWAYS:
            self.skipped_way_count += 1
            return
        walkable = _walkable(tags, highway)
        drivable = _drivable(tags, highway)
        if not walkable and not drivable:
            self.skipped_way_count += 1
            return

        # Nodes without a valid location are dropped, so their neighbours
        # become directly connected.
        nodes = [
            (int(node.ref), float(node.location.lon), float(node.location.lat))
            for node in way.nodes
            if node.location.valid()
        ]
        if len(nodes) < 2:
            self.skipped_way_count += 1
            return

        oneway = _oneway_direction(tags, highway)
        drive_speed_mps = _drive_speed_mps(tags, highway)
        walk_speed_mps = STEP_WALK_SPEED_MPS if highway == "steps" else DEFAULT_WALK_SPEED_MPS
        # Per-way values are computed once rather than once per segment.
        way_id = int(way.id)
        name = tags.get("name")
        tags_json = _routing_tags_json(tags)

        for left, right in zip(nodes, nodes[1:]):
            source_id, source_lon, source_lat = left
            target_id, target_lon, target_lat = right
            if source_id == target_id:
                continue
            length_m = _distance_m(source_lat, source_lon, target_lat, target_lon)
            if length_m <= 0:
                continue
            if oneway == "reverse":
                # Store the edge in its travel direction so the forward drive
                # cost always runs source -> target.
                source_id, target_id = target_id, source_id
                source_lon, target_lon = target_lon, source_lon
                source_lat, target_lat = target_lat, source_lat

            walk_cost = length_m / walk_speed_mps if walkable else None
            drive_cost = length_m / drive_speed_mps if drivable and drive_speed_mps > 0 else None
            # One-way restrictions apply to driving only; walking is always two-way.
            reverse_walk_cost = walk_cost
            reverse_drive_cost = None if oneway in {"forward", "reverse"} else drive_cost

            self.nodes[source_id] = {
                "dataset_id": self.dataset_id,
                "osm_node_id": source_id,
                "lon": source_lon,
                "lat": source_lat,
            }
            self.nodes[target_id] = {
                "dataset_id": self.dataset_id,
                "osm_node_id": target_id,
                "lon": target_lon,
                "lat": target_lat,
            }
            self.edges.append(
                {
                    "dataset_id": self.dataset_id,
                    "osm_way_id": way_id,
                    "source_osm_node_id": source_id,
                    "target_osm_node_id": target_id,
                    "source_lon": source_lon,
                    "source_lat": source_lat,
                    "target_lon": target_lon,
                    "target_lat": target_lat,
                    "highway": highway,
                    "name": name,
                    "length_m": length_m,
                    "walk_cost_s": walk_cost,
                    "reverse_walk_cost_s": reverse_walk_cost,
                    "drive_cost_s": drive_cost,
                    "reverse_drive_cost_s": reverse_drive_cost,
                    "geometry_geojson": json.dumps(
                        {
                            "type": "LineString",
                            "coordinates": [[source_lon, source_lat], [target_lon, target_lat]],
                        },
                        separators=(",", ":"),
                    ),
                    "min_lon": min(source_lon, target_lon),
                    "min_lat": min(source_lat, target_lat),
                    "max_lon": max(source_lon, target_lon),
                    "max_lat": max(source_lat, target_lat),
                    "tags_json": tags_json,
                }
            )
            self.edge_count += 1
            if walk_cost is not None:
                self.walk_edge_count += 1
            if drive_cost is not None:
                self.drive_edge_count += 1

        self.processed_way_count += 1
        if len(self.edges) >= self.batch_size:
            self.flush()
        if self.processed_way_count % 100_000 == 0:
            _emit(
                self.progress_callback,
                "routing_layer_import_batch",
                f"Imported {self.edge_count:,} routing edges.",
                self.edge_count,
                None,
                {
                    "processed_ways": self.processed_way_count,
                    "nodes_pending": len(self.nodes),
                    "edges": self.edge_count,
                },
            )

    def flush(self) -> None:
        """Write buffered nodes and edges to the database and commit."""
        if not self.nodes and not self.edges:
            return
        node_rows = list(self.nodes.values())
        edge_rows = self.edges
        if node_rows:
            # Nodes recur across batches; conflicts on the
            # (dataset_id, osm_node_id) key are ignored.
            stmt = postgresql_insert(RoutingNode).values(node_rows)
            stmt = stmt.on_conflict_do_nothing(index_elements=["dataset_id", "osm_node_id"])
            self.session.execute(stmt)
            # Counts rows submitted, not rows inserted, so this can over-count
            # when conflicts are skipped.
            self.node_count += len(node_rows)
            self.nodes.clear()
        if edge_rows:
            self.session.bulk_insert_mappings(RoutingEdge, edge_rows)
            self.edges = []
        self.session.commit()


def _walkable(tags: dict[str, str], highway: str) -> bool:
    """Return whether a way with these tags may be used on foot."""
    if highway not in WALK_HIGHWAYS:
        return False
    access = _tag_value(tags, "access")
    foot = _tag_value(tags, "foot")
    if foot in NO_VALUES:
        return False
    # A general access restriction applies unless foot access is explicitly granted.
    if access in NO_VALUES and foot not in YES_VALUES:
        return False
    # NOTE(review): unreachable as written - none of these classes are in
    # WALK_HIGHWAYS, so the first guard has already returned False.
    if highway in {"motorway", "motorway_link", "trunk", "trunk_link"} and foot not in YES_VALUES:
        return False
    return True


def _drivable(tags: dict[str, str], highway: str) -> bool:
    """Return whether a way with these tags may be used by a motor car."""
    if highway not in DRIVE_HIGHWAYS:
        return False
    access = _tag_value(tags, "access")
    motor_vehicle = _tag_value(tags, "motor_vehicle")
    motorcar = _tag_value(tags, "motorcar")
    vehicle = _tag_value(tags, "vehicle")
    if motorcar in NO_VALUES or motor_vehicle in NO_VALUES or vehicle in NO_VALUES:
        return False
    # A general access restriction applies unless motor traffic is explicitly granted.
    if access in NO_VALUES and motorcar not in YES_VALUES and motor_vehicle not in YES_VALUES:
        return False
    # NOTE(review): unreachable as written - none of these classes are in
    # DRIVE_HIGHWAYS, so the first guard has already returned False.
    if highway in {"footway", "path", "pedestrian", "steps", "cycleway", "bridleway"}:
        return motorcar in YES_VALUES or motor_vehicle in YES_VALUES
    return True


def _oneway_direction(tags: dict[str, str], highway: str) -> str:
    """Classify the driving direction of a way.

    Returns ``"reverse"``, ``"forward"`` or ``"both"``. Roundabouts are always
    forward-only. Motorways are forward-only by default, but an explicit
    ``oneway=no`` (or ``false``/``0``) overrides that default.
    """
    oneway = _tag_value(tags, "oneway")
    if oneway in ONEWAY_REVERSE:
        return "reverse"
    if oneway in ONEWAY_FORWARD or _tag_value(tags, "junction") == "roundabout":
        return "forward"
    # highway=motorway only implies one-way; an explicit "no" wins over the implication.
    if highway == "motorway" and oneway not in ONEWAY_EXPLICIT_NO:
        return "forward"
    return "both"


def _drive_speed_mps(tags: dict[str, str], highway: str) -> float:
    """Return the driving speed in m/s, never below 5.0.

    Uses the parsed ``maxspeed`` tag when it yields a non-zero value,
    otherwise the per-class default (30 km/h for unknown classes).
    """
    maxspeed = _parse_maxspeed(tags.get("maxspeed"))
    kmh = maxspeed or DEFAULT_DRIVE_SPEED_KMH.get(highway, 30)
    return max(5.0, float(kmh) / 3.6)


def _parse_maxspeed(value: str | None) -> float | None:
    """Parse a ``maxspeed`` tag value into km/h, or ``None`` if not numeric.

    Values ending in ``mph`` are converted to km/h. The keywords ``signals``,
    ``none``, ``walk`` and ``variable`` yield ``None``.
    """
    if not value:
        return None
    # Deliberately not named "text": that would shadow sqlalchemy's text().
    normalized = value.strip().lower()
    if normalized in {"signals", "none", "walk", "variable"}:
        return None
    if normalized.endswith("mph"):
        number = _leading_float(normalized[:-3])
        return None if number is None else number * 1.60934
    return _leading_float(normalized)


def _leading_float(value: str) -> float | None:
    """Return the first run of digits and dots in ``value`` as a float.

    Characters before the first digit or dot are skipped; scanning stops at
    the first other character after the run has started. Returns ``None``
    when there is no such run or it is not a valid float (e.g. ``"1.2.3"``).
    """
    digits = []
    for char in value.strip():
        if char.isdigit() or char == ".":
            digits.append(char)
        elif digits:
            break
    if not digits:
        return None
    try:
        return float("".join(digits))
    except ValueError:
        return None


def _routing_tags_json(tags: dict[str, str]) -> str:
    """Serialize the routing-relevant subset of ``tags`` as compact JSON."""
    selected = {key: value for key, value in tags.items() if key in _ROUTING_TAG_KEYS}
    return json.dumps(selected, separators=(",", ":"))


def _tag_value(tags: dict[str, str], key: str) -> str:
    """Return the tag value stripped and lower-cased, or ``""`` when absent."""
    return str(tags.get(key) or "").strip().lower()


def _distance_m(lat_a: float, lon_a: float, lat_b: float, lon_b: float) -> float:
    """Return the haversine distance in metres between two lat/lon points in degrees."""
    radius = 6_371_000.0
    phi_a = math.radians(lat_a)
    phi_b = math.radians(lat_b)
    delta_phi = math.radians(lat_b - lat_a)
    delta_lambda = math.radians(lon_b - lon_a)
    hav = math.sin(delta_phi / 2) ** 2 + math.cos(phi_a) * math.cos(phi_b) * math.sin(delta_lambda / 2) ** 2
    # Rounding can push hav marginally outside [0, 1], which would make
    # sqrt(1 - hav) raise ValueError.
    hav = min(1.0, max(0.0, hav))
    return radius * 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))


def _metadata(dataset: Dataset) -> dict[str, object]:
    """Return the dataset's metadata JSON as a dict, or ``{}`` if missing, invalid, or not an object."""
    try:
        value = json.loads(dataset.metadata_json or "{}")
    except json.JSONDecodeError:
        return {}
    return value if isinstance(value, dict) else {}


def _emit(
    progress_callback: ProgressCallback | None,
    event_type: str,
    message: str,
    progress_current: int | None,
    progress_total: int | None,
    metadata: dict[str, object] | None = None,
) -> None:
    """Forward a progress event to ``progress_callback`` when one is set."""
    if progress_callback is not None:
        progress_callback(event_type, message, progress_current, progress_total, metadata)