Files
meubility-workbench/app/pipeline/routing_layer.py
2026-07-01 23:29:51 +02:00

474 lines
18 KiB
Python

from __future__ import annotations
import json
import math
from dataclasses import dataclass
from pathlib import Path
from typing import Callable
import osmium
from sqlalchemy import delete, func, select, text
from sqlalchemy.dialects.postgresql import insert as postgresql_insert
from sqlalchemy.orm import Session
from app.config import settings
from app.models import Dataset, RoutingEdge, RoutingNode
from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, object] | None], None]
ROUTING_LAYER_VERSION = "routing_layer_v2_osm_highway_segments_service_tags"
DRIVE_HIGHWAYS = {
"motorway",
"motorway_link",
"trunk",
"trunk_link",
"primary",
"primary_link",
"secondary",
"secondary_link",
"tertiary",
"tertiary_link",
"unclassified",
"residential",
"living_street",
"service",
"road",
"track",
}
WALK_HIGHWAYS = {
"pedestrian",
"footway",
"path",
"steps",
"cycleway",
"bridleway",
"living_street",
"residential",
"service",
"track",
"unclassified",
"tertiary",
"tertiary_link",
"secondary",
"secondary_link",
"primary",
"primary_link",
"road",
}
EXCLUDED_HIGHWAYS = {"construction", "proposed", "abandoned", "platform", "raceway"}
NO_VALUES = {"no", "private", "agricultural", "forestry", "delivery", "customers"}
YES_VALUES = {"yes", "designated", "permissive", "destination"}
ONEWAY_FORWARD = {"yes", "true", "1"}
ONEWAY_REVERSE = {"-1", "reverse"}
DEFAULT_DRIVE_SPEED_KMH = {
"motorway": 110,
"motorway_link": 50,
"trunk": 90,
"trunk_link": 45,
"primary": 70,
"primary_link": 40,
"secondary": 60,
"secondary_link": 35,
"tertiary": 50,
"tertiary_link": 30,
"unclassified": 40,
"residential": 30,
"living_street": 10,
"service": 15,
"road": 30,
"track": 15,
}
DEFAULT_WALK_SPEED_MPS = 1.35
STEP_WALK_SPEED_MPS = 0.65
@dataclass
class RoutingImportResult:
    """Summary counts describing one routing layer import."""

    dataset_id: int
    input_path: str
    nodes: int
    edges: int
    walk_edges: int
    drive_edges: int
    skipped_ways: int
    version: str = ROUTING_LAYER_VERSION

    def as_dict(self) -> dict[str, object]:
        """Return the result as a plain dict, with ``version`` listed first."""
        ordered_fields = (
            "version",
            "dataset_id",
            "input_path",
            "nodes",
            "edges",
            "walk_edges",
            "drive_edges",
            "skipped_ways",
        )
        return {field_name: getattr(self, field_name) for field_name in ordered_fields}
def active_routing_dataset(session: Session) -> Dataset | None:
    """Pick the raw OSM PBF dataset the routing layer should be built from.

    The newest active ``osm_geojson`` dataset is consulted first: if its
    metadata names a ``raw_dataset_id`` whose file exists on disk, that raw
    dataset is returned. Otherwise the function falls back to the
    ``osm_pbf_raw`` datasets, preferring active ones and then the highest id.

    Args:
        session: Open SQLAlchemy session used for the lookups.

    Returns:
        The selected dataset, or ``None`` when no candidate exists.
    """
    active_osm = session.scalar(
        select(Dataset)
        .where(Dataset.kind == "osm_geojson", Dataset.is_active.is_(True))
        .order_by(Dataset.id.desc())
    )
    if active_osm is not None:
        raw_dataset_id = _metadata(active_osm).get("raw_dataset_id")
        try:
            raw_id = None if raw_dataset_id is None else int(raw_dataset_id)
        except (TypeError, ValueError):
            # Malformed metadata must not abort the lookup; use the fallback.
            raw_id = None
        if raw_id is not None:
            raw = session.get(Dataset, raw_id)
            # An unset or empty local_path cannot point at a PBF file:
            # Path(None) raises and Path("") resolves to the current directory.
            if raw is not None and raw.local_path and Path(raw.local_path).exists():
                return raw
    return session.scalar(
        select(Dataset)
        .where(Dataset.kind == "osm_pbf_raw")
        .order_by(Dataset.is_active.desc(), Dataset.id.desc())
    )
def rebuild_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    reset: bool = True,
    batch_size: int = 5000,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Import the routable highway graph of an OSM PBF file into the database.

    Args:
        session: Open SQLAlchemy session; committed several times along the way.
        dataset_id: Dataset to import for; when ``None`` the dataset is chosen
            by ``active_routing_dataset``.
        input_path: PBF file to read; defaults to the dataset's ``local_path``.
        reset: When true, existing routing edges and nodes of the dataset are
            deleted before importing.
        batch_size: Edge buffer size handed to the import handler.
        progress_callback: Optional hook receiving progress events.

    Returns:
        The summary dict produced by ``finalize_routing_layer``.

    Raises:
        RuntimeError: The configured database is not PostgreSQL.
        ValueError: No dataset could be resolved.
        FileNotFoundError: The PBF path does not exist.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer importer requires PostgreSQL/PostGIS.")

    if dataset_id is None:
        dataset = active_routing_dataset(session)
    else:
        dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        raise ValueError("No OSM PBF dataset is available for routing import.")

    path = Path(input_path or dataset.local_path)
    if not path.exists():
        raise FileNotFoundError(f"Routing import PBF does not exist: {path}")

    if reset:
        _emit(
            progress_callback,
            "routing_layer_clear_started",
            "Clearing existing routing graph.",
            None,
            None,
            {"dataset_id": dataset.id},
        )
        # Edges are removed before the nodes, as in the original ordering.
        for model in (RoutingEdge, RoutingNode):
            session.execute(delete(model).where(model.dataset_id == dataset.id))
        session.commit()

    _emit(
        progress_callback,
        "routing_layer_import_started",
        "Importing routable OSM highway graph.",
        None,
        None,
        {"dataset_id": dataset.id, "path": str(path)},
    )
    handler = _RoutingGraphHandler(
        session=session,
        dataset_id=dataset.id,
        batch_size=batch_size,
        progress_callback=progress_callback,
    )
    handler.apply_file(str(path), locations=True)
    # Write whatever is still buffered after the last full batch.
    handler.flush()

    return finalize_routing_layer(
        session,
        dataset_id=dataset.id,
        input_path=str(path),
        skipped_way_count=handler.skipped_way_count,
        progress_callback=progress_callback,
    )
def finalize_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    skipped_way_count: int = 0,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Refresh geometries and indexes, then record routing layer statistics.

    The routing geometry indexes are dropped, the ``routing_nodes`` PostGIS
    geometries are refreshed, the indexes are recreated and both routing
    tables are analyzed. Node and edge counts are then stored under the
    ``routing_layer`` key of the dataset's metadata JSON.

    Args:
        session: Open SQLAlchemy session; committed after each stage.
        dataset_id: Dataset to finalize; when ``None`` the dataset is chosen
            by ``active_routing_dataset``.
        input_path: Path reported in the result; defaults to the dataset's
            ``local_path``.
        skipped_way_count: Number of skipped ways to report in the result.
        progress_callback: Optional hook receiving progress events.

    Returns:
        ``RoutingImportResult.as_dict()`` for the finalized dataset.

    Raises:
        RuntimeError: The configured database is not PostgreSQL.
        ValueError: No dataset could be resolved.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer finalizer requires PostgreSQL/PostGIS.")

    if dataset_id is None:
        dataset = active_routing_dataset(session)
    else:
        dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        raise ValueError("No routing dataset is available to finalize.")

    path = Path(input_path or dataset.local_path)

    _emit(
        progress_callback,
        "routing_layer_geometry_indexes_dropped",
        "Dropping routing geometry indexes before bulk refresh.",
        None,
        None,
        {"dataset_id": dataset.id},
    )
    _drop_routing_geometry_indexes(session)
    session.commit()

    _emit(
        progress_callback,
        "routing_layer_geometry_started",
        "Refreshing routing node PostGIS geometries.",
        None,
        None,
        {"dataset_id": dataset.id},
    )
    refresh_postgis_geometries(session, dataset_id=dataset.id, tables=["routing_nodes"], only_missing=False)
    session.commit()

    _emit(
        progress_callback,
        "routing_layer_geometry_indexes_started",
        "Rebuilding routing geometry indexes.",
        None,
        None,
        {"dataset_id": dataset.id},
    )
    _create_routing_geometry_indexes(session)
    session.commit()
    analyze_postgresql_tables(session, ["routing_nodes", "routing_edges"])

    def _count(model, *extra_conditions) -> int:
        # Row count of ``model`` for this dataset, optionally narrowed further.
        statement = (
            select(func.count())
            .select_from(model)
            .where(model.dataset_id == dataset.id, *extra_conditions)
        )
        return int(session.scalar(statement) or 0)

    node_count = _count(RoutingNode)
    edge_count = _count(RoutingEdge)
    walk_edge_count = _count(RoutingEdge, RoutingEdge.walk_cost_s.is_not(None))
    drive_edge_count = _count(RoutingEdge, RoutingEdge.drive_cost_s.is_not(None))

    dataset_metadata = _metadata(dataset)
    dataset_metadata["routing_layer"] = {
        "version": ROUTING_LAYER_VERSION,
        "nodes": node_count,
        "edges": edge_count,
        "walk_edges": walk_edge_count,
        "drive_edges": drive_edge_count,
        "input_path": str(path),
    }
    dataset.metadata_json = json.dumps(dataset_metadata, indent=2)
    session.commit()

    summary = RoutingImportResult(
        dataset_id=dataset.id,
        input_path=str(path),
        nodes=node_count,
        edges=edge_count,
        walk_edges=walk_edge_count,
        drive_edges=drive_edge_count,
        skipped_ways=skipped_way_count,
    )
    result = summary.as_dict()
    _emit(
        progress_callback,
        "routing_layer_import_completed",
        "Routing graph import completed.",
        edge_count,
        edge_count,
        result,
    )
    return result
def _drop_routing_geometry_indexes(session: Session) -> None:
    """Drop the routing GiST indexes if they exist (no commit is issued here).

    NOTE(review): ``ix_routing_edges_geom_gist`` is dropped here but is not
    recreated by ``_create_routing_geometry_indexes`` -- confirm that it is a
    legacy index that is meant to stay removed.
    """
    drop_statements = (
        "DROP INDEX IF EXISTS ix_routing_nodes_geom_gist",
        "DROP INDEX IF EXISTS ix_routing_edges_geom_gist",
        "DROP INDEX IF EXISTS ix_routing_edges_bbox_box_gist",
    )
    for statement in drop_statements:
        session.execute(text(statement))
def _create_routing_geometry_indexes(session: Session) -> None:
    """Create the node geometry and edge bounding-box GiST indexes if missing.

    No commit is issued here; the caller owns the transaction.
    """
    create_statements = (
        "CREATE INDEX IF NOT EXISTS ix_routing_nodes_geom_gist ON routing_nodes USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_bbox_box_gist ON routing_edges USING GIST (box(point(max_lon, max_lat), point(min_lon, min_lat)))",
    )
    for statement in create_statements:
        session.execute(text(statement))
class _RoutingGraphHandler(osmium.SimpleHandler):
    """Turn routable OSM ways into buffered routing node and edge rows.

    Every pair of consecutive located nodes of a walkable or drivable
    ``highway`` way becomes one edge row. Rows are collected in memory and
    written to the database by :meth:`flush`.
    """

    def __init__(
        self,
        *,
        session: Session,
        dataset_id: int,
        batch_size: int,
        progress_callback: ProgressCallback | None,
    ) -> None:
        """Set up empty buffers and seed the counters from existing rows."""
        super().__init__()
        self.session = session
        self.dataset_id = dataset_id
        # The edge buffer is never smaller than 500 rows.
        self.batch_size = max(500, int(batch_size))
        self.progress_callback = progress_callback
        # Pending node rows keyed by OSM node id, and pending edge rows.
        self.nodes: dict[int, dict[str, object]] = {}
        self.edges: list[dict[str, object]] = []

        def existing_rows(model) -> int:
            # Rows already stored for this dataset (non-zero when not reset).
            query = select(func.count()).select_from(model).where(model.dataset_id == dataset_id)
            return int(session.scalar(query) or 0)

        self.node_count = existing_rows(RoutingNode)
        self.edge_count = existing_rows(RoutingEdge)
        self.walk_edge_count = 0
        self.drive_edge_count = 0
        self.skipped_way_count = 0
        self.processed_way_count = 0

    def way(self, way) -> None:
        """Handle one OSM way: skip it or buffer one edge per node pair."""
        tags = {tag.k: tag.v for tag in way.tags}
        highway = tags.get("highway")
        if not highway or highway in EXCLUDED_HIGHWAYS:
            self.skipped_way_count += 1
            return

        walkable = _walkable(tags, highway)
        drivable = _drivable(tags, highway)
        if not (walkable or drivable):
            self.skipped_way_count += 1
            return

        # Nodes without a valid location are dropped, so their neighbours on
        # either side end up joined directly.
        located = [
            (int(node.ref), float(node.location.lon), float(node.location.lat))
            for node in way.nodes
            if node.location.valid()
        ]
        if len(located) < 2:
            self.skipped_way_count += 1
            return

        direction = _oneway_direction(tags, highway)
        is_one_way = direction in {"forward", "reverse"}
        drive_speed_mps = _drive_speed_mps(tags, highway)
        if highway == "steps":
            walk_speed_mps = STEP_WALK_SPEED_MPS
        else:
            walk_speed_mps = DEFAULT_WALK_SPEED_MPS

        # Per-way values shared by every edge row of this way.
        osm_way_id = int(way.id)
        way_name = tags.get("name")
        tags_json = _routing_tags_json(tags)

        for start, end in zip(located, located[1:]):
            source_id, source_lon, source_lat = start
            target_id, target_lon, target_lat = end
            if source_id == target_id:
                continue
            length_m = _distance_m(source_lat, source_lon, target_lat, target_lon)
            if length_m <= 0:
                continue
            if direction == "reverse":
                # Store the edge in the direction of travel.
                (source_id, source_lon, source_lat), (target_id, target_lon, target_lat) = (
                    (target_id, target_lon, target_lat),
                    (source_id, source_lon, source_lat),
                )

            walk_cost = length_m / walk_speed_mps if walkable else None
            if drivable and drive_speed_mps > 0:
                drive_cost = length_m / drive_speed_mps
            else:
                drive_cost = None
            # Walking is always symmetric; driving only on two-way edges.
            reverse_walk_cost = walk_cost
            reverse_drive_cost = None if is_one_way else drive_cost

            self.nodes[source_id] = {
                "dataset_id": self.dataset_id,
                "osm_node_id": source_id,
                "lon": source_lon,
                "lat": source_lat,
            }
            self.nodes[target_id] = {
                "dataset_id": self.dataset_id,
                "osm_node_id": target_id,
                "lon": target_lon,
                "lat": target_lat,
            }

            geometry = {
                "type": "LineString",
                "coordinates": [[source_lon, source_lat], [target_lon, target_lat]],
            }
            edge_row = {
                "dataset_id": self.dataset_id,
                "osm_way_id": osm_way_id,
                "source_osm_node_id": source_id,
                "target_osm_node_id": target_id,
                "source_lon": source_lon,
                "source_lat": source_lat,
                "target_lon": target_lon,
                "target_lat": target_lat,
                "highway": highway,
                "name": way_name,
                "length_m": length_m,
                "walk_cost_s": walk_cost,
                "reverse_walk_cost_s": reverse_walk_cost,
                "drive_cost_s": drive_cost,
                "reverse_drive_cost_s": reverse_drive_cost,
                "geometry_geojson": json.dumps(geometry, separators=(",", ":")),
                "min_lon": min(source_lon, target_lon),
                "min_lat": min(source_lat, target_lat),
                "max_lon": max(source_lon, target_lon),
                "max_lat": max(source_lat, target_lat),
                "tags_json": tags_json,
            }
            self.edges.append(edge_row)

            self.edge_count += 1
            if walk_cost is not None:
                self.walk_edge_count += 1
            if drive_cost is not None:
                self.drive_edge_count += 1

        self.processed_way_count += 1
        if len(self.edges) >= self.batch_size:
            self.flush()
        if self.processed_way_count % 100_000 == 0:
            progress_details = {
                "processed_ways": self.processed_way_count,
                "nodes_pending": len(self.nodes),
                "edges": self.edge_count,
            }
            _emit(
                self.progress_callback,
                "routing_layer_import_batch",
                f"Imported {self.edge_count:,} routing edges.",
                self.edge_count,
                None,
                progress_details,
            )

    def flush(self) -> None:
        """Write buffered nodes and edges to the database and commit."""
        if not (self.nodes or self.edges):
            return

        pending_nodes = list(self.nodes.values())
        pending_edges = self.edges

        if pending_nodes:
            # Nodes shared with earlier batches already exist; skip those rows.
            node_insert = (
                postgresql_insert(RoutingNode)
                .values(pending_nodes)
                .on_conflict_do_nothing(index_elements=["dataset_id", "osm_node_id"])
            )
            self.session.execute(node_insert)
            # Counts submitted rows, including any the conflict clause skipped.
            self.node_count += len(pending_nodes)
            self.nodes.clear()

        if pending_edges:
            self.session.bulk_insert_mappings(RoutingEdge, pending_edges)
            self.edges = []

        self.session.commit()
def _walkable(tags: dict[str, str], highway: str) -> bool:
    """Return whether pedestrians may use a way with these tags.

    The highway class must be in ``WALK_HIGHWAYS``; a denying ``foot`` value
    always blocks, and a denying ``access`` value blocks unless ``foot``
    explicitly allows walking.
    """
    if highway not in WALK_HIGHWAYS:
        return False

    foot = _tag_value(tags, "foot")
    access = _tag_value(tags, "access")
    foot_allowed = foot in YES_VALUES

    if foot in NO_VALUES:
        return False
    if access in NO_VALUES and not foot_allowed:
        return False

    # NOTE(review): none of these classes is in WALK_HIGHWAYS, so this guard
    # cannot currently trigger; it is kept as written.
    motor_road = highway in {"motorway", "motorway_link", "trunk", "trunk_link"}
    return not (motor_road and not foot_allowed)
def _drivable(tags: dict[str, str], highway: str) -> bool:
    """Return whether cars may use a way with these tags.

    The highway class must be in ``DRIVE_HIGHWAYS``; a denying ``motorcar``,
    ``motor_vehicle`` or ``vehicle`` value always blocks, and a denying
    ``access`` value blocks unless ``motorcar`` or ``motor_vehicle``
    explicitly allows driving.
    """
    if highway not in DRIVE_HIGHWAYS:
        return False

    access = _tag_value(tags, "access")
    motor_vehicle = _tag_value(tags, "motor_vehicle")
    motorcar = _tag_value(tags, "motorcar")
    vehicle = _tag_value(tags, "vehicle")

    explicitly_allowed = motorcar in YES_VALUES or motor_vehicle in YES_VALUES
    explicitly_denied = any(value in NO_VALUES for value in (motorcar, motor_vehicle, vehicle))

    if explicitly_denied:
        return False
    if access in NO_VALUES and not explicitly_allowed:
        return False

    # NOTE(review): none of these classes is in DRIVE_HIGHWAYS, so this branch
    # cannot currently be reached; it is kept as written.
    if highway in {"footway", "path", "pedestrian", "steps", "cycleway", "bridleway"}:
        return explicitly_allowed
    return True
def _oneway_direction(tags: dict[str, str], highway: str) -> str:
    """Classify the driving direction of a way.

    Args:
        tags: OSM tags of the way.
        highway: The way's ``highway`` value.

    Returns:
        ``"reverse"`` when ``oneway`` points against the way direction,
        ``"forward"`` when it is one-way along the way direction (explicitly,
        or implied by a roundabout or motorway), otherwise ``"both"``.
    """
    oneway = _tag_value(tags, "oneway")
    if oneway in ONEWAY_REVERSE:
        return "reverse"
    if oneway in ONEWAY_FORWARD:
        return "forward"
    # An explicit two-way value overrides the one-way default that
    # roundabouts and motorways would otherwise imply.
    if oneway in {"no", "false", "0"}:
        return "both"
    # Read junction through _tag_value so case and whitespace are normalised
    # the same way as for every other tag.
    if _tag_value(tags, "junction") == "roundabout" or highway == "motorway":
        return "forward"
    return "both"
def _drive_speed_mps(tags: dict[str, str], highway: str) -> float:
    """Return the driving speed for a way in metres per second.

    A usable ``maxspeed`` tag wins; otherwise the per-class default applies
    (30 km/h for classes without one). The result is never below 5.0 m/s.
    """
    tagged_kmh = _parse_maxspeed(tags.get("maxspeed"))
    if tagged_kmh:
        kmh = tagged_kmh
    else:
        # Also covers a parsed speed of zero.
        kmh = DEFAULT_DRIVE_SPEED_KMH.get(highway, 30)
    speed_mps = float(kmh) / 3.6
    return speed_mps if speed_mps > 5.0 else 5.0
def _parse_maxspeed(value: str | None) -> float | None:
    """Parse an OSM ``maxspeed`` value into km/h.

    Returns ``None`` for a missing value, for the keywords ``signals``,
    ``none``, ``walk`` and ``variable``, and when no number can be extracted.
    Values ending in ``mph`` are converted to km/h.
    """
    if not value:
        return None

    normalized = value.strip().lower()
    if normalized in {"signals", "none", "walk", "variable"}:
        return None

    if not normalized.endswith("mph"):
        return _leading_float(normalized)

    miles_per_hour = _leading_float(normalized[:-3])
    if miles_per_hour is None:
        return None
    return miles_per_hour * 1.60934
def _leading_float(value: str) -> float | None:
digits = []
for char in value.strip():
if char.isdigit() or char == ".":
digits.append(char)
elif digits:
break
if not digits:
return None
try:
return float("".join(digits))
except ValueError:
return None
def _routing_tags_json(tags: dict[str, str]) -> str:
selected = {
key: value
for key, value in tags.items()
if key in {"access", "bicycle", "bridge", "foot", "highway", "junction", "maxspeed", "motor_vehicle", "motorcar", "name", "oneway", "service", "surface", "tunnel", "vehicle"}
}
return json.dumps(selected, separators=(",", ":"))
def _tag_value(tags: dict[str, str], key: str) -> str:
return str(tags.get(key) or "").strip().lower()
def _distance_m(lat_a: float, lon_a: float, lat_b: float, lon_b: float) -> float:
radius = 6_371_000.0
phi_a = math.radians(lat_a)
phi_b = math.radians(lat_b)
delta_phi = math.radians(lat_b - lat_a)
delta_lambda = math.radians(lon_b - lon_a)
hav = math.sin(delta_phi / 2) ** 2 + math.cos(phi_a) * math.cos(phi_b) * math.sin(delta_lambda / 2) ** 2
return radius * 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))
def _metadata(dataset: Dataset) -> dict[str, object]:
try:
value = json.loads(dataset.metadata_json or "{}")
except json.JSONDecodeError:
return {}
return value if isinstance(value, dict) else {}
def _emit(
progress_callback: ProgressCallback | None,
event_type: str,
message: str,
progress_current: int | None,
progress_total: int | None,
metadata: dict[str, object] | None = None,
) -> None:
if progress_callback is not None:
progress_callback(event_type, message, progress_current, progress_total, metadata)