474 lines
18 KiB
Python
474 lines
18 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import math
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Callable
|
|
|
|
import osmium
|
|
from sqlalchemy import delete, func, select, text
|
|
from sqlalchemy.dialects.postgresql import insert as postgresql_insert
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.config import settings
|
|
from app.models import Dataset, RoutingEdge, RoutingNode
|
|
from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries
|
|
|
|
|
|
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, object] | None], None]
|
|
ROUTING_LAYER_VERSION = "routing_layer_v2_osm_highway_segments_service_tags"
|
|
|
|
# OSM ``highway`` values that the drive profile may use.
DRIVE_HIGHWAYS = {
    "motorway", "motorway_link",
    "trunk", "trunk_link",
    "primary", "primary_link",
    "secondary", "secondary_link",
    "tertiary", "tertiary_link",
    "unclassified", "residential", "living_street",
    "service", "road", "track",
}

# OSM ``highway`` values that the walk profile may use.
WALK_HIGHWAYS = {
    # Ways dedicated to non-motorised traffic.
    "pedestrian", "footway", "path", "steps", "cycleway", "bridleway",
    # Minor roads shared with vehicles.
    "living_street", "residential", "service", "track", "unclassified", "road",
    # Classified roads up to primary; motorway and trunk are absent.
    "tertiary", "tertiary_link",
    "secondary", "secondary_link",
    "primary", "primary_link",
}
|
|
# ``highway`` values that are never imported.
EXCLUDED_HIGHWAYS = {
    "construction",
    "proposed",
    "abandoned",
    "platform",
    "raceway",
}

# Access-tag values treated as "not allowed" by the walk/drive checks.
NO_VALUES = {
    "no",
    "private",
    "agricultural",
    "forestry",
    "delivery",
    "customers",
}

# Access-tag values treated as an explicit permission.
YES_VALUES = {
    "yes",
    "designated",
    "permissive",
    "destination",
}

# ``oneway`` values meaning "along" / "against" the way's node order.
ONEWAY_FORWARD = {"yes", "true", "1"}
ONEWAY_REVERSE = {"-1", "reverse"}
|
|
# Fallback driving speed in km/h per ``highway`` class, used when a way has
# no usable ``maxspeed`` tag.
DEFAULT_DRIVE_SPEED_KMH = {
    "motorway": 110, "motorway_link": 50,
    "trunk": 90, "trunk_link": 45,
    "primary": 70, "primary_link": 40,
    "secondary": 60, "secondary_link": 35,
    "tertiary": 50, "tertiary_link": 30,
    "unclassified": 40,
    "residential": 30,
    "living_street": 10,
    "service": 15,
    "road": 30,
    "track": 15,
}

# Walking speeds in metres per second; steps use the slower value.
DEFAULT_WALK_SPEED_MPS = 1.35
STEP_WALK_SPEED_MPS = 0.65
|
|
|
|
|
|
@dataclass
class RoutingImportResult:
    """Summary counters describing one routing-layer import."""

    dataset_id: int
    input_path: str
    nodes: int
    edges: int
    walk_edges: int
    drive_edges: int
    skipped_ways: int
    # Defaults to the module's current routing layer version.
    version: str = ROUTING_LAYER_VERSION

    def as_dict(self) -> dict[str, object]:
        """Return the summary as a plain dict, with ``version`` as the first key."""
        ordered_fields = (
            "version",
            "dataset_id",
            "input_path",
            "nodes",
            "edges",
            "walk_edges",
            "drive_edges",
            "skipped_ways",
        )
        return {field_name: getattr(self, field_name) for field_name in ordered_fields}
|
|
|
|
|
|
def active_routing_dataset(session: Session) -> Dataset | None:
    """Pick the dataset whose PBF file should feed the routing import.

    Resolution order:

    1. The newest active ``osm_geojson`` dataset is looked up. If its metadata
       names a ``raw_dataset_id`` and that dataset exists with a ``local_path``
       present on disk, that raw dataset is returned.
    2. Otherwise the ``osm_pbf_raw`` dataset that sorts first by
       ``is_active`` (descending) and then ``id`` (descending) is returned.

    A malformed ``raw_dataset_id`` or a raw dataset without a ``local_path``
    falls through to step 2 instead of raising, in the same way unreadable
    metadata JSON is already treated as empty.

    Returns:
        The selected dataset, or ``None`` when no candidate exists.
    """
    active_osm = session.scalar(
        select(Dataset)
        .where(Dataset.kind == "osm_geojson", Dataset.is_active.is_(True))
        .order_by(Dataset.id.desc())
        .limit(1)  # only the first row is ever used
    )
    if active_osm is not None:
        raw_dataset_id = _metadata(active_osm).get("raw_dataset_id")
        try:
            raw_id = None if raw_dataset_id is None else int(raw_dataset_id)
        except (TypeError, ValueError):
            # Metadata is free-form JSON; an unusable id means "no link".
            raw_id = None
        if raw_id is not None:
            raw = session.get(Dataset, raw_id)
            # ``local_path`` may be unset; Path(None) would raise TypeError.
            if raw is not None and raw.local_path and Path(raw.local_path).exists():
                return raw
    return session.scalar(
        select(Dataset)
        .where(Dataset.kind == "osm_pbf_raw")
        .order_by(Dataset.is_active.desc(), Dataset.id.desc())
        .limit(1)
    )
|
|
|
|
|
|
def rebuild_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    reset: bool = True,
    batch_size: int = 5000,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Import the routable highway graph of an OSM PBF file for one dataset.

    Args:
        session: Database session; it is committed several times along the way.
        dataset_id: Dataset to import for. When omitted, the dataset is chosen
            by ``active_routing_dataset``.
        input_path: PBF file to read. Defaults to the dataset's ``local_path``.
        reset: When true, the dataset's existing routing edges and nodes are
            deleted before importing.
        batch_size: Edge batch size handed to the graph handler.
        progress_callback: Optional hook that receives progress events.

    Returns:
        The summary dict produced by ``finalize_routing_layer``.

    Raises:
        RuntimeError: The configured database is not PostgreSQL.
        ValueError: No dataset could be resolved.
        FileNotFoundError: The PBF path does not exist.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer importer requires PostgreSQL/PostGIS.")

    if dataset_id is None:
        dataset = active_routing_dataset(session)
    else:
        dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        raise ValueError("No OSM PBF dataset is available for routing import.")

    pbf_path = Path(input_path or dataset.local_path)
    if not pbf_path.exists():
        raise FileNotFoundError(f"Routing import PBF does not exist: {pbf_path}")

    if reset:
        _emit(
            progress_callback,
            "routing_layer_clear_started",
            "Clearing existing routing graph.",
            None,
            None,
            {"dataset_id": dataset.id},
        )
        # Edges are removed before the nodes they reference.
        for model in (RoutingEdge, RoutingNode):
            session.execute(delete(model).where(model.dataset_id == dataset.id))
        session.commit()

    _emit(
        progress_callback,
        "routing_layer_import_started",
        "Importing routable OSM highway graph.",
        None,
        None,
        {"dataset_id": dataset.id, "path": str(pbf_path)},
    )
    graph_handler = _RoutingGraphHandler(
        session=session,
        dataset_id=dataset.id,
        batch_size=batch_size,
        progress_callback=progress_callback,
    )
    # locations=True so way nodes carry coordinates inside the handler.
    graph_handler.apply_file(str(pbf_path), locations=True)
    # Write whatever is still buffered after the last way.
    graph_handler.flush()

    return finalize_routing_layer(
        session,
        dataset_id=dataset.id,
        input_path=str(pbf_path),
        skipped_way_count=graph_handler.skipped_way_count,
        progress_callback=progress_callback,
    )
|
|
|
|
|
|
def finalize_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    skipped_way_count: int = 0,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Rebuild geometry and indexes for an imported graph and record its stats.

    Steps, each followed by a commit: drop the routing geometry indexes,
    refresh the PostGIS geometries of ``routing_nodes`` for the dataset,
    recreate the indexes. Afterwards the tables are analyzed, node/edge
    counts are collected and stored under ``routing_layer`` in the dataset's
    metadata JSON, and a completion event is emitted.

    Args:
        session: Database session; committed several times.
        dataset_id: Dataset to finalize. When omitted, the dataset is chosen
            by ``active_routing_dataset``.
        input_path: Path recorded in the result; defaults to the dataset's
            ``local_path``. The file is not opened here.
        skipped_way_count: Number of skipped ways to report in the result.
        progress_callback: Optional hook that receives progress events.

    Returns:
        ``RoutingImportResult.as_dict()`` for the finalized dataset.

    Raises:
        RuntimeError: The configured database is not PostgreSQL.
        ValueError: No dataset could be resolved.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer finalizer requires PostgreSQL/PostGIS.")
    dataset = session.get(Dataset, dataset_id) if dataset_id is not None else active_routing_dataset(session)
    if dataset is None:
        raise ValueError("No routing dataset is available to finalize.")
    path = Path(input_path or dataset.local_path)

    _emit(progress_callback, "routing_layer_geometry_indexes_dropped", "Dropping routing geometry indexes before bulk refresh.", None, None, {"dataset_id": dataset.id})
    _drop_routing_geometry_indexes(session)
    session.commit()

    _emit(progress_callback, "routing_layer_geometry_started", "Refreshing routing node PostGIS geometries.", None, None, {"dataset_id": dataset.id})
    refresh_postgis_geometries(session, dataset_id=dataset.id, tables=["routing_nodes"], only_missing=False)
    session.commit()

    _emit(progress_callback, "routing_layer_geometry_indexes_started", "Rebuilding routing geometry indexes.", None, None, {"dataset_id": dataset.id})
    _create_routing_geometry_indexes(session)
    session.commit()
    analyze_postgresql_tables(session, ["routing_nodes", "routing_edges"])

    node_count = int(
        session.scalar(select(func.count()).select_from(RoutingNode).where(RoutingNode.dataset_id == dataset.id)) or 0
    )
    # One pass over routing_edges instead of three: COUNT(column) only counts
    # rows where the column is non-NULL, which matches the former
    # ``... IS NOT NULL`` filters.
    edge_totals = session.execute(
        select(
            func.count(),
            func.count(RoutingEdge.walk_cost_s),
            func.count(RoutingEdge.drive_cost_s),
        )
        .select_from(RoutingEdge)
        .where(RoutingEdge.dataset_id == dataset.id)
    ).one()
    edge_count, walk_edge_count, drive_edge_count = (int(total or 0) for total in edge_totals)

    dataset_metadata = _metadata(dataset)
    dataset_metadata["routing_layer"] = {
        "version": ROUTING_LAYER_VERSION,
        "nodes": node_count,
        "edges": edge_count,
        "walk_edges": walk_edge_count,
        "drive_edges": drive_edge_count,
        "input_path": str(path),
    }
    dataset.metadata_json = json.dumps(dataset_metadata, indent=2)
    session.commit()

    result = RoutingImportResult(
        dataset_id=dataset.id,
        input_path=str(path),
        nodes=node_count,
        edges=edge_count,
        walk_edges=walk_edge_count,
        drive_edges=drive_edge_count,
        skipped_ways=skipped_way_count,
    ).as_dict()
    _emit(progress_callback, "routing_layer_import_completed", "Routing graph import completed.", edge_count, edge_count, result)
    return result
|
|
|
|
|
|
def _drop_routing_geometry_indexes(session: Session) -> None:
    """Drop the routing GiST indexes if they exist; the caller commits."""
    drop_statements = (
        "DROP INDEX IF EXISTS ix_routing_nodes_geom_gist",
        "DROP INDEX IF EXISTS ix_routing_edges_geom_gist",
        "DROP INDEX IF EXISTS ix_routing_edges_bbox_box_gist",
    )
    for statement in drop_statements:
        session.execute(text(statement))
|
|
|
|
|
|
def _create_routing_geometry_indexes(session: Session) -> None:
    """Create the node geometry and edge bounding-box GiST indexes if missing.

    The caller is responsible for committing.
    """
    create_statements = (
        "CREATE INDEX IF NOT EXISTS ix_routing_nodes_geom_gist ON routing_nodes USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_bbox_box_gist ON routing_edges USING GIST (box(point(max_lon, max_lat), point(min_lon, min_lat)))",
    )
    for statement in create_statements:
        session.execute(text(statement))
|
|
|
|
|
|
class _RoutingGraphHandler(osmium.SimpleHandler):
    """Osmium handler that converts routable OSM ways into node and edge rows.

    Every pair of consecutive way nodes becomes one ``RoutingEdge`` row; the
    end points are collected as ``RoutingNode`` rows. Rows are buffered in
    memory and written by ``flush``, which also commits the session.
    """

    def __init__(
        self,
        *,
        session: Session,
        dataset_id: int,
        batch_size: int,
        progress_callback: ProgressCallback | None,
    ) -> None:
        """Set up empty buffers and seed the counters from the database.

        Args:
            session: Session used for inserts and commits.
            dataset_id: Dataset id stamped on every row.
            batch_size: Number of buffered edges that triggers a flush;
                values below 500 are raised to 500.
            progress_callback: Optional hook that receives progress events.
        """
        super().__init__()
        self.session = session
        self.dataset_id = dataset_id
        self.batch_size = max(500, int(batch_size))
        self.progress_callback = progress_callback
        # Pending nodes keyed by OSM node id, so a node shared by several
        # buffered edges is submitted once per batch.
        self.nodes: dict[int, dict[str, object]] = {}
        self.edges: list[dict[str, object]] = []
        # Start from what the dataset already has (non-zero when the caller
        # did not reset the graph).
        self.node_count = int(
            session.scalar(select(func.count()).select_from(RoutingNode).where(RoutingNode.dataset_id == dataset_id)) or 0
        )
        self.edge_count = int(
            session.scalar(select(func.count()).select_from(RoutingEdge).where(RoutingEdge.dataset_id == dataset_id)) or 0
        )
        self.walk_edge_count = 0
        self.drive_edge_count = 0
        self.skipped_way_count = 0
        self.processed_way_count = 0

    def way(self, way) -> None:
        """Buffer the routing edges of one OSM way.

        A way is counted as skipped when it has no ``highway`` tag, its
        highway value is excluded, it is neither walkable nor drivable, or
        fewer than two of its nodes have a valid location.
        """
        # Check the highway tag before building the full tag dict, so ways
        # that are rejected here do not pay for the dict.
        highway = way.tags.get("highway")
        if not highway or highway in EXCLUDED_HIGHWAYS:
            self.skipped_way_count += 1
            return
        tags = {tag.k: tag.v for tag in way.tags}
        walkable = _walkable(tags, highway)
        drivable = _drivable(tags, highway)
        if not walkable and not drivable:
            self.skipped_way_count += 1
            return

        # Nodes without a valid location are left out, so their neighbours
        # end up joined directly by one segment.
        nodes = [
            (int(node.ref), float(node.location.lon), float(node.location.lat))
            for node in way.nodes
            if node.location.valid()
        ]
        if len(nodes) < 2:
            self.skipped_way_count += 1
            return

        oneway = _oneway_direction(tags, highway)
        drive_speed_mps = _drive_speed_mps(tags, highway)
        walk_speed_mps = STEP_WALK_SPEED_MPS if highway == "steps" else DEFAULT_WALK_SPEED_MPS
        # Values that are identical for every segment of this way.
        osm_way_id = int(way.id)
        way_name = tags.get("name")
        tags_json = _routing_tags_json(tags)
        one_directional = oneway in {"forward", "reverse"}

        for left, right in zip(nodes, nodes[1:]):
            source_id, source_lon, source_lat = left
            target_id, target_lon, target_lat = right
            if source_id == target_id:
                continue
            length_m = _distance_m(source_lat, source_lon, target_lat, target_lon)
            if length_m <= 0:
                continue
            if oneway == "reverse":
                # Store the segment in its direction of travel.
                source_id, target_id = target_id, source_id
                source_lon, target_lon = target_lon, source_lon
                source_lat, target_lat = target_lat, source_lat

            walk_cost = length_m / walk_speed_mps if walkable else None
            drive_cost = length_m / drive_speed_mps if drivable and drive_speed_mps > 0 else None
            # Walking ignores oneway; driving against a oneway is not allowed.
            reverse_walk_cost = walk_cost
            reverse_drive_cost = None if one_directional else drive_cost
            self.nodes[source_id] = {"dataset_id": self.dataset_id, "osm_node_id": source_id, "lon": source_lon, "lat": source_lat}
            self.nodes[target_id] = {"dataset_id": self.dataset_id, "osm_node_id": target_id, "lon": target_lon, "lat": target_lat}
            self.edges.append(
                {
                    "dataset_id": self.dataset_id,
                    "osm_way_id": osm_way_id,
                    "source_osm_node_id": source_id,
                    "target_osm_node_id": target_id,
                    "source_lon": source_lon,
                    "source_lat": source_lat,
                    "target_lon": target_lon,
                    "target_lat": target_lat,
                    "highway": highway,
                    "name": way_name,
                    "length_m": length_m,
                    "walk_cost_s": walk_cost,
                    "reverse_walk_cost_s": reverse_walk_cost,
                    "drive_cost_s": drive_cost,
                    "reverse_drive_cost_s": reverse_drive_cost,
                    "geometry_geojson": json.dumps({"type": "LineString", "coordinates": [[source_lon, source_lat], [target_lon, target_lat]]}, separators=(",", ":")),
                    "min_lon": min(source_lon, target_lon),
                    "min_lat": min(source_lat, target_lat),
                    "max_lon": max(source_lon, target_lon),
                    "max_lat": max(source_lat, target_lat),
                    "tags_json": tags_json,
                }
            )
            self.edge_count += 1
            if walk_cost is not None:
                self.walk_edge_count += 1
            if drive_cost is not None:
                self.drive_edge_count += 1

        self.processed_way_count += 1
        if len(self.edges) >= self.batch_size:
            self.flush()
        if self.processed_way_count % 100_000 == 0:
            _emit(
                self.progress_callback,
                "routing_layer_import_batch",
                f"Imported {self.edge_count:,} routing edges.",
                self.edge_count,
                None,
                {"processed_ways": self.processed_way_count, "nodes_pending": len(self.nodes), "edges": self.edge_count},
            )

    def flush(self) -> None:
        """Write the buffered nodes and edges and commit the session.

        Nodes are inserted with ``ON CONFLICT DO NOTHING`` on
        ``(dataset_id, osm_node_id)``, so nodes stored by an earlier batch
        are silently skipped. Does nothing when both buffers are empty.
        """
        if not self.nodes and not self.edges:
            return
        node_rows = list(self.nodes.values())
        edge_rows = self.edges
        if node_rows:
            stmt = postgresql_insert(RoutingNode).values(node_rows)
            stmt = stmt.on_conflict_do_nothing(index_elements=["dataset_id", "osm_node_id"])
            result = self.session.execute(stmt)
            # Count rows actually inserted, not rows submitted: conflicts are
            # skipped and would otherwise inflate the total. Fall back to the
            # submitted count if no usable rowcount is reported.
            inserted = getattr(result, "rowcount", None)
            if not isinstance(inserted, int) or inserted < 0:
                inserted = len(node_rows)
            self.node_count += inserted
            self.nodes.clear()
        if edge_rows:
            self.session.bulk_insert_mappings(RoutingEdge, edge_rows)
            self.edges = []
        self.session.commit()
|
|
|
|
|
|
def _walkable(tags: dict[str, str], highway: str) -> bool:
    """Tell whether pedestrians may use a way with these tags.

    The highway class must be in ``WALK_HIGHWAYS``. A restrictive ``foot``
    value always blocks; a restrictive ``access`` value blocks unless
    ``foot`` explicitly allows walking.
    """
    if highway not in WALK_HIGHWAYS:
        return False

    foot = _tag_value(tags, "foot")
    if foot in NO_VALUES:
        return False

    foot_allowed = foot in YES_VALUES
    if _tag_value(tags, "access") in NO_VALUES and not foot_allowed:
        return False

    # None of these classes is currently in WALK_HIGHWAYS, so this only takes
    # effect if that set is extended: such roads need an explicit foot permit.
    high_speed_road = highway in {"motorway", "motorway_link", "trunk", "trunk_link"}
    return foot_allowed or not high_speed_road
|
|
|
|
|
|
def _drivable(tags: dict[str, str], highway: str) -> bool:
    """Tell whether cars may use a way with these tags.

    The highway class must be in ``DRIVE_HIGHWAYS``. A restrictive value on
    any of ``motorcar``, ``motor_vehicle`` or ``vehicle`` always blocks. A
    restrictive ``access`` value blocks unless one of those three keys
    explicitly allows driving. ``vehicle`` now counts as an override too;
    previously it could only veto, unlike the other two keys.
    """
    if highway not in DRIVE_HIGHWAYS:
        return False

    access = _tag_value(tags, "access")
    motor_vehicle = _tag_value(tags, "motor_vehicle")
    motorcar = _tag_value(tags, "motorcar")
    vehicle = _tag_value(tags, "vehicle")
    mode_values = (motorcar, motor_vehicle, vehicle)

    if any(value in NO_VALUES for value in mode_values):
        return False
    # The mode-specific keys refine the generic ``access`` key, so an explicit
    # permission on any of them lifts a generic restriction.
    if access in NO_VALUES and not any(value in YES_VALUES for value in mode_values):
        return False
    # None of these classes is currently in DRIVE_HIGHWAYS, so this only takes
    # effect if that set is extended: such ways need an explicit motor permit.
    if highway in {"footway", "path", "pedestrian", "steps", "cycleway", "bridleway"}:
        return motorcar in YES_VALUES or motor_vehicle in YES_VALUES
    return True
|
|
|
|
|
|
def _oneway_direction(tags: dict[str, str], highway: str) -> str:
    """Classify a way's driving direction relative to its node order.

    Returns:
        ``"reverse"`` for an explicit reverse oneway, ``"forward"`` for an
        explicit forward oneway or an implied one (roundabouts and
        motorways), and ``"both"`` otherwise. An explicit ``oneway=no`` (or
        ``false`` / ``0``) overrides the implied cases instead of being
        ignored.
    """
    oneway = _tag_value(tags, "oneway")
    if oneway in ONEWAY_REVERSE:
        return "reverse"
    if oneway in ONEWAY_FORWARD:
        return "forward"
    # Negative counterparts of ONEWAY_FORWARD: the mapper stated explicitly
    # that the way is two-way, so the implied defaults below must not apply.
    if oneway in {"no", "false", "0"}:
        return "both"
    if tags.get("junction") == "roundabout" or highway == "motorway":
        return "forward"
    return "both"
|
|
|
|
|
|
def _drive_speed_mps(tags: dict[str, str], highway: str) -> float:
    """Return the driving speed for a way in m/s, never below 5.0.

    Uses the parsed ``maxspeed`` tag when it yields a non-zero number,
    otherwise the per-class default (30 km/h for classes without an entry).
    """
    kmh = _parse_maxspeed(tags.get("maxspeed"))
    if not kmh:
        kmh = DEFAULT_DRIVE_SPEED_KMH.get(highway, 30)
    mps = float(kmh) / 3.6
    return mps if mps > 5.0 else 5.0
|
|
|
|
|
|
def _parse_maxspeed(value: str | None) -> float | None:
    """Parse an OSM ``maxspeed`` value into km/h.

    Returns ``None`` for empty input, for the keywords ``signals``, ``none``,
    ``walk`` and ``variable``, and when no number can be extracted. Values
    ending in ``mph`` are converted to km/h; anything else is taken as-is.
    """
    if not value:
        return None

    normalized = value.strip().lower()
    if normalized in {"signals", "none", "walk", "variable"}:
        return None

    if not normalized.endswith("mph"):
        return _leading_float(normalized)

    miles_per_hour = _leading_float(normalized[:-3])
    if miles_per_hour is None:
        return None
    return miles_per_hour * 1.60934
|
|
|
|
|
|
def _leading_float(value: str) -> float | None:
|
|
digits = []
|
|
for char in value.strip():
|
|
if char.isdigit() or char == ".":
|
|
digits.append(char)
|
|
elif digits:
|
|
break
|
|
if not digits:
|
|
return None
|
|
try:
|
|
return float("".join(digits))
|
|
except ValueError:
|
|
return None
|
|
|
|
|
|
def _routing_tags_json(tags: dict[str, str]) -> str:
|
|
selected = {
|
|
key: value
|
|
for key, value in tags.items()
|
|
if key in {"access", "bicycle", "bridge", "foot", "highway", "junction", "maxspeed", "motor_vehicle", "motorcar", "name", "oneway", "service", "surface", "tunnel", "vehicle"}
|
|
}
|
|
return json.dumps(selected, separators=(",", ":"))
|
|
|
|
|
|
def _tag_value(tags: dict[str, str], key: str) -> str:
|
|
return str(tags.get(key) or "").strip().lower()
|
|
|
|
|
|
def _distance_m(lat_a: float, lon_a: float, lat_b: float, lon_b: float) -> float:
|
|
radius = 6_371_000.0
|
|
phi_a = math.radians(lat_a)
|
|
phi_b = math.radians(lat_b)
|
|
delta_phi = math.radians(lat_b - lat_a)
|
|
delta_lambda = math.radians(lon_b - lon_a)
|
|
hav = math.sin(delta_phi / 2) ** 2 + math.cos(phi_a) * math.cos(phi_b) * math.sin(delta_lambda / 2) ** 2
|
|
return radius * 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))
|
|
|
|
|
|
def _metadata(dataset: Dataset) -> dict[str, object]:
|
|
try:
|
|
value = json.loads(dataset.metadata_json or "{}")
|
|
except json.JSONDecodeError:
|
|
return {}
|
|
return value if isinstance(value, dict) else {}
|
|
|
|
|
|
def _emit(
|
|
progress_callback: ProgressCallback | None,
|
|
event_type: str,
|
|
message: str,
|
|
progress_current: int | None,
|
|
progress_total: int | None,
|
|
metadata: dict[str, object] | None = None,
|
|
) -> None:
|
|
if progress_callback is not None:
|
|
progress_callback(event_type, message, progress_current, progress_total, metadata)
|