Alpha stage commit
This commit is contained in:
473
app/pipeline/routing_layer.py
Normal file
473
app/pipeline/routing_layer.py
Normal file
@@ -0,0 +1,473 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
import osmium
|
||||
from sqlalchemy import delete, func, select, text
|
||||
from sqlalchemy.dialects.postgresql import insert as postgresql_insert
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, RoutingEdge, RoutingNode
|
||||
from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries
|
||||
|
||||
|
||||
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, object] | None], None]
|
||||
ROUTING_LAYER_VERSION = "routing_layer_v2_osm_highway_segments_service_tags"
|
||||
|
||||
# ``highway=*`` values that are candidates for car routing; a way whose
# highway value is not listed here is never treated as drivable.
DRIVE_HIGHWAYS = {
    # Classified roads and their link ramps.
    "motorway", "motorway_link",
    "trunk", "trunk_link",
    "primary", "primary_link",
    "secondary", "secondary_link",
    "tertiary", "tertiary_link",
    # Minor and unclassified roads.
    "unclassified", "residential", "living_street",
    "service", "road", "track",
}
|
||||
# ``highway=*`` values that are candidates for pedestrian routing; a way whose
# highway value is not listed here is never treated as walkable.
WALK_HIGHWAYS = {
    # Dedicated non-motorised ways.
    "pedestrian", "footway", "path", "steps", "cycleway", "bridleway",
    # Minor roads shared with traffic.
    "living_street", "residential", "service", "track", "unclassified", "road",
    # Classified roads (motorway and trunk classes are deliberately absent).
    "tertiary", "tertiary_link",
    "secondary", "secondary_link",
    "primary", "primary_link",
}
|
||||
# ``highway=*`` values that are skipped outright, before any access checks.
EXCLUDED_HIGHWAYS = {
    "construction",
    "proposed",
    "abandoned",
    "platform",
    "raceway",
}

# Access-tag values treated as "not allowed" / "allowed" for a transport mode.
# Values are compared after lower-casing and stripping (see ``_tag_value``).
NO_VALUES = {
    "no",
    "private",
    "agricultural",
    "forestry",
    "delivery",
    "customers",
}
YES_VALUES = {
    "yes",
    "designated",
    "permissive",
    "destination",
}

# ``oneway=*`` values meaning "along the way direction" and "against it".
ONEWAY_FORWARD = {"yes", "true", "1"}
ONEWAY_REVERSE = {"-1", "reverse"}
|
||||
# Fallback driving speed in km/h per highway class, used when a way has no
# usable ``maxspeed`` tag.
DEFAULT_DRIVE_SPEED_KMH = {
    "motorway": 110, "motorway_link": 50,
    "trunk": 90, "trunk_link": 45,
    "primary": 70, "primary_link": 40,
    "secondary": 60, "secondary_link": 35,
    "tertiary": 50, "tertiary_link": 30,
    "unclassified": 40,
    "residential": 30,
    "living_street": 10,
    "service": 15,
    "road": 30,
    "track": 15,
}

# Walking speeds in metres per second; the slower value applies to
# ``highway=steps`` segments.
DEFAULT_WALK_SPEED_MPS = 1.35
STEP_WALK_SPEED_MPS = 0.65
|
||||
|
||||
|
||||
@dataclass
class RoutingImportResult:
    """Summary of one routing-layer import: row counts plus provenance."""

    dataset_id: int
    input_path: str
    nodes: int
    edges: int
    walk_edges: int
    drive_edges: int
    skipped_ways: int
    version: str = ROUTING_LAYER_VERSION

    def as_dict(self) -> dict[str, object]:
        """Return the summary as a plain dict, with ``version`` as the first key."""
        payload: dict[str, object] = {"version": self.version}
        # Key order is fixed explicitly so serialised output stays stable.
        for field_name in (
            "dataset_id",
            "input_path",
            "nodes",
            "edges",
            "walk_edges",
            "drive_edges",
            "skipped_ways",
        ):
            payload[field_name] = getattr(self, field_name)
        return payload
|
||||
|
||||
|
||||
def active_routing_dataset(session: Session) -> Dataset | None:
    """Pick the dataset whose PBF file should feed the routing import.

    Preference order:

    1. The dataset referenced by ``raw_dataset_id`` in the metadata of the
       newest active ``osm_geojson`` dataset, provided that row exists and its
       ``local_path`` exists on disk.
    2. Otherwise the first ``osm_pbf_raw`` dataset, active rows first and then
       highest id first. No file-existence check is made on this fallback.

    Returns ``None`` when neither lookup yields a row.
    """
    newest_active_geojson = (
        select(Dataset)
        .where(Dataset.kind == "osm_geojson", Dataset.is_active.is_(True))
        .order_by(Dataset.id.desc())
    )
    geojson_dataset = session.scalar(newest_active_geojson)

    linked_raw_id = None
    if geojson_dataset is not None:
        linked_raw_id = _metadata(geojson_dataset).get("raw_dataset_id")

    if linked_raw_id is not None:
        linked_raw = session.get(Dataset, int(linked_raw_id))
        if linked_raw is not None and Path(linked_raw.local_path).exists():
            return linked_raw

    fallback_raw = (
        select(Dataset)
        .where(Dataset.kind == "osm_pbf_raw")
        .order_by(Dataset.is_active.desc(), Dataset.id.desc())
    )
    return session.scalar(fallback_raw)
|
||||
|
||||
|
||||
def rebuild_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    reset: bool = True,
    batch_size: int = 5000,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Import the routable highway graph from an OSM PBF file and finalize it.

    Args:
        session: Database session used for all reads, writes and commits.
        dataset_id: Dataset to import for; when ``None`` the dataset chosen by
            ``active_routing_dataset`` is used.
        input_path: PBF file to read; defaults to the dataset's ``local_path``.
        reset: When true, delete the dataset's existing routing edges and
            nodes (and commit) before importing.
        batch_size: Edge batch size handed to the import handler.
        progress_callback: Optional hook that receives progress events.

    Returns:
        The summary dict produced by ``finalize_routing_layer``.

    Raises:
        RuntimeError: The configured database is not PostgreSQL.
        ValueError: No dataset could be resolved.
        FileNotFoundError: The PBF path does not exist.

    NOTE(review): with ``reset=False`` edges are appended by a plain bulk
    insert while nodes use ON CONFLICT DO NOTHING, so re-importing the same
    file looks like it would duplicate edges -- confirm this is intended.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer importer requires PostgreSQL/PostGIS.")

    if dataset_id is None:
        dataset = active_routing_dataset(session)
    else:
        dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        raise ValueError("No OSM PBF dataset is available for routing import.")

    path = Path(input_path or dataset.local_path)
    if not path.exists():
        raise FileNotFoundError(f"Routing import PBF does not exist: {path}")

    if reset:
        _emit(
            progress_callback,
            "routing_layer_clear_started",
            "Clearing existing routing graph.",
            None,
            None,
            {"dataset_id": dataset.id},
        )
        # Edges are removed before the nodes they reference.
        for model in (RoutingEdge, RoutingNode):
            session.execute(delete(model).where(model.dataset_id == dataset.id))
        session.commit()

    _emit(
        progress_callback,
        "routing_layer_import_started",
        "Importing routable OSM highway graph.",
        None,
        None,
        {"dataset_id": dataset.id, "path": str(path)},
    )
    handler = _RoutingGraphHandler(
        session=session,
        dataset_id=dataset.id,
        batch_size=batch_size,
        progress_callback=progress_callback,
    )
    handler.apply_file(str(path), locations=True)
    # Persist whatever is still buffered after the last full batch.
    handler.flush()

    return finalize_routing_layer(
        session,
        dataset_id=dataset.id,
        input_path=str(path),
        skipped_way_count=handler.skipped_way_count,
        progress_callback=progress_callback,
    )
|
||||
|
||||
|
||||
def finalize_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    skipped_way_count: int = 0,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Refresh geometries and indexes for an imported graph and record its stats.

    Steps, each followed by a commit: drop the routing geometry indexes,
    refresh the ``routing_nodes`` PostGIS geometries, recreate the indexes.
    Afterwards the tables are analyzed, row counts are collected, and a
    ``routing_layer`` entry is written into the dataset's metadata JSON.

    Args:
        session: Database session used for all statements and commits.
        dataset_id: Dataset to finalize; when ``None`` the dataset chosen by
            ``active_routing_dataset`` is used.
        input_path: Path recorded in the summary; defaults to the dataset's
            ``local_path``.
        skipped_way_count: Number of skipped ways to report in the result
            (it is not stored in the dataset metadata).
        progress_callback: Optional hook that receives progress events.

    Returns:
        ``RoutingImportResult.as_dict()`` for the finalized dataset.

    Raises:
        RuntimeError: The configured database is not PostgreSQL.
        ValueError: No dataset could be resolved.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer finalizer requires PostgreSQL/PostGIS.")

    if dataset_id is None:
        dataset = active_routing_dataset(session)
    else:
        dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        raise ValueError("No routing dataset is available to finalize.")

    path = Path(input_path or dataset.local_path)

    def announce(event_type: str, message: str) -> None:
        # Every phase event carries the same dataset-id payload.
        _emit(progress_callback, event_type, message, None, None, {"dataset_id": dataset.id})

    def count_rows(model, *conditions) -> int:
        # Count this dataset's rows in ``model``, optionally narrowed further.
        query = select(func.count()).select_from(model).where(model.dataset_id == dataset.id, *conditions)
        return int(session.scalar(query) or 0)

    announce("routing_layer_geometry_indexes_dropped", "Dropping routing geometry indexes before bulk refresh.")
    _drop_routing_geometry_indexes(session)
    session.commit()

    announce("routing_layer_geometry_started", "Refreshing routing node PostGIS geometries.")
    refresh_postgis_geometries(session, dataset_id=dataset.id, tables=["routing_nodes"], only_missing=False)
    session.commit()

    announce("routing_layer_geometry_indexes_started", "Rebuilding routing geometry indexes.")
    _create_routing_geometry_indexes(session)
    session.commit()

    analyze_postgresql_tables(session, ["routing_nodes", "routing_edges"])

    node_count = count_rows(RoutingNode)
    edge_count = count_rows(RoutingEdge)
    walk_edge_count = count_rows(RoutingEdge, RoutingEdge.walk_cost_s.is_not(None))
    drive_edge_count = count_rows(RoutingEdge, RoutingEdge.drive_cost_s.is_not(None))

    dataset_metadata = _metadata(dataset)
    dataset_metadata["routing_layer"] = {
        "version": ROUTING_LAYER_VERSION,
        "nodes": node_count,
        "edges": edge_count,
        "walk_edges": walk_edge_count,
        "drive_edges": drive_edge_count,
        "input_path": str(path),
    }
    dataset.metadata_json = json.dumps(dataset_metadata, indent=2)
    session.commit()

    summary = RoutingImportResult(
        dataset_id=dataset.id,
        input_path=str(path),
        nodes=node_count,
        edges=edge_count,
        walk_edges=walk_edge_count,
        drive_edges=drive_edge_count,
        skipped_ways=skipped_way_count,
    )
    result = summary.as_dict()
    _emit(
        progress_callback,
        "routing_layer_import_completed",
        "Routing graph import completed.",
        edge_count,
        edge_count,
        result,
    )
    return result
|
||||
|
||||
|
||||
def _drop_routing_geometry_indexes(session: Session) -> None:
    """Drop the routing spatial indexes if they exist (no commit is issued here).

    NOTE(review): ``ix_routing_edges_geom_gist`` is dropped here but is not
    recreated by ``_create_routing_geometry_indexes`` -- presumably a legacy
    index being cleaned up; confirm.
    """
    drop_statements = (
        "DROP INDEX IF EXISTS ix_routing_nodes_geom_gist",
        "DROP INDEX IF EXISTS ix_routing_edges_geom_gist",
        "DROP INDEX IF EXISTS ix_routing_edges_bbox_box_gist",
    )
    for statement in drop_statements:
        session.execute(text(statement))
|
||||
|
||||
|
||||
def _create_routing_geometry_indexes(session: Session) -> None:
    """Create the routing spatial indexes if missing (no commit is issued here).

    Builds a GiST index on ``routing_nodes.geom`` and a GiST index over the
    bounding box expression of each ``routing_edges`` row.
    """
    create_statements = (
        "CREATE INDEX IF NOT EXISTS ix_routing_nodes_geom_gist ON routing_nodes USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_bbox_box_gist ON routing_edges USING GIST (box(point(max_lon, max_lat), point(min_lon, min_lat)))",
    )
    for statement in create_statements:
        session.execute(text(statement))
|
||||
|
||||
|
||||
class _RoutingGraphHandler(osmium.SimpleHandler):
    """Turn OSM highway ways into routing node/edge rows and store them in batches.

    Every consecutive pair of located way nodes becomes one edge row; the two
    end nodes are buffered as node rows. Buffers are written and committed by
    :meth:`flush`, which :meth:`way` triggers once enough edges are pending.
    """

    def __init__(
        self,
        *,
        session: Session,
        dataset_id: int,
        batch_size: int,
        progress_callback: ProgressCallback | None,
    ) -> None:
        """Set up empty buffers and seed the running totals from the database."""
        super().__init__()
        self.session = session
        self.dataset_id = dataset_id
        # Batches never go below 500 edges, whatever the caller asked for.
        self.batch_size = max(500, int(batch_size))
        self.progress_callback = progress_callback
        # Pending node rows are keyed by OSM node id, so a node seen several
        # times within one batch is buffered once (last write wins).
        self.nodes: dict[int, dict[str, object]] = {}
        self.edges: list[dict[str, object]] = []
        # Totals start from rows already stored for this dataset.
        self.node_count = self._stored_row_count(RoutingNode)
        self.edge_count = self._stored_row_count(RoutingEdge)
        self.walk_edge_count = 0
        self.drive_edge_count = 0
        self.skipped_way_count = 0
        self.processed_way_count = 0

    def _stored_row_count(self, model) -> int:
        """Count the rows of ``model`` already stored for this dataset."""
        query = select(func.count()).select_from(model).where(model.dataset_id == self.dataset_id)
        return int(self.session.scalar(query) or 0)

    def _node_row(self, osm_node_id: int, lon: float, lat: float) -> dict[str, object]:
        """Build the insert mapping for one routing node."""
        return {"dataset_id": self.dataset_id, "osm_node_id": osm_node_id, "lon": lon, "lat": lat}

    def way(self, way) -> None:
        """Handle one OSM way: buffer an edge per segment if the way is routable.

        A way is counted as skipped when it has no ``highway`` tag, its highway
        value is excluded, it is neither walkable nor drivable, or fewer than
        two of its nodes carry a valid location.
        """
        tags = {tag.k: tag.v for tag in way.tags}
        highway = tags.get("highway")
        if not highway or highway in EXCLUDED_HIGHWAYS:
            self.skipped_way_count += 1
            return

        walkable = _walkable(tags, highway)
        drivable = _drivable(tags, highway)
        if not (walkable or drivable):
            self.skipped_way_count += 1
            return

        # Nodes without a valid location are dropped, so their neighbours end
        # up joined by a single direct segment.
        # NOTE(review): confirm that bridging over missing nodes is intended.
        located = [
            (int(node.ref), float(node.location.lon), float(node.location.lat))
            for node in way.nodes
            if node.location.valid()
        ]
        if len(located) < 2:
            self.skipped_way_count += 1
            return

        oneway = _oneway_direction(tags, highway)
        is_oneway = oneway in {"forward", "reverse"}
        drive_speed_mps = _drive_speed_mps(tags, highway)
        if highway == "steps":
            walk_speed_mps = STEP_WALK_SPEED_MPS
        else:
            walk_speed_mps = DEFAULT_WALK_SPEED_MPS

        # Values shared by every segment of this way.
        osm_way_id = int(way.id)
        way_name = tags.get("name")
        tags_json = _routing_tags_json(tags)

        for start, end in zip(located, located[1:]):
            if start[0] == end[0]:
                continue
            length_m = _distance_m(start[2], start[1], end[2], end[1])
            if length_m <= 0:
                continue
            if oneway == "reverse":
                # Store the segment in its travel direction so the forward
                # drive cost always describes the permitted direction.
                start, end = end, start
            source_id, source_lon, source_lat = start
            target_id, target_lon, target_lat = end

            walk_cost = None
            if walkable:
                walk_cost = length_m / walk_speed_mps
            drive_cost = None
            if drivable and drive_speed_mps > 0:
                drive_cost = length_m / drive_speed_mps
            # One-way restrictions only remove the reverse *drive* cost;
            # walking stays possible in both directions.
            reverse_walk_cost = walk_cost
            reverse_drive_cost = None if is_oneway else drive_cost

            self.nodes[source_id] = self._node_row(source_id, source_lon, source_lat)
            self.nodes[target_id] = self._node_row(target_id, target_lon, target_lat)

            geometry = {
                "type": "LineString",
                "coordinates": [[source_lon, source_lat], [target_lon, target_lat]],
            }
            self.edges.append(
                {
                    "dataset_id": self.dataset_id,
                    "osm_way_id": osm_way_id,
                    "source_osm_node_id": source_id,
                    "target_osm_node_id": target_id,
                    "source_lon": source_lon,
                    "source_lat": source_lat,
                    "target_lon": target_lon,
                    "target_lat": target_lat,
                    "highway": highway,
                    "name": way_name,
                    "length_m": length_m,
                    "walk_cost_s": walk_cost,
                    "reverse_walk_cost_s": reverse_walk_cost,
                    "drive_cost_s": drive_cost,
                    "reverse_drive_cost_s": reverse_drive_cost,
                    "geometry_geojson": json.dumps(geometry, separators=(",", ":")),
                    "min_lon": min(source_lon, target_lon),
                    "min_lat": min(source_lat, target_lat),
                    "max_lon": max(source_lon, target_lon),
                    "max_lat": max(source_lat, target_lat),
                    "tags_json": tags_json,
                }
            )
            self.edge_count += 1
            if walk_cost is not None:
                self.walk_edge_count += 1
            if drive_cost is not None:
                self.drive_edge_count += 1

        self.processed_way_count += 1
        if len(self.edges) >= self.batch_size:
            self.flush()
        if self.processed_way_count % 100_000 == 0:
            _emit(
                self.progress_callback,
                "routing_layer_import_batch",
                f"Imported {self.edge_count:,} routing edges.",
                self.edge_count,
                None,
                {"processed_ways": self.processed_way_count, "nodes_pending": len(self.nodes), "edges": self.edge_count},
            )

    def flush(self) -> None:
        """Write the buffered nodes and edges, clear the buffers and commit."""
        if not (self.nodes or self.edges):
            return

        pending_nodes = list(self.nodes.values())
        pending_edges = self.edges

        if pending_nodes:
            # Nodes shared with earlier batches already exist; skip them.
            insert_nodes = (
                postgresql_insert(RoutingNode)
                .values(pending_nodes)
                .on_conflict_do_nothing(index_elements=["dataset_id", "osm_node_id"])
            )
            self.session.execute(insert_nodes)
            # Counts submitted rows, including any the conflict clause skipped.
            self.node_count += len(pending_nodes)
            self.nodes.clear()

        if pending_edges:
            self.session.bulk_insert_mappings(RoutingEdge, pending_edges)
            self.edges = []

        self.session.commit()
|
||||
|
||||
|
||||
def _walkable(tags: dict[str, str], highway: str) -> bool:
    """Return whether pedestrians may use a way with these tags.

    The highway class must be in ``WALK_HIGHWAYS``. A restrictive ``foot``
    value always denies; a restrictive ``access`` value denies unless ``foot``
    explicitly allows.
    """
    if highway not in WALK_HIGHWAYS:
        return False

    foot = _tag_value(tags, "foot")
    if foot in NO_VALUES:
        return False

    foot_allowed = foot in YES_VALUES
    if _tag_value(tags, "access") in NO_VALUES and not foot_allowed:
        return False

    # Motorway/trunk classes need an explicit foot permission. None of these
    # values is in WALK_HIGHWAYS as currently defined, so this guard only
    # matters if that set is extended.
    needs_explicit_foot = highway in {"motorway", "motorway_link", "trunk", "trunk_link"}
    return foot_allowed or not needs_explicit_foot
|
||||
|
||||
|
||||
def _drivable(tags: dict[str, str], highway: str) -> bool:
    """Return whether cars may use a way with these tags.

    The highway class must be in ``DRIVE_HIGHWAYS``. A restrictive value on
    any of ``motorcar``, ``motor_vehicle`` or ``vehicle`` always denies. A
    restrictive ``access`` value denies unless one of those three tags
    explicitly allows. ``vehicle`` is included in that override, so e.g.
    ``access=private`` + ``vehicle=yes`` is drivable.
    """
    if highway not in DRIVE_HIGHWAYS:
        return False

    access = _tag_value(tags, "access")
    mode_values = [_tag_value(tags, key) for key in ("motorcar", "motor_vehicle", "vehicle")]

    if any(value in NO_VALUES for value in mode_values):
        return False

    # A mode-specific permission overrides the general ``access`` tag. The
    # same three tags are consulted for permission as for denial above.
    explicitly_allowed = any(value in YES_VALUES for value in mode_values)
    if access in NO_VALUES and not explicitly_allowed:
        return False

    # Path-like classes need an explicit motor permission. None of these
    # values is in DRIVE_HIGHWAYS as currently defined, so this guard only
    # matters if that set is extended.
    if highway in {"footway", "path", "pedestrian", "steps", "cycleway", "bridleway"}:
        return explicitly_allowed
    return True
|
||||
|
||||
|
||||
def _oneway_direction(tags: dict[str, str], highway: str) -> str:
    """Classify the driving direction of a way.

    Returns:
        ``"reverse"`` when ``oneway`` marks travel against the way direction,
        ``"forward"`` when it marks travel along it, or when the way is a
        roundabout or motorway without an explicit ``oneway`` override, and
        ``"both"`` otherwise.

    An explicit ``oneway=no`` (also ``false`` / ``0``) takes precedence over
    the one-way default implied by ``junction=roundabout`` or
    ``highway=motorway``.
    """
    oneway = _tag_value(tags, "oneway")
    if oneway in ONEWAY_REVERSE:
        return "reverse"
    if oneway in ONEWAY_FORWARD:
        return "forward"
    # An explicit two-way tag must win over the implied defaults below.
    if oneway in {"no", "false", "0"}:
        return "both"
    if tags.get("junction") == "roundabout" or highway == "motorway":
        return "forward"
    return "both"
|
||||
|
||||
|
||||
def _drive_speed_mps(tags: dict[str, str], highway: str) -> float:
    """Return the driving speed for a way in metres per second.

    Uses the parsed ``maxspeed`` tag when it yields a non-zero number,
    otherwise the per-class default (30 km/h for unknown classes). The result
    is never below 5.0 m/s.

    NOTE(review): the 5.0 floor is applied after converting to m/s (18 km/h),
    which overrides the 10 and 15 km/h defaults in DEFAULT_DRIVE_SPEED_KMH --
    confirm whether a 5 km/h floor was meant.
    """
    tagged_kmh = _parse_maxspeed(tags.get("maxspeed"))
    if tagged_kmh:
        kmh = tagged_kmh
    else:
        kmh = DEFAULT_DRIVE_SPEED_KMH.get(highway, 30)
    metres_per_second = float(kmh) / 3.6
    return max(5.0, metres_per_second)
|
||||
|
||||
|
||||
def _parse_maxspeed(value: str | None) -> float | None:
|
||||
if not value:
|
||||
return None
|
||||
text = value.strip().lower()
|
||||
if text in {"signals", "none", "walk", "variable"}:
|
||||
return None
|
||||
if text.endswith("mph"):
|
||||
number = _leading_float(text[:-3])
|
||||
return None if number is None else number * 1.60934
|
||||
return _leading_float(text)
|
||||
|
||||
|
||||
def _leading_float(value: str) -> float | None:
|
||||
digits = []
|
||||
for char in value.strip():
|
||||
if char.isdigit() or char == ".":
|
||||
digits.append(char)
|
||||
elif digits:
|
||||
break
|
||||
if not digits:
|
||||
return None
|
||||
try:
|
||||
return float("".join(digits))
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _routing_tags_json(tags: dict[str, str]) -> str:
|
||||
selected = {
|
||||
key: value
|
||||
for key, value in tags.items()
|
||||
if key in {"access", "bicycle", "bridge", "foot", "highway", "junction", "maxspeed", "motor_vehicle", "motorcar", "name", "oneway", "service", "surface", "tunnel", "vehicle"}
|
||||
}
|
||||
return json.dumps(selected, separators=(",", ":"))
|
||||
|
||||
|
||||
def _tag_value(tags: dict[str, str], key: str) -> str:
|
||||
return str(tags.get(key) or "").strip().lower()
|
||||
|
||||
|
||||
def _distance_m(lat_a: float, lon_a: float, lat_b: float, lon_b: float) -> float:
|
||||
radius = 6_371_000.0
|
||||
phi_a = math.radians(lat_a)
|
||||
phi_b = math.radians(lat_b)
|
||||
delta_phi = math.radians(lat_b - lat_a)
|
||||
delta_lambda = math.radians(lon_b - lon_a)
|
||||
hav = math.sin(delta_phi / 2) ** 2 + math.cos(phi_a) * math.cos(phi_b) * math.sin(delta_lambda / 2) ** 2
|
||||
return radius * 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))
|
||||
|
||||
|
||||
def _metadata(dataset: Dataset) -> dict[str, object]:
|
||||
try:
|
||||
value = json.loads(dataset.metadata_json or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return value if isinstance(value, dict) else {}
|
||||
|
||||
|
||||
def _emit(
|
||||
progress_callback: ProgressCallback | None,
|
||||
event_type: str,
|
||||
message: str,
|
||||
progress_current: int | None,
|
||||
progress_total: int | None,
|
||||
metadata: dict[str, object] | None = None,
|
||||
) -> None:
|
||||
if progress_callback is not None:
|
||||
progress_callback(event_type, message, progress_current, progress_total, metadata)
|
||||
Reference in New Issue
Block a user