from __future__ import annotations import json import math import re import threading import time from dataclasses import dataclass from datetime import date, datetime from typing import Iterator, Optional from shapely.geometry import LineString, MultiLineString, Point, mapping, shape from shapely.ops import linemerge, substring from sqlalchemy import and_, bindparam, case, exists, func, or_, select, text from sqlalchemy.orm import Session, aliased from app.address_search import ( address_by_token, address_point_by_token, address_point_token, address_token, coordinate_token, is_coordinate_token, is_address_point_token, is_address_token, is_location_token, parse_coordinate_token, ) from app.config import settings from app.gtfs_storage import ( GTFS_STOP_TIME_COLUMNS, SQLITE_IN_CHUNK_SIZE, all_scheduled_stop_ids, execute_sidecar_query, has_scheduled_stop as storage_has_scheduled_stop, scheduled_stop_ids as storage_scheduled_stop_ids, stop_times_by_trip as storage_stop_times_by_trip, stop_times_for_trip_range, uses_sidecar_stop_times, ) from app.models import ( CanonicalStop, CanonicalStopLink, Dataset, GtfsCalendar, GtfsCalendarDate, GtfsRoute, GtfsShape, GtfsStop, GtfsStopTime, GtfsTrip, OsmAddress, OsmFeature, RoutePattern, Source, ) from app.osm_storage import query_osm_features from app.pipeline.route_layer import ( canonical_stop_for_gtfs_stop, logical_stop_group_id, route_pattern_for_trip, ) from app.routing import route_between_points, snap_point_to_routing_graph from app.serializers import feature_collection MAX_DIRECT_ROWS = 12000 MAX_TRANSFER_BOARDINGS = 350 MAX_TARGET_DESTINATION_ARRIVALS = 1400 MAX_TARGET_SECOND_LEGS_PER_STOP = 48 MAX_TARGET_TRANSFER_CANDIDATES = 4500 MAX_BACKWARD_SECOND_LEG_OPTIONS = 160 OSM_STOP_MATCH_RADIUS_DEG = 0.0012 LEG_GEOMETRY_MAX_STOP_DISTANCE_DEG = 0.08 MAX_STOP_SEARCH_ROWS = 700 MAX_GROUP_STOP_IDS = 120 MAX_ROUTER_BOARDING_CANDIDATES = 2200 MAX_ROUTER_TRANSIT_LEGS = 6 MAX_JOURNEY_DATASET_PAIRS = 40 WALKING_TRANSFER_RADIUS_M = 450 WALKING_TRANSFER_RADIUS_DEG = WALKING_TRANSFER_RADIUS_M / 111_320 WALKING_TRANSFER_SPEED_MPS = 1.25 MAX_WALKING_TRANSFER_SOURCE_STOPS = 80 MAX_WALKING_TRANSFER_NEIGHBORS_PER_STOP = 8 ACCESS_TRANSFER_MAX_SECONDS = 45 * 60 MAX_ACCESS_TRANSFER_CANDIDATES = 4 PUBLIC_TRANSPORT_WALK_OPTION_MAX_SECONDS = 45 * 60 ADDRESS_ACCESS_RADIUS_M = 1800 ADDRESS_ACCESS_MAX_SECONDS = 30 * 60 ADDRESS_ACCESS_STOP_CANDIDATES = 4 ADDRESS_ACCESS_MAX_PAIR_CANDIDATES = 8 ADDRESS_ACCESS_MAX_DEEP_PAIR_CANDIDATES = 4 ADDRESS_ACCESS_SHORT_DIRECT_WALK_SECONDS = 20 * 60 ADDRESS_ACCESS_LONG_DISTANCE_HUB_THRESHOLD_M = 50_000 ADDRESS_ACCESS_MAJOR_HUB_RADIUS_M = 12_000 ADDRESS_ACCESS_MAJOR_HUB_CANDIDATES = 3 ADDRESS_ACCESS_NORMAL_PRIORITY = 100 ADDRESS_ACCESS_MAJOR_HUB_PRIORITY = 10 WALK_GEOMETRY_CACHE_TTL_SECONDS = 10 * 60 WALK_GEOMETRY_CACHE_MAX_ENTRIES = 1024 LEG_GEOMETRY_CACHE_TTL_SECONDS = 10 * 60 LEG_GEOMETRY_CACHE_MAX_ENTRIES = 2048 STOP_GROUP_PREFIX = "group:" STOP_EXACT_PREFIX = "stop:" STOP_PLACE_PREFIX = "place:" _walk_geometry_cache_lock = threading.RLock() _walk_geometry_cache: dict[tuple[float, float, float, float], tuple[float, tuple[dict | None, float, float | None]]] = {} _leg_geometry_cache_lock = threading.RLock() _leg_geometry_cache: dict[tuple[object, ...], tuple[float, dict | None, str, int | None]] = {} @dataclass(frozen=True) class StopSummary: id: int dataset_id: int stop_id: str name: str | None lat: float | None lon: float | None @dataclass(frozen=True) class StopSelection: display: StopSummary stop_ids_by_dataset: dict[int, tuple[str, ...]] canonical_stop_id: int | None = None @property def dataset_id(self) -> int: return next(iter(self.stop_ids_by_dataset)) @property def stop_ids(self) -> tuple[str, ...]: return self.stop_ids_by_dataset[self.dataset_id] @property def dataset_ids(self) -> tuple[int, ...]: return tuple(self.stop_ids_by_dataset) @dataclass(frozen=True) class _AccessStopCandidate: token: str selection: StopSelection distance_m: float priority: int = ADDRESS_ACCESS_NORMAL_PRIORITY def search_scheduled_stops( db: Session, query: str | None = None, source_ids: list[int] | None = None, limit: int = 25, bbox: tuple[float, float, float, float] | None = None, ) -> list[dict]: """Return stops that have imported stop_times. The importer may intentionally cap stop_times for large feeds. Searching only scheduled stops prevents the UI from offering stops that cannot route yet. """ active_dataset_ids = _active_gtfs_dataset_ids(db, source_ids=source_ids) if not active_dataset_ids: return [] stmt = ( select(GtfsStop, Source.id, Source.name) .join(Dataset, Dataset.id == GtfsStop.dataset_id) .join(Source, Source.id == Dataset.source_id) .where(GtfsStop.dataset_id.in_(active_dataset_ids)) ) q = (query or "").strip() if q: pattern = f"%{q}%" tokens = [token for token in re.split(r"[\s,;/]+", q) if token] token_filters = [ or_(GtfsStop.name.ilike(f"%{token}%"), GtfsStop.stop_id.ilike(f"%{token}%")) for token in tokens ] where_parts = [GtfsStop.name.ilike(pattern), GtfsStop.stop_id.ilike(pattern)] if token_filters: where_parts.append(and_(*token_filters)) stmt = stmt.where(or_(*where_parts)) rank = case( (GtfsStop.name.ilike(q), 0), (GtfsStop.name.ilike(f"{q}%"), 1), (GtfsStop.name.ilike(pattern), 2), (GtfsStop.stop_id.ilike(f"{q}%"), 3), else_=4, ) if bbox is not None: stmt = stmt.order_by(rank, *_bbox_order_expressions(GtfsStop, bbox), GtfsStop.name, GtfsStop.id) else: stmt = stmt.order_by(rank, GtfsStop.name, GtfsStop.id) else: if bbox is not None: stmt = stmt.order_by(*_bbox_order_expressions(GtfsStop, bbox), GtfsStop.name, GtfsStop.id) else: stmt = stmt.order_by(GtfsStop.name, GtfsStop.id) stmt = stmt.limit(MAX_STOP_SEARCH_ROWS * (3 if bbox is not None else 1)) groups: dict[tuple[int, str], dict] = {} for stop, source_id, source_name in db.execute(stmt).all(): group_id = logical_stop_group_id(stop) key = (stop.dataset_id, group_id) rank_value = _stop_match_rank(stop, q) group = groups.setdefault( key, { "dataset_id": stop.dataset_id, "group_id": group_id, "source_id": source_id, "source_name": source_name, "rank": rank_value, "matches": [], }, ) group["rank"] = min(int(group["rank"]), rank_value) group["matches"].append(stop) if not groups: return [] parents = _parent_stops_for_groups(db, groups.keys()) scheduled = _scheduled_stops_for_groups(db, groups.keys()) results = [] for key, group in groups.items(): scheduled_stops = scheduled.get(key, []) if not scheduled_stops: continue parent = parents.get(key) display_stop = parent or _best_display_stop(str(group["group_id"]), group["matches"], scheduled_stops) canonical = _canonical_stop_for_group(db, scheduled_stops) name = canonical.name if canonical is not None else display_stop.name display_parts = _city_stop_display_parts( name, display_stop.name, parent.name if parent is not None else None, *(stop.name for stop in scheduled_stops), ) display_name = display_parts["display_name"] result_id = ( _stop_place_token(canonical.id, display_stop.dataset_id) if canonical is not None else _stop_group_token(display_stop.dataset_id, str(group["group_id"])) ) display_rank = _stop_match_rank(display_stop, q) result_lat = canonical.lat if canonical is not None else display_stop.lat result_lon = canonical.lon if canonical is not None else display_stop.lon bbox_rank, bbox_distance_m = _bbox_rank(result_lat, result_lon, bbox) results.append( { "id": result_id, "canonical_stop_id": None if canonical is None else canonical.id, "dataset_id": display_stop.dataset_id, "stop_id": str(group["group_id"]), "name": name, "display_name": display_name, "city": display_parts["city"], "local_name": display_parts["local_name"], "lat": result_lat, "lon": result_lon, "source_id": group["source_id"], "source_name": group["source_name"], "scheduled": True, "grouped": True, "grouped_stop_count": len(scheduled_stops), "sample_stop_ids": [stop.stop_id for stop in scheduled_stops[:5]], "_display_rank": display_rank, "_match_rank": group["rank"], "_bbox_rank": bbox_rank, "_bbox_distance_m": bbox_distance_m, "_importance_rank": _station_importance_rank( display_name, name, display_stop.name, parent.name if parent is not None else None, *(stop.name for stop in scheduled_stops), ), } ) results.sort( key=lambda item: ( item["_bbox_rank"], item["_importance_rank"], item["_display_rank"], item["_match_rank"], item["_bbox_distance_m"], -(int(item["grouped_stop_count"])), item["name"] or "", item["stop_id"], ) ) if not source_ids or len(source_ids) > 1: results = _merge_canonical_stop_results(results) _enrich_canonical_stop_sources(db, results, active_dataset_ids) results.sort( key=lambda item: ( item["_bbox_rank"], item["_importance_rank"], item["_display_rank"], item["_match_rank"], item["_bbox_distance_m"], -(int(item["grouped_stop_count"])), item["name"] or "", item["stop_id"], ) ) selected = results[: max(1, min(limit, 100))] for item in selected: item.pop("_display_rank", None) item.pop("_match_rank", None) item.pop("_bbox_rank", None) item.pop("_bbox_distance_m", None) item.pop("_importance_rank", None) return selected def nearest_scheduled_stops( db: Session, *, lat: float, lon: float, source_ids: list[int] | None = None, limit: int = 3, radius_m: float = 900, ) -> list[dict]: active_dataset_ids = _active_gtfs_dataset_ids(db, source_ids=source_ids) if not active_dataset_ids: return [] selected_limit = max(1, min(int(limit), 25)) if settings.is_postgresql_database: rows = _nearest_canonical_stop_rows_postgresql( db, lat=lat, lon=lon, dataset_ids=active_dataset_ids, limit=selected_limit * 8, radius_m=radius_m, ) rows.extend( _nearest_visual_stop_rows_postgresql( db, lat=lat, lon=lon, dataset_ids=active_dataset_ids, limit=selected_limit * 8, radius_m=radius_m, ) ) rows.sort(key=lambda item: (float(item[2] or 0), int(item[0]), int(item[1]))) results: list[dict] = [] seen: set[int] = set() for canonical_stop_id, preferred_dataset_id, distance_m in rows: if int(canonical_stop_id) in seen: continue try: selection = _selection_for_canonical_stop( db, int(canonical_stop_id), dataset_ids=active_dataset_ids, preferred_dataset_id=int(preferred_dataset_id), ) except ValueError: continue seen.add(int(canonical_stop_id)) source = _source_payload_for_dataset_id(db, selection.dataset_id) or {} payload = _stop_payload(selection.display) payload.update( { "id": _stop_place_token(int(canonical_stop_id), selection.dataset_id), "kind": "stop", "canonical_stop_id": int(canonical_stop_id), "display_name": selection.display.name, "source_id": source.get("id"), "source_name": source.get("name"), "scheduled": True, "grouped": True, "grouped_stop_count": sum(len(stop_ids) for stop_ids in selection.stop_ids_by_dataset.values()), "distance_m": float(distance_m or 0), } ) results.append(payload) if len(results) >= selected_limit: break return results radius_deg = float(radius_m) / 111_320 bbox = (float(lon) - radius_deg, float(lat) - radius_deg, float(lon) + radius_deg, float(lat) + radius_deg) candidates = search_scheduled_stops(db, source_ids=source_ids, bbox=bbox, limit=selected_limit * 4) for item in candidates: if item.get("lat") is None or item.get("lon") is None: item["distance_m"] = float("inf") else: item["distance_m"] = _distance_m(float(lat), float(lon), float(item["lat"]), float(item["lon"])) item["kind"] = "stop" candidates = [item for item in candidates if float(item.get("distance_m") or 0) <= radius_m] candidates.sort(key=lambda item: (float(item.get("distance_m") or 0), item.get("display_name") or item.get("name") or "")) return candidates[:selected_limit] def _nearest_canonical_stop_rows_postgresql( db: Session, *, lat: float, lon: float, dataset_ids: list[int], limit: int, radius_m: float, ) -> list[tuple[int, int, float]]: radius_deg = float(radius_m) / 111_320 stmt = text( """ WITH point AS ( SELECT ST_SetSRID(ST_MakePoint(:lon, :lat), 4326) AS geom ) SELECT canonical_stops.id AS canonical_stop_id, canonical_stop_links.dataset_id AS dataset_id, ST_DistanceSphere(canonical_stops.geom, point.geom) AS distance_m FROM canonical_stops JOIN canonical_stop_links ON canonical_stop_links.canonical_stop_id = canonical_stops.id AND canonical_stop_links.object_type = 'gtfs_stop' JOIN datasets ON datasets.id = canonical_stop_links.dataset_id AND datasets.kind = 'gtfs' AND datasets.is_active IS TRUE CROSS JOIN point WHERE canonical_stop_links.dataset_id IN :dataset_ids AND canonical_stops.geom IS NOT NULL AND canonical_stops.geom && ST_Expand(point.geom, :radius_deg) AND ST_DWithin(canonical_stops.geom::geography, point.geom::geography, :radius_m) GROUP BY canonical_stops.id, canonical_stop_links.dataset_id, canonical_stops.geom, point.geom ORDER BY canonical_stops.geom <-> point.geom, canonical_stops.id LIMIT :limit """ ).bindparams(bindparam("dataset_ids", expanding=True)) rows = db.execute( stmt, { "lat": float(lat), "lon": float(lon), "dataset_ids": tuple(dataset_ids), "radius_deg": radius_deg, "radius_m": float(radius_m), "limit": max(1, int(limit)), }, ).all() return [(int(row[0]), int(row[1]), float(row[2] or 0)) for row in rows] def _nearest_visual_stop_rows_postgresql( db: Session, *, lat: float, lon: float, dataset_ids: list[int], limit: int, radius_m: float, ) -> list[tuple[int, int, float]]: radius_deg = float(radius_m) / 111_320 stmt = text( """ WITH point AS ( SELECT ST_SetSRID(ST_MakePoint(:lon, :lat), 4326) AS geom ), visual_hits AS ( SELECT osm_link.canonical_stop_id AS canonical_stop_id, gtfs_link.dataset_id AS dataset_id, ST_DistanceSphere(osm_features.geom, point.geom) AS distance_m FROM osm_features JOIN canonical_stop_links AS osm_link ON osm_link.object_type = 'osm_feature' AND osm_link.object_id = osm_features.id JOIN canonical_stop_links AS gtfs_link ON gtfs_link.canonical_stop_id = osm_link.canonical_stop_id AND gtfs_link.object_type = 'gtfs_stop' JOIN datasets ON datasets.id = gtfs_link.dataset_id AND datasets.kind = 'gtfs' AND datasets.is_active IS TRUE CROSS JOIN point WHERE gtfs_link.dataset_id IN :dataset_ids AND osm_features.kind IN ('stop', 'station', 'terminal') AND osm_features.geom IS NOT NULL AND osm_features.geom && ST_Expand(point.geom, :radius_deg) AND ST_DWithin(osm_features.geom::geography, point.geom::geography, :radius_m) ) SELECT canonical_stop_id, dataset_id, MIN(distance_m) AS distance_m FROM visual_hits GROUP BY canonical_stop_id, dataset_id ORDER BY MIN(distance_m), canonical_stop_id LIMIT :limit """ ).bindparams(bindparam("dataset_ids", expanding=True)) rows = db.execute( stmt, { "lat": float(lat), "lon": float(lon), "dataset_ids": tuple(dataset_ids), "radius_deg": radius_deg, "radius_m": float(radius_m), "limit": max(1, int(limit)), }, ).all() return [(int(row[0]), int(row[1]), float(row[2] or 0)) for row in rows] def _enrich_canonical_stop_sources(db: Session, results: list[dict], active_dataset_ids: list[int]) -> None: canonical_stop_ids = sorted( { int(item["canonical_stop_id"]) for item in results if item.get("canonical_stop_id") is not None } ) if not canonical_stop_ids or not active_dataset_ids: for item in results: item.setdefault("source_names", [item["source_name"]] if item.get("source_name") else []) return rows = db.execute( select(CanonicalStopLink.canonical_stop_id, Source.id, Source.name, func.count(CanonicalStopLink.id)) .join(Dataset, Dataset.id == CanonicalStopLink.dataset_id) .join(Source, Source.id == Dataset.source_id) .where( CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.canonical_stop_id.in_(canonical_stop_ids), CanonicalStopLink.dataset_id.in_(active_dataset_ids), ) .group_by(CanonicalStopLink.canonical_stop_id, Source.id, Source.name) .order_by(CanonicalStopLink.canonical_stop_id, Source.name, Source.id) ).all() summaries: dict[int, dict] = {} for canonical_stop_id, source_id, source_name, linked_stop_count in rows: summary = summaries.setdefault(int(canonical_stop_id), {"source_ids": [], "source_names": [], "linked_stop_count": 0}) summary["source_ids"].append(int(source_id)) summary["source_names"].append(str(source_name)) summary["linked_stop_count"] += int(linked_stop_count or 0) for item in results: canonical_stop_id = item.get("canonical_stop_id") if canonical_stop_id is None: item.setdefault("source_names", [item["source_name"]] if item.get("source_name") else []) continue summary = summaries.get(int(canonical_stop_id)) if not summary: item.setdefault("source_names", [item["source_name"]] if item.get("source_name") else []) continue source_names = summary["source_names"] item["source_names"] = source_names item["source_name"] = ", ".join(source_names[:3]) if len(source_names) > 3: item["source_name"] += f" +{len(source_names) - 3}" item["source_id"] = summary["source_ids"][0] if len(summary["source_ids"]) == 1 else None item["grouped_stop_count"] = max(int(item.get("grouped_stop_count") or 0), int(summary["linked_stop_count"])) def _merge_canonical_stop_results(results: list[dict]) -> list[dict]: merged: dict[tuple[object, ...], dict] = {} for item in results: canonical_stop_id = item.get("canonical_stop_id") key = ( ("canonical", canonical_stop_id) if canonical_stop_id is not None else ("group", item.get("dataset_id"), item.get("stop_id")) ) current = merged.get(key) if current is None: copied = dict(item) copied["source_names"] = [item["source_name"]] if item.get("source_name") else [] merged[key] = copied continue current["_display_rank"] = min(int(current["_display_rank"]), int(item["_display_rank"])) current["_match_rank"] = min(int(current["_match_rank"]), int(item["_match_rank"])) current["_bbox_rank"] = min(int(current.get("_bbox_rank", 2)), int(item.get("_bbox_rank", 2))) current["_bbox_distance_m"] = min( float(current.get("_bbox_distance_m", float("inf"))), float(item.get("_bbox_distance_m", float("inf"))), ) current["_importance_rank"] = min( int(current.get("_importance_rank", 3)), int(item.get("_importance_rank", 3)), ) current["grouped_stop_count"] = int(current.get("grouped_stop_count") or 0) + int(item.get("grouped_stop_count") or 0) current["sample_stop_ids"] = _merge_sample_stop_ids(current.get("sample_stop_ids", []), item.get("sample_stop_ids", [])) source_names = _merge_source_names(current.get("source_names", []), [item["source_name"]] if item.get("source_name") else []) current["source_names"] = source_names current["source_name"] = ", ".join(source_names[:3]) if len(source_names) > 3: current["source_name"] += f" +{len(source_names) - 3}" if len(source_names) > 1: current["source_id"] = None selected = list(merged.values()) selected.sort( key=lambda item: ( item.get("_bbox_rank", 2), item.get("_importance_rank", 3), item["_display_rank"], item["_match_rank"], item.get("_bbox_distance_m", float("inf")), -(int(item["grouped_stop_count"])), item["name"] or "", item["stop_id"], ) ) return selected def _merge_sample_stop_ids(left: list[str], right: list[str]) -> list[str]: merged = [] seen = set() for stop_id in [*left, *right]: if stop_id in seen: continue seen.add(stop_id) merged.append(stop_id) if len(merged) >= 8: break return merged def _merge_source_names(left: list[str], right: list[str]) -> list[str]: names = [] seen = set() for name in [*left, *right]: if not name or name in seen: continue seen.add(name) names.append(name) return names def _city_stop_display_parts(primary: str | None, *candidates: str | None) -> dict[str, str | None]: primary_name = _clean_stop_name(primary) if not primary_name: return {"display_name": None, "city": None, "local_name": None} if "," in primary_name: pairs = _candidate_city_stop_pairs(primary_name) if pairs: city, stop_name = pairs[0] return {"display_name": f"{city}, {stop_name}", "city": city, "local_name": stop_name} leading = _split_leading_city_stop_name(primary_name) if leading is not None: city, stop_name = leading return {"display_name": f"{city}, {stop_name}", "city": city, "local_name": stop_name} for candidate in candidates: for city, stop_name in _candidate_city_stop_pairs(candidate): local_name = _local_stop_name(primary_name, city, stop_name) if stop_name and (_stop_names_match(primary_name, stop_name) or _stop_names_match(local_name, stop_name)): return {"display_name": f"{city}, {local_name}", "city": city, "local_name": local_name} return {"display_name": primary_name, "city": None, "local_name": primary_name} def _normalize_city_stop_name(value: str) -> str: city, stop_name = _split_city_stop_name(value) if city and stop_name: return f"{city}, {stop_name}" return _clean_stop_name(value) or value def _split_city_stop_name(value: str | None, primary_name: str | None = None) -> tuple[str | None, str | None]: pairs = _candidate_city_stop_pairs(value, primary_name=primary_name) if pairs: return pairs[0] name = _clean_stop_name(value) return (None, name) def _candidate_city_stop_pairs(value: str | None, primary_name: str | None = None) -> list[tuple[str, str]]: name = _clean_stop_name(value) parts = _split_first_comma_outside_parentheses(name) if parts is None: return [] left, right = parts left = _clean_stop_name(left) right = _clean_stop_name(right) if not left or not right: return [] left_stop = _looks_like_stop_name(left) right_stop = _looks_like_stop_name(right) pairs: list[tuple[str, str]] = [] if primary_name: left_matches = _stop_names_match(primary_name, left) right_matches = _stop_names_match(primary_name, right) if left_matches and not right_matches and not right_stop: pairs.append((right, left)) if right_matches and not left_matches and not left_stop: pairs.append((left, right)) if left_stop and not right_stop: pairs.append((right, left)) elif right_stop and not left_stop: pairs.append((left, right)) elif not left_stop and not right_stop: pairs.append((left, right)) deduped: list[tuple[str, str]] = [] seen = set() for pair in pairs: if pair in seen: continue seen.add(pair) deduped.append(pair) return deduped def _split_first_comma_outside_parentheses(value: str | None) -> tuple[str, str] | None: if not value: return None depth = 0 for index, char in enumerate(value): if char == "(": depth += 1 elif char == ")" and depth > 0: depth -= 1 elif char == "," and depth == 0: return value[:index], value[index + 1 :] return None def _looks_like_stop_name(value: str) -> bool: normalized = _normalize_stop_search(value) if re.search(r"(^|[\s,(/-])hbf\.?($|[\s,)/-])", normalized): return True stop_tokens = ( "hauptbahnhof", "bahnhof", "station", "central station", "central train station", "steig", "tram", "bus", "zob", "ostseite", "westseite", ) return any(token in normalized for token in stop_tokens) def _split_leading_city_stop_name(value: str) -> tuple[str, str] | None: name = _clean_stop_name(value) if not name: return None match = re.match( r"^(.+?)\s+(central train station|central station|main station|hauptbahnhof(?:\s+.*)?|hbf\.?(?:\s+.*)?)$", name, flags=re.IGNORECASE, ) if not match: return None city = _clean_stop_name(match.group(1)) stop_name = _clean_stop_name(match.group(2)) if not city or not stop_name or _looks_like_stop_name(city): return None return city, stop_name def _local_stop_name(primary_name: str, city: str, candidate_stop_name: str | None) -> str: if _normalize_stop_search(primary_name).startswith(f"{_normalize_stop_search(city)} "): remainder = primary_name[len(city) :].strip(" ,") if remainder: return remainder if candidate_stop_name and _normalize_station_synonyms(primary_name) == _normalize_station_synonyms(candidate_stop_name): return candidate_stop_name return primary_name def _stop_names_match(left: str | None, right: str | None) -> bool: left_norm = _normalize_stop_search(left or "") right_norm = _normalize_stop_search(right or "") if not left_norm or not right_norm: return False if left_norm == right_norm or left_norm in right_norm or right_norm in left_norm: return True return _normalize_station_synonyms(left_norm) == _normalize_station_synonyms(right_norm) def _normalize_station_synonyms(value: str) -> str: normalized = _normalize_stop_search(value) normalized = re.sub(r"\bcentral train station\b", "mainstation", normalized) normalized = re.sub(r"\bcentral station\b", "mainstation", normalized) normalized = re.sub(r"\bmain station\b", "mainstation", normalized) normalized = re.sub(r"\bhauptbahnhof\b", "mainstation", normalized) normalized = re.sub(r"(^|[\s,(/-])hbf($|[\s,)/-])", " mainstation ", normalized) return re.sub(r"[^a-z0-9]+", "", normalized) def _clean_stop_name(value: str | None) -> str | None: cleaned = re.sub(r"\s+", " ", str(value or "")).strip() return cleaned or None def _stop_group_token(dataset_id: int, group_id: str) -> str: return f"{STOP_GROUP_PREFIX}{dataset_id}:{group_id}" def _stop_place_token(canonical_stop_id: int, dataset_id: int) -> str: return f"{STOP_PLACE_PREFIX}{canonical_stop_id}:{dataset_id}" def _canonical_stop_for_group(db: Session, stops: list[GtfsStop]) -> CanonicalStop | None: stop_ids = [stop.id for stop in stops] if not stop_ids: return None link = db.scalar( select(CanonicalStopLink) .where(CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.object_id.in_(stop_ids)) .order_by(CanonicalStopLink.role, CanonicalStopLink.id) ) if link is None: return None return db.get(CanonicalStop, link.canonical_stop_id) def _stop_match_rank(stop: GtfsStop, query: str) -> int: if not query: return 4 needle = _normalize_stop_search(query) name = _normalize_stop_search(stop.name or "") stop_id = _normalize_stop_search(stop.stop_id) if needle in {name, stop_id}: return 0 if name.startswith(needle) or stop_id.startswith(needle): return 1 if needle in name or needle in stop_id: return 2 tokens = [token for token in re.split(r"[\s,;/]+", needle) if token] haystack = f"{name} {stop_id}" if tokens and all(token in haystack for token in tokens): return 3 return 4 def _bbox_order_expressions(model, bbox: tuple[float, float, float, float]): min_lon, min_lat, max_lon, max_lat = bbox center_lon = (min_lon + max_lon) / 2 center_lat = (min_lat + max_lat) / 2 missing = or_(model.lon.is_(None), model.lat.is_(None)) inside = and_(model.lon >= min_lon, model.lon <= max_lon, model.lat >= min_lat, model.lat <= max_lat) bbox_rank = case((missing, 2), (inside, 0), else_=1) lon_offset = func.coalesce(model.lon, center_lon) - center_lon lat_offset = func.coalesce(model.lat, center_lat) - center_lat return (bbox_rank, lon_offset * lon_offset + lat_offset * lat_offset) def _bbox_rank( lat: float | None, lon: float | None, bbox: tuple[float, float, float, float] | None, ) -> tuple[int, float]: if bbox is None: return (1, 0.0) if lat is None or lon is None: return (2, float("inf")) min_lon, min_lat, max_lon, max_lat = bbox if min_lon <= lon <= max_lon and min_lat <= lat <= max_lat: return (0, 0.0) clamped_lon = min(max(lon, min_lon), max_lon) clamped_lat = min(max(lat, min_lat), max_lat) return (1, _distance_m(lat, lon, clamped_lat, clamped_lon)) def _station_importance_rank(*names: str | None) -> int: normalized_names = [_normalize_stop_search(name or "") for name in names if name] if any( re.search(r"(^|[\\s,(/-])hbf($|[\\s,)/-])", name) or "hauptbahnhof" in name or "central station" in name or "central train station" in name for name in normalized_names ): return 0 if any( re.search(r"(^|[\\s,(/-])bf($|[\\s,)/-])", name) or "bahnhof" in name or "station" in name for name in normalized_names ): return 1 if any("zob" in name or "busbahnhof" in name for name in normalized_names): return 2 return 3 def _normalize_stop_search(value: str) -> str: return re.sub(r"\s+", " ", value.casefold().strip()) def _parent_stops_for_groups(db: Session, group_keys) -> dict[tuple[int, str], GtfsStop]: requested = set(group_keys) if not requested: return {} dataset_ids = {dataset_id for dataset_id, _ in requested} group_ids = {group_id for _, group_id in requested} rows = db.scalars( select(GtfsStop).where(GtfsStop.dataset_id.in_(dataset_ids), GtfsStop.stop_id.in_(group_ids)) ).all() return { (stop.dataset_id, stop.stop_id): stop for stop in rows if (stop.dataset_id, stop.stop_id) in requested } def _scheduled_stops_for_groups(db: Session, group_keys) -> dict[tuple[int, str], list[GtfsStop]]: requested = set(group_keys) if not requested: return {} dataset_ids = {dataset_id for dataset_id, _ in requested} group_ids = {group_id for _, group_id in requested} if settings.is_postgresql_database: group_condition = or_( GtfsStop.stop_id.in_(group_ids), GtfsStop.parent_station.in_(group_ids), func.split_part(GtfsStop.stop_id, "::", 1).in_(group_ids), ) else: inferred_child_filters = [GtfsStop.stop_id.ilike(f"{group_id}::%") for group_id in group_ids] group_condition = or_(GtfsStop.stop_id.in_(group_ids), GtfsStop.parent_station.in_(group_ids), *inferred_child_filters) rows = db.scalars( select(GtfsStop) .where( GtfsStop.dataset_id.in_(dataset_ids), group_condition, *(_scheduled_gtfs_stop_condition() if settings.is_postgresql_database else ()), ) .order_by(GtfsStop.name, GtfsStop.stop_id) ).all() scheduled_by_dataset = {} if settings.is_postgresql_database else {dataset_id: all_scheduled_stop_ids(db, dataset_id) for dataset_id in dataset_ids} grouped: dict[tuple[int, str], list[GtfsStop]] = {} for stop in rows: if scheduled_by_dataset and stop.stop_id not in scheduled_by_dataset.get(stop.dataset_id, set()): continue group_id = logical_stop_group_id(stop) key = (stop.dataset_id, group_id) if key not in requested: continue bucket = grouped.setdefault(key, []) if len(bucket) < MAX_GROUP_STOP_IDS: bucket.append(stop) return grouped def _scheduled_gtfs_stop_condition(): scheduled_exists = ( select(GtfsStopTime.id) .where( GtfsStopTime.dataset_id == GtfsStop.dataset_id, GtfsStopTime.stop_id == GtfsStop.stop_id, ) .limit(1) .exists() ) return (scheduled_exists,) def _best_display_stop(group_id: str, matches: list[GtfsStop], scheduled_stops: list[GtfsStop]) -> GtfsStop: candidates = [*matches, *scheduled_stops] return min( candidates, key=lambda stop: ( 0 if stop.stop_id == group_id and stop.parent_station is None else 1, 0 if stop.parent_station == group_id else 1, 0 if stop.parent_station is not None else 1, 0 if stop.lat is not None and stop.lon is not None else 1, stop.name or "", stop.stop_id, ), ) def _resolve_stop_selection(db: Session, value: int | str, source_ids: list[int] | None = None) -> StopSelection: token = str(value).strip() if is_location_token(token): raise ValueError("selected location must be routed through location-aware search") active_dataset_ids = _active_gtfs_dataset_ids(db, source_ids=source_ids) if token.startswith(STOP_PLACE_PREFIX): canonical_stop_id, dataset_id = _parse_stop_place_token(token) return _selection_for_canonical_stop( db, canonical_stop_id, dataset_ids=active_dataset_ids, preferred_dataset_id=dataset_id, ) if token.startswith(STOP_GROUP_PREFIX): dataset_id, group_id = _parse_stop_group_token(token) selection = _selection_for_group(db, dataset_id, group_id) if selection.canonical_stop_id is not None: return _selection_for_canonical_stop( db, selection.canonical_stop_id, dataset_ids=active_dataset_ids, preferred_dataset_id=dataset_id, ) return selection exact_external_stop_id = False if token.startswith(STOP_EXACT_PREFIX): token = token[len(STOP_EXACT_PREFIX) :] exact_external_stop_id = True stop = _active_stop_by_external_stop_id(db, token, active_dataset_ids) if token else None if stop is None and not exact_external_stop_id and token.isdigit(): candidate = db.get(GtfsStop, int(token)) if candidate is not None and (not active_dataset_ids or candidate.dataset_id in active_dataset_ids): stop = candidate if stop is None: raise ValueError("from_stop_id and to_stop_id must reference existing GTFS stops") return _selection_for_stop(db, stop, active_dataset_ids) def resolve_location_summary(db: Session, value: int | str, source_ids: list[int] | None = None) -> StopSummary: token = str(value).strip() if is_coordinate_token(token): lat, lon = parse_coordinate_token(token) return _coordinate_summary(db, lat, lon) if is_address_point_token(token): address, lat, lon = address_point_by_token(db, token) return _address_summary(address, db=db, lat=lat, lon=lon, street_level=True) if is_address_token(token): return _address_summary(address_by_token(db, token), db=db) return _resolve_stop_selection(db, token, source_ids=source_ids).display def _address_summary( address: OsmAddress, *, db: Session | None = None, lat: float | None = None, lon: float | None = None, street_level: bool = False, ) -> StopSummary: resolved_lat = address.lat if lat is None else lat resolved_lon = address.lon if lon is None else lon snapped = _snap_walk_location(db, lat=resolved_lat, lon=resolved_lon) if snapped is not None: resolved_lat, resolved_lon = snapped if street_level and resolved_lat is not None and resolved_lon is not None: stop_id = address_point_token(address.id, float(resolved_lat), float(resolved_lon)) name = _street_address_name(address) else: stop_id = address_token(address.id) name = address.display_name return StopSummary( id=address.id, dataset_id=address.dataset_id, stop_id=stop_id, name=name, lat=resolved_lat, lon=resolved_lon, ) def _coordinate_summary(db: Session, lat: float, lon: float) -> StopSummary: token = coordinate_token(lat, lon) snapped = _snap_walk_location(db, lat=lat, lon=lon) resolved_lat, resolved_lon = snapped if snapped is not None else (float(lat), float(lon)) return StopSummary( id=0, dataset_id=0, stop_id=token, name=f"Map point {lat:.5f}, {lon:.5f}", lat=resolved_lat, lon=resolved_lon, ) def _snap_walk_location(db: Session | None, *, lat: float | None, lon: float | None) -> tuple[float, float] | None: if db is None or lat is None or lon is None: return None try: snapped = snap_point_to_routing_graph(db, lon=float(lon), lat=float(lat), mode="walk", max_distance_m=250) except Exception: # noqa: BLE001 - snapping must not break address/coordinate routing return None if snapped is None: return None return float(snapped["lat"]), float(snapped["lon"]) def _street_address_name(address: OsmAddress) -> str: local_name = address.street or address.place or address.name or address.display_name or "Address" locality = " ".join(str(part) for part in [address.postcode, address.city] if part).strip() return f"{local_name}, {locality}" if locality else str(local_name) def _active_stop_by_external_stop_id(db: Session, stop_id: str, active_dataset_ids: list[int]) -> GtfsStop | None: stmt = ( select(GtfsStop) .join(Dataset, Dataset.id == GtfsStop.dataset_id) .where(Dataset.is_active.is_(True), Dataset.kind == "gtfs", GtfsStop.stop_id == stop_id) .order_by( GtfsStop.dataset_id, case((GtfsStop.parent_station.is_(None), 0), else_=1), GtfsStop.id, ) ) if active_dataset_ids: stmt = stmt.where(GtfsStop.dataset_id.in_(active_dataset_ids)) return db.scalar(stmt) def _selection_for_stop(db: Session, stop: GtfsStop, active_dataset_ids: list[int]) -> StopSelection: if _has_scheduled_stop(db, stop): canonical = canonical_stop_for_gtfs_stop(db, stop) if canonical is not None: return _selection_for_canonical_stop( db, canonical.id, dataset_ids=active_dataset_ids, preferred_dataset_id=stop.dataset_id, ) return StopSelection( display=_stop_summary(stop), stop_ids_by_dataset={stop.dataset_id: (stop.stop_id,)}, canonical_stop_id=None if canonical is None else canonical.id, ) selection = _selection_for_group(db, stop.dataset_id, stop.parent_station or stop.stop_id) if selection.canonical_stop_id is not None: return _selection_for_canonical_stop( db, selection.canonical_stop_id, dataset_ids=active_dataset_ids, preferred_dataset_id=stop.dataset_id, ) return selection def _parse_stop_group_token(token: str) -> tuple[int, str]: rest = token[len(STOP_GROUP_PREFIX) :] if ":" not in rest: raise ValueError("invalid grouped stop token") dataset_text, group_id = rest.split(":", 1) try: dataset_id = int(dataset_text) except ValueError as exc: raise ValueError("invalid grouped stop token") from exc if not group_id: raise ValueError("invalid grouped stop token") return dataset_id, group_id def _parse_stop_place_token(token: str) -> tuple[int, int]: rest = token[len(STOP_PLACE_PREFIX) :] if ":" not in rest: raise ValueError("invalid canonical stop token") canonical_text, dataset_text = rest.split(":", 1) try: canonical_stop_id = int(canonical_text) dataset_id = int(dataset_text) except ValueError as exc: raise ValueError("invalid canonical stop token") from exc return canonical_stop_id, dataset_id def _selection_for_canonical_stop( db: Session, canonical_stop_id: int, dataset_ids: list[int] | None = None, preferred_dataset_id: int | None = None, ) -> StopSelection: canonical = db.get(CanonicalStop, canonical_stop_id) if canonical is None: raise ValueError("selected stop place does not exist") active_dataset_ids = _active_gtfs_dataset_ids(db) if dataset_ids is None else dataset_ids stop_ids_by_dataset = _gtfs_stop_ids_for_canonical_stop_by_dataset(db, canonical_stop_id, active_dataset_ids) scheduled_by_dataset: dict[int, tuple[str, ...]] = {} for dataset_id in _preferred_dataset_order(stop_ids_by_dataset, preferred_dataset_id): scheduled_stop_ids = _scheduled_stop_ids(db, dataset_id, stop_ids_by_dataset[dataset_id]) if scheduled_stop_ids: scheduled_by_dataset[dataset_id] = scheduled_stop_ids if not scheduled_by_dataset: raise ValueError("selected stop place has no imported scheduled stop_times in the selected source scope") display_dataset_id = preferred_dataset_id if preferred_dataset_id in scheduled_by_dataset else next(iter(scheduled_by_dataset)) return StopSelection( display=StopSummary( id=canonical.id, dataset_id=display_dataset_id, stop_id=f"canonical:{canonical.id}", name=canonical.name, lat=canonical.lat, lon=canonical.lon, ), stop_ids_by_dataset=scheduled_by_dataset, canonical_stop_id=canonical.id, ) def _selection_for_group(db: Session, dataset_id: int, group_id: str) -> StopSelection: scheduled = _scheduled_stops_for_groups(db, [(dataset_id, group_id)]).get((dataset_id, group_id), []) if not scheduled: raise ValueError("selected stop group has no imported scheduled stop_times") parent = _parent_stops_for_groups(db, [(dataset_id, group_id)]).get((dataset_id, group_id)) display = parent or _best_display_stop(group_id, [], scheduled) canonical = _canonical_stop_for_group(db, scheduled) return StopSelection( display=_stop_summary(display), stop_ids_by_dataset={dataset_id: tuple(stop.stop_id for stop in scheduled[:MAX_GROUP_STOP_IDS])}, canonical_stop_id=None if canonical is None else canonical.id, ) def _gtfs_stop_ids_for_canonical_stop_by_dataset( db: Session, canonical_stop_id: int, dataset_ids: list[int] ) -> dict[int, tuple[str, ...]]: if not dataset_ids: return {} rows = db.execute( select(CanonicalStopLink.dataset_id, CanonicalStopLink.external_id) .where( CanonicalStopLink.canonical_stop_id == canonical_stop_id, CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.dataset_id.in_(dataset_ids), ) .order_by(CanonicalStopLink.dataset_id, CanonicalStopLink.role, CanonicalStopLink.external_id) ).all() grouped: dict[int, list[str]] = {} for dataset_id, external_id in rows: bucket = grouped.setdefault(int(dataset_id), []) if len(bucket) < MAX_GROUP_STOP_IDS: bucket.append(str(external_id)) return {dataset_id: tuple(stop_ids) for dataset_id, stop_ids in grouped.items()} def _preferred_dataset_order(stop_ids_by_dataset: dict[int, tuple[str, ...]], preferred_dataset_id: int | None) -> list[int]: dataset_ids = sorted(stop_ids_by_dataset) if preferred_dataset_id is None or preferred_dataset_id not in stop_ids_by_dataset: return dataset_ids return [preferred_dataset_id, *[dataset_id for dataset_id in dataset_ids if dataset_id != preferred_dataset_id]] def _scheduled_stop_ids(db: Session, dataset_id: int, stop_ids: tuple[str, ...]) -> tuple[str, ...]: return storage_scheduled_stop_ids(db, dataset_id, stop_ids)[:MAX_GROUP_STOP_IDS] def _has_scheduled_stop(db: Session, stop: GtfsStop) -> bool: return storage_has_scheduled_stop(db, stop.dataset_id, stop.stop_id) def find_journeys( db: Session, from_stop_id: int | str, to_stop_id: int | str, departure: str, max_transfers: int = 0, limit: int = 5, transfer_seconds: int = 120, source_ids: list[int] | None = None, via_stop_id: int | str | None = None, service_date: str | date | None = None, _allow_access_transfer: bool = True, _allow_address_access: bool = True, ) -> dict: if via_stop_id is not None and str(via_stop_id).strip(): return _find_journeys_via( db=db, from_stop_id=from_stop_id, via_stop_id=via_stop_id, to_stop_id=to_stop_id, departure=departure, max_transfers=max_transfers, transfer_seconds=transfer_seconds, limit=limit, source_ids=source_ids, service_date=service_date, ) if _allow_address_access and (is_location_token(from_stop_id) or is_location_token(to_stop_id)): return _find_journeys_with_address_access( db=db, from_stop_id=from_stop_id, to_stop_id=to_stop_id, departure=departure, max_transfers=max_transfers, transfer_seconds=transfer_seconds, limit=limit, source_ids=source_ids, service_date=service_date, ) from_selection = _resolve_stop_selection(db, from_stop_id, source_ids=source_ids) to_selection = _resolve_stop_selection(db, to_stop_id, source_ids=source_ids) departure_seconds = parse_gtfs_time(departure) if departure_seconds is None: raise ValueError("departure must be HH:MM or HH:MM:SS") parsed_service_date = parse_service_date(service_date) stop_cache: dict[tuple[int, str], StopSummary] = {} for dataset_id, stop_ids in from_selection.stop_ids_by_dataset.items(): for stop_id in stop_ids: stop_cache.setdefault((dataset_id, stop_id), _stop_summary_for_stop_id(db, dataset_id, stop_id)) for dataset_id, stop_ids in to_selection.stop_ids_by_dataset.items(): for stop_id in stop_ids: stop_cache.setdefault((dataset_id, stop_id), _stop_summary_for_stop_id(db, dataset_id, stop_id)) osm_stop_cache: dict[tuple[int, str], dict] = {} max_journeys = max(1, min(limit, 10)) common_dataset_ids = sorted(set(from_selection.stop_ids_by_dataset) & set(to_selection.stop_ids_by_dataset)) service_ids_by_dataset = _service_ids_by_dataset(db, sorted(set(from_selection.stop_ids_by_dataset) | set(to_selection.stop_ids_by_dataset)), parsed_service_date) direct: list[dict] = [] for dataset_id in common_dataset_ids: service_ids = service_ids_by_dataset.get(dataset_id) if service_ids == set(): continue direct.extend( _find_direct_journeys( db=db, dataset_id=dataset_id, service_ids=service_ids, from_stop_ids=from_selection.stop_ids_by_dataset[dataset_id], to_stop_ids=to_selection.stop_ids_by_dataset[dataset_id], earliest_departure=departure_seconds, limit=max_journeys, stop_cache=stop_cache, osm_stop_cache=osm_stop_cache, ) ) direct = sorted(direct, key=_journey_sort_key)[:max_journeys] if max_transfers > 0: direct_arrival = direct[0]["arrival_seconds"] if direct else None transfer_journeys: list[dict] = [] for first_dataset_id, second_dataset_id in _journey_dataset_pairs(from_selection, to_selection): first_service_ids = service_ids_by_dataset.get(first_dataset_id) second_service_ids = service_ids_by_dataset.get(second_dataset_id) if first_service_ids == set() or second_service_ids == set(): continue transfer_journeys.extend( _find_one_transfer_journeys( db=db, first_dataset_id=first_dataset_id, second_dataset_id=second_dataset_id, first_service_ids=first_service_ids, second_service_ids=second_service_ids, from_stop_ids=from_selection.stop_ids_by_dataset[first_dataset_id], to_stop_ids=to_selection.stop_ids_by_dataset[second_dataset_id], origin_canonical_stop_id=from_selection.canonical_stop_id, target_canonical_stop_id=to_selection.canonical_stop_id, earliest_departure=departure_seconds, latest_arrival=direct_arrival, transfer_seconds=max(0, transfer_seconds), limit=max_journeys, stop_cache=stop_cache, osm_stop_cache=osm_stop_cache, ) ) transfer_journeys = sorted( transfer_journeys, key=_journey_sort_key, )[: max_journeys * 3] if max_transfers > 1: best_known_arrival = min( ( int(journey["arrival_seconds"]) for journey in [*direct, *transfer_journeys] if journey.get("arrival_seconds") is not None ), default=None, ) round_journeys: list[dict] = [] for dataset_id in common_dataset_ids: service_ids = service_ids_by_dataset.get(dataset_id) if service_ids == set(): continue round_journeys.extend( _find_round_journeys( db=db, dataset_id=dataset_id, service_ids=service_ids, from_selection=from_selection, to_selection=to_selection, earliest_departure=departure_seconds, max_transfers=max(0, max_transfers), transfer_seconds=max(0, transfer_seconds), latest_arrival=best_known_arrival, limit=max_journeys, stop_cache=stop_cache, osm_stop_cache=osm_stop_cache, ) ) transfer_journeys = sorted( [*transfer_journeys, *round_journeys], key=_journey_sort_key, )[: max_journeys * 3] else: transfer_journeys = [] walk_journey = _find_walk_only_journey( db, from_selection=from_selection, to_selection=to_selection, departure_seconds=departure_seconds, ) walk_journeys = [] if walk_journey is None else [walk_journey] journeys = _filter_reasonable_journeys([*walk_journeys, *transfer_journeys, *direct]) unique: dict[tuple[str, ...], dict] = {} for journey in sorted(journeys, key=_journey_sort_key): key = tuple(_journey_leg_signature(leg) for leg in journey["legs"]) unique.setdefault(key, journey) selected = _select_diverse_journeys(unique.values(), limit=max_journeys) if not selected and _allow_access_transfer and max_transfers > 0: access_journeys = _find_access_transfer_journeys( db=db, from_selection=from_selection, to_stop_id=to_stop_id, earliest_departure=departure_seconds, max_transfers=max_transfers, transfer_seconds=max(0, transfer_seconds), limit=max_journeys, source_ids=source_ids, service_date=parsed_service_date, stop_cache=stop_cache, osm_stop_cache=osm_stop_cache, ) selected = list( { tuple(_journey_leg_signature(leg) for leg in journey["legs"]): journey for journey in sorted(access_journeys, key=_journey_sort_key) }.values() )[:max_journeys] selected_dataset_ids = sorted( { int(leg["dataset_id"]) for journey in selected for leg in journey.get("legs", []) if leg.get("dataset_id") is not None } ) searched_dataset_ids = sorted(set(from_selection.stop_ids_by_dataset) | set(to_selection.stop_ids_by_dataset)) source_payloads = _source_payloads_for_dataset_ids(db, selected_dataset_ids or searched_dataset_ids) return { "from": _stop_payload(from_selection.display), "to": _stop_payload(to_selection.display), "source": source_payloads[0] if len(source_payloads) == 1 else None, "sources": source_payloads, "dataset_id": selected_dataset_ids[0] if len(selected_dataset_ids) == 1 else None, "dataset_ids": selected_dataset_ids or searched_dataset_ids, "departure_time": format_gtfs_time(departure_seconds), "departure_time_label": format_gtfs_time_label(departure_seconds), "service_date": None if parsed_service_date is None else parsed_service_date.isoformat(), "max_transfers": max(0, max_transfers), "journeys": selected, } def _find_journeys_with_address_access( db: Session, from_stop_id: int | str, to_stop_id: int | str, departure: str, max_transfers: int, transfer_seconds: int, limit: int, source_ids: list[int] | None, service_date: str | date | None, ) -> dict: departure_seconds = parse_gtfs_time(departure) if departure_seconds is None: raise ValueError("departure must be HH:MM or HH:MM:SS") parsed_service_date = parse_service_date(service_date) active_dataset_ids = _active_gtfs_dataset_ids(db, source_ids=source_ids) from_location = resolve_location_summary(db, from_stop_id, source_ids=source_ids) to_location = resolve_location_summary(db, to_stop_id, source_ids=source_ids) max_journeys = max(1, min(limit, 10)) journeys: list[dict] = [] direct_walk = _walk_only_journey_between_summaries( db, from_location=from_location, to_location=to_location, departure_seconds=departure_seconds, dataset_id=(active_dataset_ids[0] if active_dataset_ids else from_location.dataset_id), route_geometry=True, ) if direct_walk is not None: journeys.append(direct_walk) origin_is_address = is_location_token(from_stop_id) destination_is_address = is_location_token(to_stop_id) short_direct_walk_only = ( direct_walk is not None and origin_is_address != destination_is_address and int(direct_walk.get("duration_seconds") or 0) <= ADDRESS_ACCESS_SHORT_DIRECT_WALK_SECONDS ) access_distance_m = ( _distance_m(float(from_location.lat), float(from_location.lon), float(to_location.lat), float(to_location.lon)) if from_location.lat is not None and from_location.lon is not None and to_location.lat is not None and to_location.lon is not None else 0 ) include_major_hubs = ( origin_is_address and destination_is_address and access_distance_m >= ADDRESS_ACCESS_LONG_DISTANCE_HUB_THRESHOLD_M ) origin_candidates = _location_stop_candidates( db, from_stop_id, from_location, active_dataset_ids, source_ids=source_ids, include_major_hubs=include_major_hubs, ) destination_candidates = _location_stop_candidates( db, to_stop_id, to_location, active_dataset_ids, source_ids=source_ids, include_major_hubs=include_major_hubs, ) if short_direct_walk_only: origin_candidates = [] destination_candidates = [] candidate_pairs = [] else: candidate_pairs = _address_access_candidate_pairs( origin_candidates, destination_candidates, origin_is_address=origin_is_address, destination_is_address=destination_is_address, max_pairs=ADDRESS_ACCESS_MAX_DEEP_PAIR_CANDIDATES if max_transfers > 1 else ADDRESS_ACCESS_MAX_PAIR_CANDIDATES, ) access_leg_cache: dict[str, dict | None] = {} transit_departure_cache: dict[str, int | None] = {} for origin, destination in candidate_pairs: access_leg = access_leg_cache.get(origin.token) transit_departure_seconds = transit_departure_cache.get(origin.token) if origin.token not in transit_departure_cache: access_leg = None transit_departure_seconds = departure_seconds if origin_is_address: access_leg = _walk_leg_between_summaries( db, from_stop=from_location, to_stop=origin.selection.display, departure_seconds=departure_seconds, dataset_id=origin.selection.dataset_id, max_duration_seconds=ADDRESS_ACCESS_MAX_SECONDS, route_geometry=True, ) if access_leg is None: transit_departure_seconds = None else: transit_departure_seconds = int(access_leg["arrival_seconds"]) access_leg_cache[origin.token] = access_leg transit_departure_cache[origin.token] = transit_departure_seconds if transit_departure_seconds is None: continue transit_departure = format_gtfs_time(transit_departure_seconds) if transit_departure is None: continue try: transit = find_journeys( db=db, from_stop_id=origin.token, to_stop_id=destination.token, departure=transit_departure, max_transfers=max_transfers, transfer_seconds=transfer_seconds, limit=max(max_journeys, 6), source_ids=source_ids, service_date=parsed_service_date, _allow_access_transfer=include_major_hubs, _allow_address_access=False, ) except ValueError: continue for transit_journey in transit.get("journeys", [])[: max_journeys * 2]: egress_leg = None if destination_is_address: arrival_seconds = transit_journey.get("arrival_seconds") if arrival_seconds is None: continue egress_leg = _walk_leg_between_summaries( db, from_stop=destination.selection.display, to_stop=to_location, departure_seconds=int(arrival_seconds), dataset_id=destination.selection.dataset_id, max_duration_seconds=ADDRESS_ACCESS_MAX_SECONDS, route_geometry=True, ) if egress_leg is None: continue combined = _compose_address_access_journey( transit_journey, access_leg=access_leg, egress_leg=egress_leg, ) if combined is not None: journeys.append(combined) if include_major_hubs and len(journeys) >= max_journeys: break if include_major_hubs and len(journeys) >= max_journeys: break unique: dict[tuple[str, ...], dict] = {} for journey in sorted(_filter_reasonable_journeys(journeys), key=_journey_sort_key): key = tuple(_journey_leg_signature(leg) for leg in journey["legs"]) unique.setdefault(key, journey) selected = _select_diverse_journeys(unique.values(), limit=max_journeys) selected_dataset_ids = sorted( { int(leg["dataset_id"]) for journey in selected for leg in journey.get("legs", []) if leg.get("dataset_id") is not None } ) searched_dataset_ids = sorted(active_dataset_ids) source_payloads = _source_payloads_for_dataset_ids(db, selected_dataset_ids or searched_dataset_ids) diagnostics = { "address_access": { "origin_candidates": len(origin_candidates), "destination_candidates": len(destination_candidates), "searched_pairs": len(candidate_pairs), "max_pairs": ADDRESS_ACCESS_MAX_DEEP_PAIR_CANDIDATES if max_transfers > 1 else ADDRESS_ACCESS_MAX_PAIR_CANDIDATES, "major_hubs": include_major_hubs, } } return { "from": _stop_payload(from_location), "to": _stop_payload(to_location), "source": source_payloads[0] if len(source_payloads) == 1 else None, "sources": source_payloads, "dataset_id": selected_dataset_ids[0] if len(selected_dataset_ids) == 1 else None, "dataset_ids": selected_dataset_ids or searched_dataset_ids, "departure_time": format_gtfs_time(departure_seconds), "departure_time_label": format_gtfs_time_label(departure_seconds), "service_date": None if parsed_service_date is None else parsed_service_date.isoformat(), "max_transfers": max(0, max_transfers), "diagnostics": diagnostics, "journeys": selected, } def _address_access_candidate_pairs( origins: list[_AccessStopCandidate], destinations: list[_AccessStopCandidate], *, origin_is_address: bool, destination_is_address: bool, max_pairs: int, ) -> list[tuple[_AccessStopCandidate, _AccessStopCandidate]]: pairs = [ ( (origin.distance_m if origin_is_address else 0) + (destination.distance_m if destination_is_address else 0), origin, destination, ) for origin in origins for destination in destinations ] pairs.sort(key=lambda item: (item[0], item[1].distance_m, item[2].distance_m, item[1].token, item[2].token)) if not origin_is_address or not destination_is_address: return [(origin, destination) for _, origin, destination in pairs] closest_count = max(2, max_pairs // 2) selected: list[tuple[float, _AccessStopCandidate, _AccessStopCandidate]] = [] seen: set[tuple[str, str]] = set() priority_pairs = sorted( ( item for item in pairs if item[2].priority < ADDRESS_ACCESS_NORMAL_PRIORITY ), key=lambda item: ( item[2].priority, item[1].distance_m, item[2].distance_m, item[0], item[1].token, item[2].token, ), ) def append_item(item: tuple[float, _AccessStopCandidate, _AccessStopCandidate]) -> bool: _, origin, destination = item key = (origin.token, destination.token) if key in seen: return False seen.add(key) selected.append(item) return True priority_budget = max(0, max_pairs - closest_count) if priority_budget > 0: for item in priority_pairs: append_item(item) if len(selected) >= priority_budget: break for item in pairs[:closest_count]: append_item(item) for item in priority_pairs: append_item(item) if len(selected) >= max_pairs: break for item in pairs: append_item(item) if len(selected) >= max_pairs: break return [(origin, destination) for _, origin, destination in selected[:max_pairs]] def _location_stop_candidates( db: Session, token: int | str, location: StopSummary, active_dataset_ids: list[int], *, source_ids: list[int] | None, include_major_hubs: bool = False, ) -> list[_AccessStopCandidate]: if not is_location_token(token): selection = _resolve_stop_selection(db, token, source_ids=source_ids) if selection.canonical_stop_id is not None: return [ _AccessStopCandidate( token=_stop_place_token(selection.canonical_stop_id, selection.dataset_id), selection=selection, distance_m=0, ) ] return [_AccessStopCandidate(token=str(token), selection=selection, distance_m=0)] if location.lon is None or location.lat is None or not active_dataset_ids: return [] rows = ( _nearby_canonical_stops_postgresql(db, location, active_dataset_ids) if settings.is_postgresql_database else _nearby_canonical_stops_sqlite(db, location, active_dataset_ids) ) candidates: list[_AccessStopCandidate] = [] seen: set[int] = set() for canonical_stop_id, preferred_dataset_id, distance_m in rows: if int(canonical_stop_id) in seen: continue seen.add(int(canonical_stop_id)) try: selection = _selection_for_canonical_stop( db, int(canonical_stop_id), dataset_ids=active_dataset_ids, preferred_dataset_id=int(preferred_dataset_id), ) except ValueError: continue candidates.append( _AccessStopCandidate( token=_stop_place_token(int(canonical_stop_id), selection.dataset_id), selection=selection, distance_m=float(distance_m or 0), ) ) if len(candidates) >= ADDRESS_ACCESS_STOP_CANDIDATES: break if include_major_hubs: candidates = _merge_access_stop_candidates( candidates, _location_major_hub_stop_candidates(db, token, location, active_dataset_ids), ) return candidates def _merge_access_stop_candidates( primary: list[_AccessStopCandidate], extra: list[_AccessStopCandidate], ) -> list[_AccessStopCandidate]: merged = list(primary) seen_tokens = {candidate.token for candidate in merged} seen_canonical_ids = { candidate.selection.canonical_stop_id for candidate in merged if candidate.selection.canonical_stop_id is not None } for candidate in extra: canonical_stop_id = candidate.selection.canonical_stop_id if candidate.token in seen_tokens or (canonical_stop_id is not None and canonical_stop_id in seen_canonical_ids): continue merged.append(candidate) seen_tokens.add(candidate.token) if canonical_stop_id is not None: seen_canonical_ids.add(canonical_stop_id) return merged def _location_major_hub_stop_candidates( db: Session, token: int | str, location: StopSummary, active_dataset_ids: list[int], ) -> list[_AccessStopCandidate]: if location.lon is None or location.lat is None or not active_dataset_ids: return [] locality = _address_city_for_token(db, token) rows = ( _major_hub_canonical_stops_postgresql(db, location, active_dataset_ids, locality=locality) if settings.is_postgresql_database else _major_hub_canonical_stops_sqlite(db, location, active_dataset_ids, locality=locality) ) candidates: list[_AccessStopCandidate] = [] seen: set[int] = set() for canonical_stop_id, preferred_dataset_id, distance_m in rows: if int(canonical_stop_id) in seen: continue seen.add(int(canonical_stop_id)) try: selection = _selection_for_canonical_stop( db, int(canonical_stop_id), dataset_ids=active_dataset_ids, preferred_dataset_id=int(preferred_dataset_id), ) except ValueError: continue candidates.append( _AccessStopCandidate( token=_stop_place_token(int(canonical_stop_id), selection.dataset_id), selection=selection, distance_m=float(distance_m or 0), priority=ADDRESS_ACCESS_MAJOR_HUB_PRIORITY, ) ) if len(candidates) >= ADDRESS_ACCESS_MAJOR_HUB_CANDIDATES: break return candidates def _address_city_for_token(db: Session, token: int | str) -> str: try: if is_coordinate_token(token): return "" if is_address_point_token(token): address, _, _ = address_point_by_token(db, token) elif is_address_token(token): address = address_by_token(db, token) else: return "" except ValueError: return "" return _normalize_stop_search(address.city or "") def _is_major_station_name(value: str | None) -> bool: normalized = _normalize_stop_search(value or "") return ( bool(re.search(r"(^|[\s,(/-])hbf($|[\s,)/-])", normalized)) or "hauptbahnhof" in normalized or "central station" in normalized or "central train station" in normalized ) def _major_hub_canonical_stops_postgresql( db: Session, location: StopSummary, active_dataset_ids: list[int], *, locality: str, ) -> list[tuple[int, int, float]]: radius_deg = ADDRESS_ACCESS_MAJOR_HUB_RADIUS_M / 111_320 stmt = text( """ WITH point AS ( SELECT ST_SetSRID(ST_MakePoint(:lon, :lat), 4326) AS geom ), hub_rows AS ( SELECT canonical_stops.id AS canonical_stop_id, canonical_stop_links.dataset_id AS dataset_id, ST_DistanceSphere(canonical_stops.geom, point.geom) AS distance_m, MIN( CASE WHEN :locality = '' THEN 1 WHEN LOWER(COALESCE(canonical_stops.name, '')) LIKE :locality_pattern THEN 0 ELSE 1 END ) AS locality_rank FROM canonical_stops JOIN canonical_stop_links ON canonical_stop_links.canonical_stop_id = canonical_stops.id AND canonical_stop_links.object_type = 'gtfs_stop' JOIN datasets ON datasets.id = canonical_stop_links.dataset_id AND datasets.kind = 'gtfs' AND datasets.is_active IS TRUE CROSS JOIN point WHERE canonical_stop_links.dataset_id IN :dataset_ids AND canonical_stops.geom IS NOT NULL AND canonical_stops.geom && ST_Expand(point.geom, :radius_deg) AND ST_DWithin(canonical_stops.geom::geography, point.geom::geography, :radius_m) AND ( LOWER(COALESCE(canonical_stops.name, '')) ~ '(^|[[:space:],(/-])hbf($|[[:space:],)/-])' OR LOWER(COALESCE(canonical_stops.name, '')) LIKE '%hauptbahnhof%' OR LOWER(COALESCE(canonical_stops.name, '')) LIKE '%central station%' OR LOWER(COALESCE(canonical_stops.name, '')) LIKE '%central train station%' ) GROUP BY canonical_stops.id, canonical_stop_links.dataset_id, canonical_stops.geom, point.geom ) SELECT canonical_stop_id, dataset_id, distance_m FROM hub_rows ORDER BY locality_rank, distance_m, canonical_stop_id LIMIT :limit """ ).bindparams(bindparam("dataset_ids", expanding=True)) rows = db.execute( stmt, { "lon": float(location.lon), "lat": float(location.lat), "dataset_ids": tuple(active_dataset_ids), "radius_deg": radius_deg, "radius_m": ADDRESS_ACCESS_MAJOR_HUB_RADIUS_M, "locality": locality, "locality_pattern": f"%{locality}%" if locality else "", "limit": ADDRESS_ACCESS_MAJOR_HUB_CANDIDATES * 6, }, ).all() return [(int(row[0]), int(row[1]), float(row[2] or 0)) for row in rows] def _major_hub_canonical_stops_sqlite( db: Session, location: StopSummary, active_dataset_ids: list[int], *, locality: str, ) -> list[tuple[int, int, float]]: lon = float(location.lon) lat = float(location.lat) radius_deg = ADDRESS_ACCESS_MAJOR_HUB_RADIUS_M / 111_320 rows = db.execute( select( CanonicalStop.id, CanonicalStopLink.dataset_id, CanonicalStop.name, CanonicalStop.lat, CanonicalStop.lon, ) .join(CanonicalStopLink, CanonicalStopLink.canonical_stop_id == CanonicalStop.id) .where( CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.dataset_id.in_(active_dataset_ids), CanonicalStop.lat.is_not(None), CanonicalStop.lon.is_not(None), CanonicalStop.lat >= lat - radius_deg, CanonicalStop.lat <= lat + radius_deg, CanonicalStop.lon >= lon - radius_deg, CanonicalStop.lon <= lon + radius_deg, ) .limit(ADDRESS_ACCESS_MAJOR_HUB_CANDIDATES * 100) ).all() result: list[tuple[int, int, float, int]] = [] seen: set[int] = set() for canonical_stop_id, dataset_id, canonical_name, stop_lat, stop_lon in rows: if not _is_major_station_name(canonical_name): continue distance_m = _distance_m(lat, lon, float(stop_lat), float(stop_lon)) if distance_m > ADDRESS_ACCESS_MAJOR_HUB_RADIUS_M: continue locality_rank = ( 0 if locality and locality in _normalize_stop_search(canonical_name or "") else 1 ) if int(canonical_stop_id) in seen: continue seen.add(int(canonical_stop_id)) result.append((int(canonical_stop_id), int(dataset_id), distance_m, locality_rank)) result.sort(key=lambda item: (item[3], item[2], item[0])) return [(canonical_stop_id, dataset_id, distance_m) for canonical_stop_id, dataset_id, distance_m, _ in result] def _nearby_canonical_stops_postgresql( db: Session, location: StopSummary, active_dataset_ids: list[int], ) -> list[tuple[int, int, float]]: radius_deg = ADDRESS_ACCESS_RADIUS_M / 111_320 stmt = text( """ WITH point AS ( SELECT ST_SetSRID(ST_MakePoint(:lon, :lat), 4326) AS geom ) SELECT canonical_stops.id AS canonical_stop_id, canonical_stop_links.dataset_id AS dataset_id, ST_DistanceSphere(canonical_stops.geom, point.geom) AS distance_m FROM canonical_stops JOIN canonical_stop_links ON canonical_stop_links.canonical_stop_id = canonical_stops.id AND canonical_stop_links.object_type = 'gtfs_stop' JOIN datasets ON datasets.id = canonical_stop_links.dataset_id AND datasets.kind = 'gtfs' AND datasets.is_active IS TRUE CROSS JOIN point WHERE canonical_stop_links.dataset_id IN :dataset_ids AND canonical_stops.geom IS NOT NULL AND canonical_stops.geom && ST_Expand(point.geom, :radius_deg) AND ST_DWithin(canonical_stops.geom::geography, point.geom::geography, :radius_m) GROUP BY canonical_stops.id, canonical_stop_links.dataset_id, canonical_stops.geom, point.geom ORDER BY canonical_stops.geom <-> point.geom, canonical_stops.id LIMIT :limit """ ).bindparams(bindparam("dataset_ids", expanding=True)) rows = db.execute( stmt, { "lon": float(location.lon), "lat": float(location.lat), "dataset_ids": tuple(active_dataset_ids), "radius_deg": radius_deg, "radius_m": ADDRESS_ACCESS_RADIUS_M, "limit": ADDRESS_ACCESS_STOP_CANDIDATES * 8, }, ).all() return [(int(row[0]), int(row[1]), float(row[2] or 0)) for row in rows] def _nearby_canonical_stops_sqlite( db: Session, location: StopSummary, active_dataset_ids: list[int], ) -> list[tuple[int, int, float]]: lon = float(location.lon) lat = float(location.lat) distance_expr = (CanonicalStop.lon - lon) * (CanonicalStop.lon - lon) + (CanonicalStop.lat - lat) * (CanonicalStop.lat - lat) rows = db.execute( select(CanonicalStop.id, CanonicalStopLink.dataset_id, CanonicalStop.lat, CanonicalStop.lon) .join(CanonicalStopLink, CanonicalStopLink.canonical_stop_id == CanonicalStop.id) .where( CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.dataset_id.in_(active_dataset_ids), CanonicalStop.lat.is_not(None), CanonicalStop.lon.is_not(None), ) .order_by(distance_expr) .limit(ADDRESS_ACCESS_STOP_CANDIDATES * 8) ).all() result = [] for canonical_stop_id, dataset_id, stop_lat, stop_lon in rows: distance_m = _distance_m(lat, lon, float(stop_lat), float(stop_lon)) if distance_m <= ADDRESS_ACCESS_RADIUS_M: result.append((int(canonical_stop_id), int(dataset_id), distance_m)) result.sort(key=lambda item: item[2]) return result def _walk_only_journey_between_summaries( db: Session, *, from_location: StopSummary, to_location: StopSummary, departure_seconds: int, dataset_id: int, route_geometry: bool = True, ) -> dict | None: if from_location.lon is None or from_location.lat is None or to_location.lon is None or to_location.lat is None: return None direct_distance_m = _distance_m(float(from_location.lat), float(from_location.lon), float(to_location.lat), float(to_location.lon)) if direct_distance_m > PUBLIC_TRANSPORT_WALK_OPTION_MAX_SECONDS * 1.35: return None leg = _walk_leg_between_summaries( db, from_stop=from_location, to_stop=to_location, departure_seconds=departure_seconds, dataset_id=dataset_id, max_duration_seconds=PUBLIC_TRANSPORT_WALK_OPTION_MAX_SECONDS, route_geometry=route_geometry, ) if leg is None: return None leg["route_name"] = "Walk only" return _journey_payload([leg]) def _walk_leg_between_summaries( db: Session, *, from_stop: StopSummary, to_stop: StopSummary, departure_seconds: int, dataset_id: int, max_duration_seconds: int, route_geometry: bool = True, ) -> dict | None: if from_stop.lon is None or from_stop.lat is None or to_stop.lon is None or to_stop.lat is None: return None distance_m = _distance_m(float(from_stop.lat), float(from_stop.lon), float(to_stop.lat), float(to_stop.lon)) estimated_duration_seconds = int(math.ceil(distance_m / 1.35)) if estimated_duration_seconds > max_duration_seconds * 1.5: return None leg = _walk_leg_payload( db, _RouterWalkBacklink( previous_label=_RouterLabel(canonical_stop_id=0, arrival_seconds=departure_seconds), from_stop=from_stop, to_stop=to_stop, distance_m=distance_m, departure_seconds=departure_seconds, arrival_seconds=departure_seconds + estimated_duration_seconds, ), dataset_id, route_geometry=route_geometry, ) if int(leg.get("duration_seconds") or 0) > max_duration_seconds: return None return leg def _compose_address_access_journey( journey: dict, *, access_leg: dict | None, egress_leg: dict | None, ) -> dict | None: public_legs: list[dict] = [] features: list[dict] = [] leg_offset = 0 if access_leg is not None: public_legs.append(_leg_public_payload(access_leg)) features.extend(_offset_feature_legs(_feature_items(_journey_payload([access_leg])), leg_offset)) leg_offset += 1 public_legs.extend(journey.get("legs") or []) features.extend(_offset_feature_legs(_feature_items(journey), leg_offset)) leg_offset += len(journey.get("legs") or []) if egress_leg is not None: public_legs.append(_leg_public_payload(egress_leg)) features.extend(_offset_feature_legs(_feature_items(_journey_payload([egress_leg])), leg_offset)) if not public_legs: return None departure = access_leg["departure_seconds"] if access_leg is not None else journey.get("departure_seconds") arrival = egress_leg["arrival_seconds"] if egress_leg is not None else journey.get("arrival_seconds") if departure is None or arrival is None: return None transit_legs = [leg for leg in public_legs if leg.get("mode") != "walk"] duration_seconds = max(0, int(arrival) - int(departure)) return { "transfers": max(0, len(transit_legs) - 1), "departure_seconds": int(departure), "arrival_seconds": int(arrival), "departure_time": format_gtfs_time(int(departure)), "arrival_time": format_gtfs_time(int(arrival)), "departure_time_label": format_gtfs_time_label(int(departure)), "arrival_time_label": format_gtfs_time_label(int(arrival)), "duration_seconds": duration_seconds, "duration_minutes": duration_minutes_ceil(duration_seconds), "duration_label": format_duration_label(duration_seconds), "legs": public_legs, "features": feature_collection(features), } def _feature_items(payload: dict) -> list[dict]: features = payload.get("features") if isinstance(payload, dict) else None if isinstance(features, dict): items = features.get("features") else: items = None return list(items or []) if isinstance(items, list) else [] def _offset_feature_legs(features: list[dict], offset: int) -> list[dict]: if offset <= 0: return json.loads(json.dumps(features)) copied = json.loads(json.dumps(features)) for feature in copied: props = feature.get("properties") if isinstance(feature, dict) else None if isinstance(props, dict) and isinstance(props.get("leg"), int): props["leg"] = int(props["leg"]) + offset return copied def _select_diverse_journeys(journeys, *, limit: int) -> list[dict]: ranked = sorted((dict(journey) for journey in journeys), key=_journey_sort_key) selected: list[dict] = [] seen_exact: set[str] = set() seen_diversity: set[tuple[object, ...]] = set() for journey in ranked: exact = "||".join(_journey_leg_signature(leg) for leg in journey.get("legs") or []) if exact in seen_exact: continue seen_exact.add(exact) diversity_key = _journey_diversity_key(journey) if diversity_key in seen_diversity and len(selected) >= max(3, limit // 2): continue seen_diversity.add(diversity_key) selected.append(journey) if len(selected) >= limit: break if len(selected) < min(limit, 3): for journey in ranked: exact = "||".join(_journey_leg_signature(leg) for leg in journey.get("legs") or []) if exact in {"||".join(_journey_leg_signature(leg) for leg in existing.get("legs") or []) for existing in selected}: continue selected.append(journey) if len(selected) >= min(limit, 3): break return _ensure_walk_only_option(selected, ranked, limit=limit) def _ensure_walk_only_option(selected: list[dict], ranked: list[dict], *, limit: int) -> list[dict]: if any(_journey_is_walk_only(journey) for journey in selected): return selected walk = next((journey for journey in ranked if _journey_is_walk_only(journey)), None) if walk is None: return selected if len(selected) < limit: return [*selected, walk] if selected: selected[-1] = walk return selected def _journey_is_walk_only(journey: dict) -> bool: legs = journey.get("legs") or [] return bool(legs) and all(leg.get("mode") == "walk" for leg in legs) def _journey_diversity_key(journey: dict) -> tuple[object, ...]: route_signature = tuple( str(leg.get("route_ref") or leg.get("route_id") or leg.get("mode") or "") for leg in journey.get("legs") or [] if leg.get("mode") != "walk" ) departure = journey.get("departure_seconds") time_band = None if departure is None else int(departure) // (30 * 60) return (int(journey.get("transfers") or 0), route_signature, time_band) def _find_journeys_via( db: Session, from_stop_id: int | str, via_stop_id: int | str, to_stop_id: int | str, departure: str, max_transfers: int, transfer_seconds: int, limit: int, source_ids: list[int] | None, service_date: str | date | None, ) -> dict: max_journeys = max(1, min(limit, 10)) first_result = find_journeys( db=db, from_stop_id=from_stop_id, to_stop_id=via_stop_id, departure=departure, max_transfers=max_transfers, transfer_seconds=transfer_seconds, limit=max_journeys, source_ids=source_ids, via_stop_id=None, service_date=service_date, ) combined = [] for first in first_result.get("journeys", [])[:max_journeys]: first_arrival = first.get("arrival_seconds") if first_arrival is None: continue onward_departure = format_gtfs_time(int(first_arrival) + max(0, transfer_seconds)) second_result = find_journeys( db=db, from_stop_id=via_stop_id, to_stop_id=to_stop_id, departure=onward_departure or departure, max_transfers=max_transfers, transfer_seconds=transfer_seconds, limit=max_journeys, source_ids=source_ids, via_stop_id=None, service_date=service_date, ) for second in second_result.get("journeys", [])[:max_journeys]: combined.append(_combine_via_journey(first, second)) unique: dict[tuple[str, ...], dict] = {} for journey in sorted(combined, key=_journey_sort_key): key = tuple(_journey_leg_signature(leg) for leg in journey["legs"]) unique.setdefault(key, journey) selected = list(unique.values())[:max_journeys] dataset_ids = sorted( { int(leg["dataset_id"]) for journey in selected for leg in journey.get("legs", []) if leg.get("dataset_id") is not None } ) searched_dataset_ids = sorted(set(first_result.get("dataset_ids") or []) | set(dataset_ids)) return { "from": first_result.get("from"), "to": selected[0]["legs"][-1]["to"] if selected else None, "via": first_result.get("to"), "source": None, "sources": _source_payloads_for_dataset_ids(db, dataset_ids or searched_dataset_ids), "dataset_id": dataset_ids[0] if len(dataset_ids) == 1 else None, "dataset_ids": dataset_ids or searched_dataset_ids, "departure_time": first_result.get("departure_time"), "service_date": first_result.get("service_date"), "max_transfers": max(0, max_transfers), "via_transfer_seconds": max(0, transfer_seconds), "journeys": selected, } def _combine_via_journey(first: dict, second: dict) -> dict: legs = [*first.get("legs", []), *second.get("legs", [])] departure = first.get("departure_seconds") arrival = second.get("arrival_seconds") duration_seconds = None if departure is None or arrival is None else max(0, int(arrival) - int(departure)) features = _combine_via_features(first.get("features") or {}, second.get("features") or {}, first_leg_count=len(first.get("legs", []))) return { "transfers": max(0, len(legs) - 1), "departure_seconds": departure, "arrival_seconds": arrival, "departure_time": format_gtfs_time(departure), "arrival_time": format_gtfs_time(arrival), "departure_time_label": format_gtfs_time_label(departure), "arrival_time_label": format_gtfs_time_label(arrival), "duration_seconds": duration_seconds, "duration_minutes": duration_minutes_ceil(duration_seconds), "duration_label": format_duration_label(duration_seconds), "legs": legs, "features": feature_collection(features), "via_forced": True, } def _combine_via_features(first_features: dict, second_features: dict, first_leg_count: int) -> list[dict]: features = [] first_collection = first_features.get("features") if isinstance(first_features, dict) else [] second_collection = second_features.get("features") if isinstance(second_features, dict) else [] for feature in first_collection or []: features.append(_copy_via_feature(feature, leg_offset=0, first_part=True)) for feature in second_collection or []: features.append(_copy_via_feature(feature, leg_offset=first_leg_count, first_part=False)) return features def _copy_via_feature(feature: dict, *, leg_offset: int, first_part: bool) -> dict: copied = json.loads(json.dumps(feature)) props = copied.setdefault("properties", {}) if isinstance(props.get("leg"), int): props["leg"] = int(props["leg"]) + leg_offset if props.get("feature_type") == "journey_stop": if first_part and props.get("role") == "end": props["role"] = "transfer" elif not first_part and props.get("role") == "start": props["role"] = "transfer" return copied def _journey_dataset_pairs(from_selection: StopSelection, to_selection: StopSelection) -> list[tuple[int, int]]: pairs = [ (first_dataset_id, second_dataset_id) for first_dataset_id in from_selection.stop_ids_by_dataset for second_dataset_id in to_selection.stop_ids_by_dataset ] pairs.sort(key=lambda item: (item[0] != item[1], item[0], item[1])) return pairs[:MAX_JOURNEY_DATASET_PAIRS] def _source_payloads_for_dataset_ids(db: Session, dataset_ids: list[int]) -> list[dict]: if not dataset_ids: return [] rows = db.execute( select(Dataset.id, Source.id, Source.name) .join(Source, Source.id == Dataset.source_id) .where(Dataset.id.in_(dataset_ids)) .order_by(Source.name, Source.id) ).all() payloads = [] seen = set() for dataset_id, source_id, source_name in rows: if source_id in seen: continue seen.add(source_id) payloads.append({"id": source_id, "name": source_name, "dataset_id": dataset_id}) return payloads def _journey_sort_key(journey: dict) -> tuple[float, float, float, int, int]: arrival = journey.get("arrival_seconds") departure = journey.get("departure_seconds") transfers = int(journey.get("transfers") or 0) walking_seconds = sum( float(leg.get("distance_m") or 0) / 1.35 for leg in journey.get("legs") or [] if leg.get("mode") == "walk" ) recommended_arrival = None if arrival is None else float(arrival) + transfers * 600 + walking_seconds transit_legs = sum(1 for leg in journey.get("legs") or [] if leg.get("mode") != "walk") return ( float("inf") if recommended_arrival is None else recommended_arrival, float("inf") if arrival is None else float(arrival), float("inf") if departure is None else -float(departure), transfers, 1 if transit_legs == 0 else 0, ) def _filter_reasonable_journeys(journeys: list[dict]) -> list[dict]: return [journey for journey in journeys if _journey_is_reasonable(journey)] def _journey_is_reasonable(journey: dict) -> bool: path: list[int] = [] for leg in journey.get("legs") or []: path.extend(_leg_endpoint_canonical_ids(leg)) collapsed: list[int] = [] for canonical_stop_id in path: if not collapsed or collapsed[-1] != canonical_stop_id: collapsed.append(canonical_stop_id) seen: set[int] = set() for canonical_stop_id in collapsed: if canonical_stop_id in seen: return False seen.add(canonical_stop_id) return True def _leg_endpoint_canonical_ids(leg: dict) -> tuple[int, ...]: ids: list[int] = [] stops = leg.get("stops") or [] for stop in stops: canonical_id = (stop.get("canonical_stop") or {}).get("id") or stop.get("canonical_stop_id") if canonical_id is None: continue try: ids.append(int(canonical_id)) except (TypeError, ValueError): continue return tuple(ids) def parse_service_date(value: str | date | None) -> date | None: if value is None or value == "": return None if isinstance(value, date) and not isinstance(value, datetime): return value text = str(value).strip() try: return date.fromisoformat(text) except ValueError as exc: raise ValueError("service_date must be YYYY-MM-DD") from exc def _service_ids_by_dataset(db: Session, dataset_ids: list[int], service_date: date | None) -> dict[int, set[str] | None]: if service_date is None or not dataset_ids: return {dataset_id: None for dataset_id in dataset_ids} return {dataset_id: _active_service_ids(db, dataset_id, service_date) for dataset_id in dataset_ids} def _active_service_ids(db: Session, dataset_id: int, service_date: date) -> set[str] | None: has_calendar = bool(db.scalar(select(exists().where(GtfsCalendar.dataset_id == dataset_id)))) has_calendar_dates = bool(db.scalar(select(exists().where(GtfsCalendarDate.dataset_id == dataset_id)))) if not has_calendar and not has_calendar_dates: return None date_int = int(service_date.strftime("%Y%m%d")) weekday_column = [ GtfsCalendar.monday, GtfsCalendar.tuesday, GtfsCalendar.wednesday, GtfsCalendar.thursday, GtfsCalendar.friday, GtfsCalendar.saturday, GtfsCalendar.sunday, ][service_date.weekday()] active = set( db.scalars( select(GtfsCalendar.service_id).where( GtfsCalendar.dataset_id == dataset_id, GtfsCalendar.start_date <= date_int, GtfsCalendar.end_date >= date_int, weekday_column.is_(True), ) ).all() ) exceptions = db.execute( select(GtfsCalendarDate.service_id, GtfsCalendarDate.exception_type).where( GtfsCalendarDate.dataset_id == dataset_id, GtfsCalendarDate.date == date_int, ) ).all() for service_id, exception_type in exceptions: if int(exception_type or 0) == 1: active.add(str(service_id)) elif int(exception_type or 0) == 2: active.discard(str(service_id)) return active def _where_trip_service_active(stmt, trip_model, service_ids: set[str] | None): if service_ids is None: return stmt return stmt.where(trip_model.service_id.in_(service_ids)) def _sidecar_service_filter(service_ids: set[str] | None, alias: str = "trips") -> tuple[str, list[object]]: if service_ids is None: return "", [] if not service_ids: return " AND 0", [] service_list = sorted(str(service_id) for service_id in service_ids) placeholders = ", ".join(["?"] * len(service_list)) return f" AND {alias}.service_id IN ({placeholders})", list(service_list) def _sidecar_stop_time_columns(alias: str, prefix: str) -> str: return ", ".join(f"{alias}.{column} AS {prefix}_{column}" for column in GTFS_STOP_TIME_COLUMNS) def _sidecar_stop_time_from_row(dataset_id: int, row, prefix: str) -> GtfsStopTime: return GtfsStopTime( dataset_id=dataset_id, trip_id=str(row[f"{prefix}_trip_id"]), stop_id=str(row[f"{prefix}_stop_id"]), stop_sequence=int(row[f"{prefix}_stop_sequence"]), arrival_time=row[f"{prefix}_arrival_time"], departure_time=row[f"{prefix}_departure_time"], arrival_seconds=row[f"{prefix}_arrival_seconds"], departure_seconds=row[f"{prefix}_departure_seconds"], ) def _trip_route_lookup( db: Session, dataset_id: int, trip_ids: list[str], service_ids: set[str] | None = None, ) -> dict[str, tuple[GtfsTrip, GtfsRoute]]: if service_ids == set() or not trip_ids: return {} service_filter = None if service_ids is None else {str(service_id) for service_id in service_ids} lookup: dict[str, tuple[GtfsTrip, GtfsRoute]] = {} for chunk in _chunks(sorted(set(trip_ids)), SQLITE_IN_CHUNK_SIZE): stmt = ( select(GtfsTrip, GtfsRoute) .join(GtfsRoute, and_(GtfsRoute.dataset_id == GtfsTrip.dataset_id, GtfsRoute.route_id == GtfsTrip.route_id)) .where(GtfsTrip.dataset_id == dataset_id, GtfsTrip.trip_id.in_(chunk)) ) for trip, route in db.execute(stmt).all(): if service_filter is not None and str(trip.service_id) not in service_filter: continue lookup.setdefault(trip.trip_id, (trip, route)) return lookup def _sidecar_direct_leg_rows( db: Session, dataset_id: int, service_ids: set[str] | None, from_stop_ids: tuple[str, ...], to_stop_ids: tuple[str, ...], earliest_departure: int, limit: int, ) -> list[tuple[GtfsStopTime, GtfsStopTime, GtfsTrip, GtfsRoute]]: service_sql, service_params = _sidecar_service_filter(service_ids) origin_columns = _sidecar_stop_time_columns("origin", "origin") dest_columns = _sidecar_stop_time_columns("dest", "dest") from_placeholders = ", ".join(["?"] * len(from_stop_ids)) to_placeholders = ", ".join(["?"] * len(to_stop_ids)) rows = execute_sidecar_query( db, dataset_id, f""" SELECT {origin_columns}, {dest_columns}, trips.trip_id AS lookup_trip_id FROM gtfs_stop_times AS origin JOIN gtfs_stop_times AS dest ON dest.trip_id = origin.trip_id AND dest.stop_sequence > origin.stop_sequence JOIN gtfs_trips AS trips ON trips.trip_id = origin.trip_id WHERE origin.stop_id IN ({from_placeholders}) AND dest.stop_id IN ({to_placeholders}) AND (origin.departure_seconds IS NULL OR origin.departure_seconds >= ?) {service_sql} ORDER BY origin.departure_seconds, origin.departure_time, dest.arrival_seconds, dest.arrival_time, origin.trip_id LIMIT ? """, [*from_stop_ids, *to_stop_ids, earliest_departure, *service_params, limit], ) trip_lookup = _trip_route_lookup(db, dataset_id, [str(row["lookup_trip_id"]) for row in rows], service_ids) results = [] for row in rows: trip_route = trip_lookup.get(str(row["lookup_trip_id"])) if trip_route is None: continue trip, route = trip_route results.append( ( _sidecar_stop_time_from_row(dataset_id, row, "origin"), _sidecar_stop_time_from_row(dataset_id, row, "dest"), trip, route, ) ) return results def _sidecar_latest_direct_leg_rows( db: Session, dataset_id: int, service_ids: set[str] | None, from_stop_ids: tuple[str, ...], to_stop_ids: tuple[str, ...], earliest_departure: int, latest_arrival: int, excluded_trip_id: str | None, ) -> list[tuple[GtfsStopTime, GtfsStopTime, GtfsTrip, GtfsRoute]]: service_sql, service_params = _sidecar_service_filter(service_ids) excluded_sql = " AND origin.trip_id != ?" if excluded_trip_id else "" origin_columns = _sidecar_stop_time_columns("origin", "origin") dest_columns = _sidecar_stop_time_columns("dest", "dest") from_placeholders = ", ".join(["?"] * len(from_stop_ids)) to_placeholders = ", ".join(["?"] * len(to_stop_ids)) params: list[object] = [*from_stop_ids, *to_stop_ids, earliest_departure, latest_arrival, *service_params] if excluded_trip_id: params.append(excluded_trip_id) params.append(120) rows = execute_sidecar_query( db, dataset_id, f""" SELECT {origin_columns}, {dest_columns}, trips.trip_id AS lookup_trip_id FROM gtfs_stop_times AS origin JOIN gtfs_stop_times AS dest ON dest.trip_id = origin.trip_id AND dest.stop_sequence > origin.stop_sequence JOIN gtfs_trips AS trips ON trips.trip_id = origin.trip_id WHERE origin.stop_id IN ({from_placeholders}) AND dest.stop_id IN ({to_placeholders}) AND (origin.departure_seconds IS NULL OR origin.departure_seconds >= ?) AND (dest.arrival_seconds IS NULL OR dest.arrival_seconds <= ?) {service_sql} {excluded_sql} ORDER BY origin.departure_seconds DESC, origin.departure_time DESC, dest.arrival_seconds DESC, dest.arrival_time DESC, origin.trip_id LIMIT ? """, params, ) trip_lookup = _trip_route_lookup(db, dataset_id, [str(row["lookup_trip_id"]) for row in rows], service_ids) results = [] for row in rows: trip_route = trip_lookup.get(str(row["lookup_trip_id"])) if trip_route is None: continue trip, route = trip_route results.append( ( _sidecar_stop_time_from_row(dataset_id, row, "origin"), _sidecar_stop_time_from_row(dataset_id, row, "dest"), trip, route, ) ) return results def _sidecar_destination_arrival_rows( db: Session, dataset_id: int, service_ids: set[str] | None, stop_ids: tuple[str, ...], earliest_departure: int, latest_arrival: int | None, ) -> list[tuple[GtfsStopTime, GtfsTrip, GtfsRoute]]: service_sql, service_params = _sidecar_service_filter(service_ids) latest_sql = " AND (call.arrival_seconds IS NULL OR call.arrival_seconds <= ?)" if latest_arrival is not None else "" call_columns = _sidecar_stop_time_columns("call", "call") stop_placeholders = ", ".join(["?"] * len(stop_ids)) params: list[object] = [*stop_ids, earliest_departure] if latest_arrival is not None: params.append(latest_arrival) params.extend(service_params) params.append(MAX_TARGET_DESTINATION_ARRIVALS) rows = execute_sidecar_query( db, dataset_id, f""" SELECT {call_columns}, trips.trip_id AS lookup_trip_id FROM gtfs_stop_times AS call JOIN gtfs_trips AS trips ON trips.trip_id = call.trip_id WHERE call.stop_id IN ({stop_placeholders}) AND (call.arrival_seconds IS NULL OR call.arrival_seconds >= ?) {latest_sql} {service_sql} ORDER BY call.arrival_seconds, call.arrival_time, call.trip_id LIMIT ? """, params, ) trip_lookup = _trip_route_lookup(db, dataset_id, [str(row["lookup_trip_id"]) for row in rows], service_ids) results = [] for row in rows: trip_route = trip_lookup.get(str(row["lookup_trip_id"])) if trip_route is None: continue trip, route = trip_route results.append((_sidecar_stop_time_from_row(dataset_id, row, "call"), trip, route)) return results def _sidecar_boarding_rows( db: Session, dataset_id: int, service_ids: set[str] | None, stop_ids: tuple[str, ...], earliest_departure: int, limit: int, latest_departure: int | None = None, ) -> list[tuple[GtfsStopTime, GtfsTrip, GtfsRoute]]: service_sql, service_params = _sidecar_service_filter(service_ids) latest_sql = " AND (call.departure_seconds IS NULL OR call.departure_seconds < ?)" if latest_departure is not None else "" call_columns = _sidecar_stop_time_columns("call", "call") stop_placeholders = ", ".join(["?"] * len(stop_ids)) params: list[object] = [*stop_ids, earliest_departure] if latest_departure is not None: params.append(latest_departure) params.extend(service_params) params.append(limit) rows = execute_sidecar_query( db, dataset_id, f""" SELECT {call_columns}, trips.trip_id AS lookup_trip_id FROM gtfs_stop_times AS call JOIN gtfs_trips AS trips ON trips.trip_id = call.trip_id WHERE call.stop_id IN ({stop_placeholders}) AND (call.departure_seconds IS NULL OR call.departure_seconds >= ?) {latest_sql} {service_sql} ORDER BY call.departure_seconds, call.departure_time, call.trip_id LIMIT ? """, params, ) trip_lookup = _trip_route_lookup(db, dataset_id, [str(row["lookup_trip_id"]) for row in rows], service_ids) results = [] for row in rows: trip_route = trip_lookup.get(str(row["lookup_trip_id"])) if trip_route is None: continue trip, route = trip_route results.append((_sidecar_stop_time_from_row(dataset_id, row, "call"), trip, route)) return results def _chunks[T](items: list[T], size: int) -> Iterator[list[T]]: for index in range(0, len(items), size): yield items[index : index + size] def parse_gtfs_time(value: str | None) -> int | None: if not value: return None parts = value.strip().split(":") if len(parts) == 2: parts.append("0") if len(parts) != 3: return None try: hours, minutes, seconds = [int(part) for part in parts] except ValueError: return None if hours < 0 or minutes < 0 or minutes > 59 or seconds < 0 or seconds > 59: return None return hours * 3600 + minutes * 60 + seconds def format_gtfs_time(seconds: int | None) -> str | None: if seconds is None: return None hours = seconds // 3600 minutes = (seconds % 3600) // 60 secs = seconds % 60 return f"{hours:02d}:{minutes:02d}:{secs:02d}" def format_gtfs_time_label(seconds: int | None) -> str | None: if seconds is None: return None service_day = seconds // 86_400 seconds_in_day = seconds % 86_400 hours = seconds_in_day // 3600 minutes = (seconds_in_day % 3600) // 60 secs = seconds_in_day % 60 clock = f"{hours:02d}:{minutes:02d}" if secs == 0 else f"{hours:02d}:{minutes:02d}:{secs:02d}" return clock if service_day == 0 else f"+{service_day}d {clock}" def duration_minutes_ceil(seconds: int | float | None) -> int | None: if seconds is None: return None return max(0, int(math.ceil(float(seconds) / 60))) def format_duration_label(seconds: int | float | None) -> str | None: minutes_total = duration_minutes_ceil(seconds) if minutes_total is None: return None days = minutes_total // (24 * 60) remaining = minutes_total % (24 * 60) hours = remaining // 60 minutes = remaining % 60 if days: return f"{days}d {hours:02d}:{minutes:02d}" if hours: return f"{hours}:{minutes:02d}" return f"{minutes} min" @dataclass class _RouterLabel: canonical_stop_id: int arrival_seconds: int previous: "_RouterLegBacklink | _RouterWalkBacklink | None" = None @dataclass(frozen=True) class _RouterLegBacklink: previous_label: _RouterLabel route: GtfsRoute trip: GtfsTrip origin: GtfsStopTime dest: GtfsStopTime @dataclass(frozen=True) class _RouterWalkBacklink: previous_label: _RouterLabel from_stop: StopSummary to_stop: StopSummary distance_m: float departure_seconds: int arrival_seconds: int @dataclass(frozen=True) class _RouterBoarding: canonical_stop_id: int call: GtfsStopTime trip: GtfsTrip route: GtfsRoute ready_seconds: int def _find_round_journeys( db: Session, dataset_id: int, service_ids: set[str] | None, from_selection: StopSelection, to_selection: StopSelection, earliest_departure: int, max_transfers: int, transfer_seconds: int, latest_arrival: int | None, limit: int, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], ) -> list[dict]: if from_selection.canonical_stop_id is None or to_selection.canonical_stop_id is None: return [] origin_id = from_selection.canonical_stop_id target_id = to_selection.canonical_stop_id best: dict[int, _RouterLabel] = {origin_id: _RouterLabel(origin_id, earliest_departure)} marked = {origin_id} solutions: list[_RouterLabel] = [] max_legs = max(1, min(max_transfers + 1, MAX_ROUTER_TRANSIT_LEGS)) for round_index in range(max_legs): if not marked: break boarding_labels = { stop_id: label for stop_id in marked if (label := best.get(stop_id)) is not None } walking_labels = _walking_transfer_labels( db, dataset_id=dataset_id, source_labels=boarding_labels, latest_arrival=latest_arrival, ) for stop_id, label in walking_labels.items(): current = best.get(stop_id) accepted = current is None or label.arrival_seconds < current.arrival_seconds if accepted: best[stop_id] = label boarding_labels[stop_id] = label if stop_id == target_id: solutions.append(label) elif stop_id not in boarding_labels: boarding_labels[stop_id] = current board_ready = { stop_id: ready_seconds for stop_id, label in boarding_labels.items() if (ready_seconds := label.arrival_seconds + (0 if label.previous is None else transfer_seconds)) is not None and (latest_arrival is None or ready_seconds < latest_arrival) } if not board_ready: break boardings = _router_boardings_for_marked_stops( db=db, dataset_id=dataset_id, service_ids=service_ids, board_ready=board_ready, latest_arrival=latest_arrival, ) if not boardings: break next_marked: set[int] = set() calls_by_trip = _stop_times_by_trip(db, dataset_id, sorted({boarding.trip.trip_id for boarding in boardings})) stop_to_canonical = _canonical_ids_for_trip_calls(db, dataset_id, calls_by_trip) for boarding in boardings: previous_label = best.get(boarding.canonical_stop_id) if previous_label is None: continue calls = calls_by_trip.get(boarding.trip.trip_id, []) for call in calls: if call.stop_sequence <= boarding.call.stop_sequence: continue canonical_stop_id = stop_to_canonical.get(call.stop_id) if canonical_stop_id is None: continue arrival = _arrival_seconds(call) if arrival is None or arrival < boarding.ready_seconds: continue if latest_arrival is not None and arrival >= latest_arrival: continue current = best.get(canonical_stop_id) if current is not None and current.arrival_seconds <= arrival: continue label = _RouterLabel( canonical_stop_id=canonical_stop_id, arrival_seconds=arrival, previous=_RouterLegBacklink( previous_label=previous_label, route=boarding.route, trip=boarding.trip, origin=boarding.call, dest=call, ), ) best[canonical_stop_id] = label next_marked.add(canonical_stop_id) if canonical_stop_id == target_id: solutions.append(label) marked = next_marked if len(solutions) >= limit and round_index > 0: break journeys = [] for label in sorted(solutions, key=lambda item: item.arrival_seconds)[: max(limit * 2, limit)]: legs = _router_label_legs(db, dataset_id, label, stop_cache, osm_stop_cache) if legs: journeys.append(_journey_payload(legs)) return sorted(journeys, key=_journey_sort_key)[:limit] def _walking_transfer_labels( db: Session, dataset_id: int, source_labels: dict[int, _RouterLabel], latest_arrival: int | None, ) -> dict[int, _RouterLabel]: if not source_labels: return {} source_labels = dict( sorted(source_labels.items(), key=lambda item: (item[1].arrival_seconds, item[0]))[ :MAX_WALKING_TRANSFER_SOURCE_STOPS ] ) nearby_rows = ( _walking_transfer_rows_postgres(db, dataset_id, tuple(source_labels)) if settings.is_postgresql_database else _walking_transfer_rows_sqlite(db, dataset_id, tuple(source_labels)) ) labels: dict[int, _RouterLabel] = {} stop_summaries = _canonical_stop_summaries( db, dataset_id, {stop_id for row in nearby_rows for stop_id in (int(row[0]), int(row[1]))}, ) for source_id, target_id, distance_m in nearby_rows: source_label = source_labels.get(source_id) if source_label is None: continue from_stop = stop_summaries.get(int(source_id)) to_stop = stop_summaries.get(int(target_id)) if from_stop is None or to_stop is None: continue walk_seconds = _walking_transfer_seconds(distance_m) arrival = source_label.arrival_seconds + walk_seconds if latest_arrival is not None and arrival >= latest_arrival: continue current = labels.get(target_id) if current is not None and current.arrival_seconds <= arrival: continue labels[target_id] = _RouterLabel( canonical_stop_id=target_id, arrival_seconds=arrival, previous=_RouterWalkBacklink( previous_label=source_label, from_stop=from_stop, to_stop=to_stop, distance_m=float(distance_m or 0), departure_seconds=source_label.arrival_seconds, arrival_seconds=arrival, ), ) return labels def _canonical_stop_summaries(db: Session, dataset_id: int, canonical_stop_ids: set[int]) -> dict[int, StopSummary]: if not canonical_stop_ids: return {} rows = db.scalars(select(CanonicalStop).where(CanonicalStop.id.in_(canonical_stop_ids))).all() return { canonical.id: StopSummary( id=canonical.id, dataset_id=dataset_id, stop_id=f"canonical:{canonical.id}", name=canonical.name, lat=canonical.lat, lon=canonical.lon, ) for canonical in rows } def _walking_transfer_rows_postgres( db: Session, dataset_id: int, source_ids: tuple[int, ...], ) -> list[tuple[int, int, float]]: if not source_ids: return [] stmt = text( """ WITH nearby AS ( SELECT src.id AS source_id, dest.id AS target_id, ST_DistanceSphere(src.geom, dest.geom) AS distance_m, row_number() OVER ( PARTITION BY src.id ORDER BY ST_DistanceSphere(src.geom, dest.geom), dest.id ) AS rn FROM canonical_stops AS src JOIN canonical_stops AS dest ON dest.id != src.id AND src.geom IS NOT NULL AND dest.geom IS NOT NULL AND dest.geom && ST_Expand(src.geom, :radius_deg) AND ST_DWithin(src.geom, dest.geom, :radius_deg) WHERE src.id IN :source_ids AND EXISTS ( SELECT 1 FROM canonical_stop_links AS link WHERE link.canonical_stop_id = dest.id AND link.dataset_id = :dataset_id AND link.object_type = 'gtfs_stop' ) ) SELECT source_id, target_id, distance_m FROM nearby WHERE rn <= :neighbor_limit ORDER BY source_id, distance_m, target_id """ ).bindparams(bindparam("source_ids", expanding=True)) rows = db.execute( stmt, { "dataset_id": dataset_id, "source_ids": source_ids, "radius_deg": WALKING_TRANSFER_RADIUS_DEG, "neighbor_limit": MAX_WALKING_TRANSFER_NEIGHBORS_PER_STOP, }, ).all() return [(int(source_id), int(target_id), float(distance_m or 0)) for source_id, target_id, distance_m in rows] def _walking_transfer_rows_sqlite( db: Session, dataset_id: int, source_ids: tuple[int, ...], ) -> list[tuple[int, int, float]]: if not source_ids: return [] source_rows = db.execute( select(CanonicalStop.id, CanonicalStop.lat, CanonicalStop.lon).where(CanonicalStop.id.in_(source_ids)) ).all() sources = { int(stop_id): (float(lat), float(lon)) for stop_id, lat, lon in source_rows if lat is not None and lon is not None } if not sources: return [] lat_delta = WALKING_TRANSFER_RADIUS_M / 111_320 min_lat = min(lat for lat, _ in sources.values()) - lat_delta max_lat = max(lat for lat, _ in sources.values()) + lat_delta min_lon = min(lon for _, lon in sources.values()) - lat_delta max_lon = max(lon for _, lon in sources.values()) + lat_delta dest_rows = db.execute( select(CanonicalStop.id, CanonicalStop.lat, CanonicalStop.lon) .join(CanonicalStopLink, CanonicalStopLink.canonical_stop_id == CanonicalStop.id) .where( CanonicalStopLink.dataset_id == dataset_id, CanonicalStopLink.object_type == "gtfs_stop", CanonicalStop.lat >= min_lat, CanonicalStop.lat <= max_lat, CanonicalStop.lon >= min_lon, CanonicalStop.lon <= max_lon, ) .distinct() ).all() rows: list[tuple[int, int, float]] = [] for source_id, (source_lat, source_lon) in sources.items(): candidates = [] for target_id, target_lat, target_lon in dest_rows: if int(target_id) == source_id or target_lat is None or target_lon is None: continue distance_m = _distance_m(source_lat, source_lon, float(target_lat), float(target_lon)) if distance_m <= WALKING_TRANSFER_RADIUS_M: candidates.append((source_id, int(target_id), distance_m)) rows.extend( sorted(candidates, key=lambda item: (item[2], item[1]))[:MAX_WALKING_TRANSFER_NEIGHBORS_PER_STOP] ) return rows def _walking_transfer_seconds(distance_m: float) -> int: return max(30, int(math.ceil(float(distance_m or 0) / WALKING_TRANSFER_SPEED_MPS))) def _distance_m(lat_a: float, lon_a: float, lat_b: float, lon_b: float) -> float: mean_lat = math.radians((lat_a + lat_b) / 2) meters_per_lon = 111_320 * math.cos(mean_lat) dx = (lon_b - lon_a) * meters_per_lon dy = (lat_b - lat_a) * 111_320 return math.hypot(dx, dy) def _router_boardings_for_marked_stops( db: Session, dataset_id: int, service_ids: set[str] | None, board_ready: dict[int, int], latest_arrival: int | None = None, ) -> list[_RouterBoarding]: if not board_ready: return [] stop_ids_by_canonical = _gtfs_stop_ids_for_canonical_ids(db, dataset_id, set(board_ready)) stop_to_canonical = { stop_id: canonical_stop_id for canonical_stop_id, stop_ids in stop_ids_by_canonical.items() for stop_id in stop_ids } stop_ids = tuple(stop_to_canonical) if not stop_ids: return [] boardings: list[_RouterBoarding] = [] seen: set[str] = set() earliest = min(board_ready.values()) for call, trip, route in _router_boarding_rows(db, dataset_id, service_ids, stop_ids, earliest, latest_arrival): canonical_stop_id = stop_to_canonical.get(call.stop_id) if canonical_stop_id is None: continue ready = board_ready.get(canonical_stop_id) departure = _departure_seconds(call) if ready is None or departure is None or departure < ready: continue if trip.trip_id in seen: continue seen.add(trip.trip_id) boardings.append( _RouterBoarding( canonical_stop_id=canonical_stop_id, call=call, trip=trip, route=route, ready_seconds=ready, ) ) if len(boardings) >= MAX_ROUTER_BOARDING_CANDIDATES: break return sorted(boardings, key=lambda item: (_departure_seconds(item.call) or 10**9, item.trip.trip_id)) def _router_boarding_rows( db: Session, dataset_id: int, service_ids: set[str] | None, stop_ids: tuple[str, ...], earliest: int, latest_departure: int | None = None, ) -> list[tuple[GtfsStopTime, GtfsTrip, GtfsRoute]]: if service_ids == set(): return [] if uses_sidecar_stop_times(db, dataset_id): return _sidecar_boarding_rows( db=db, dataset_id=dataset_id, service_ids=service_ids, stop_ids=stop_ids, earliest_departure=earliest, latest_departure=latest_departure, limit=MAX_ROUTER_BOARDING_CANDIDATES * 2, ) stmt = ( select(GtfsStopTime, GtfsTrip, GtfsRoute) .join(GtfsTrip, and_(GtfsTrip.dataset_id == GtfsStopTime.dataset_id, GtfsTrip.trip_id == GtfsStopTime.trip_id)) .join(GtfsRoute, and_(GtfsRoute.dataset_id == GtfsTrip.dataset_id, GtfsRoute.route_id == GtfsTrip.route_id)) .where( GtfsStopTime.dataset_id == dataset_id, GtfsStopTime.stop_id.in_(stop_ids), or_(GtfsStopTime.departure_seconds.is_(None), GtfsStopTime.departure_seconds >= earliest), ) .order_by(GtfsStopTime.departure_seconds, GtfsStopTime.departure_time, GtfsStopTime.trip_id) .limit(MAX_ROUTER_BOARDING_CANDIDATES * 2) ) stmt = _where_trip_service_active(stmt, GtfsTrip, service_ids) if latest_departure is not None: stmt = stmt.where(or_(GtfsStopTime.departure_seconds.is_(None), GtfsStopTime.departure_seconds < latest_departure)) return db.execute(stmt).all() def _gtfs_stop_ids_for_canonical_ids( db: Session, dataset_id: int, canonical_stop_ids: set[int], ) -> dict[int, tuple[str, ...]]: if not canonical_stop_ids: return {} rows = db.execute( select(CanonicalStopLink.canonical_stop_id, CanonicalStopLink.external_id) .where( CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.dataset_id == dataset_id, CanonicalStopLink.canonical_stop_id.in_(canonical_stop_ids), ) .order_by(CanonicalStopLink.canonical_stop_id, CanonicalStopLink.external_id) ).all() grouped: dict[int, list[str]] = {} for canonical_stop_id, stop_id in rows: grouped.setdefault(int(canonical_stop_id), []).append(str(stop_id)) return {canonical_stop_id: tuple(stop_ids) for canonical_stop_id, stop_ids in grouped.items()} def _canonical_ids_for_trip_calls( db: Session, dataset_id: int, calls_by_trip: dict[str, list[GtfsStopTime]], ) -> dict[str, int]: stop_ids = sorted({call.stop_id for calls in calls_by_trip.values() for call in calls}) if not stop_ids: return {} rows = db.execute( select(CanonicalStopLink.external_id, CanonicalStopLink.canonical_stop_id) .where( CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.dataset_id == dataset_id, CanonicalStopLink.external_id.in_(stop_ids), ) ).all() return {str(stop_id): int(canonical_stop_id) for stop_id, canonical_stop_id in rows} def _router_label_legs( db: Session, dataset_id: int, label: _RouterLabel, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], ) -> list[dict]: backlinks: list[_RouterLegBacklink | _RouterWalkBacklink] = [] current = label while current.previous is not None: backlinks.append(current.previous) current = current.previous.previous_label backlinks.reverse() legs = [] for backlink in backlinks: if isinstance(backlink, _RouterWalkBacklink): legs.append(_walk_leg_payload(db, backlink, dataset_id)) continue legs.append( _leg_payload( db=db, dataset_id=dataset_id, route=backlink.route, trip=backlink.trip, origin=backlink.origin, dest=backlink.dest, stop_cache=stop_cache, osm_stop_cache=osm_stop_cache, ) ) return legs def _find_walk_only_journey( db: Session, *, from_selection: StopSelection, to_selection: StopSelection, departure_seconds: int, ) -> dict | None: if from_selection.canonical_stop_id is None or to_selection.canonical_stop_id is None: return None if from_selection.canonical_stop_id == to_selection.canonical_stop_id: return None if ( from_selection.display.lon is None or from_selection.display.lat is None or to_selection.display.lon is None or to_selection.display.lat is None ): return None direct_distance_m = _distance_m( float(from_selection.display.lat), float(from_selection.display.lon), float(to_selection.display.lat), float(to_selection.display.lon), ) if direct_distance_m > PUBLIC_TRANSPORT_WALK_OPTION_MAX_SECONDS * 1.35: return None try: route = route_between_points( db, from_lon=float(from_selection.display.lon), from_lat=float(from_selection.display.lat), to_lon=float(to_selection.display.lon), to_lat=float(to_selection.display.lat), mode="walk", max_visited=80_000, ) except Exception: # noqa: BLE001 - walking comparison is optional return None duration_seconds = float(route.get("duration_seconds") or 0) if duration_seconds <= 0 or duration_seconds > PUBLIC_TRANSPORT_WALK_OPTION_MAX_SECONDS: return None arrival_seconds = departure_seconds + int(math.ceil(duration_seconds)) leg = _walk_leg_payload( db, _RouterWalkBacklink( previous_label=_RouterLabel( canonical_stop_id=from_selection.canonical_stop_id, arrival_seconds=departure_seconds, ), from_stop=from_selection.display, to_stop=to_selection.display, distance_m=float(route.get("distance_m") or 0), departure_seconds=departure_seconds, arrival_seconds=arrival_seconds, ), from_selection.display.dataset_id, ) leg["route_name"] = "Walk only" leg["duration_seconds"] = duration_seconds return _journey_payload([leg]) def _find_direct_journeys( db: Session, dataset_id: int, service_ids: set[str] | None, from_stop_ids: tuple[str, ...], to_stop_ids: tuple[str, ...], earliest_departure: int, limit: int, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], ) -> list[dict]: candidates = [ _journey_payload([leg]) for leg in _find_direct_legs( db, dataset_id, service_ids, from_stop_ids, to_stop_ids, earliest_departure, stop_cache, osm_stop_cache, max_legs=max(limit * 4, limit), ) ] return sorted(candidates, key=_journey_sort_key)[:limit] def _find_direct_legs( db: Session, dataset_id: int, service_ids: set[str] | None, from_stop_ids: tuple[str, ...], to_stop_ids: tuple[str, ...], earliest_departure: int, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], max_legs: int = 20, ) -> list[dict]: if not from_stop_ids or not to_stop_ids: return [] if service_ids == set(): return [] if uses_sidecar_stop_times(db, dataset_id): rows = _sidecar_direct_leg_rows( db=db, dataset_id=dataset_id, service_ids=service_ids, from_stop_ids=from_stop_ids, to_stop_ids=to_stop_ids, earliest_departure=earliest_departure, limit=MAX_DIRECT_ROWS, ) candidates: list[dict] = [] seen: set[tuple[object, ...]] = set() for origin, dest, trip, route in rows: dep_seconds = _departure_seconds(origin) arr_seconds = _arrival_seconds(dest) if dep_seconds is None or arr_seconds is None: continue if dep_seconds < earliest_departure or arr_seconds < dep_seconds: continue key = (route.route_id, route.short_name, origin.stop_id, dest.stop_id, dep_seconds, arr_seconds) if key in seen: continue seen.add(key) candidates.append(_leg_payload(db, dataset_id, route, trip, origin, dest, stop_cache, osm_stop_cache)) if len(candidates) >= max(1, max_legs): break return sorted(candidates, key=lambda item: (item["arrival_seconds"], -(item["departure_seconds"] or -1))) Origin = aliased(GtfsStopTime) Dest = aliased(GtfsStopTime) stmt = ( select(Origin, Dest, GtfsTrip, GtfsRoute) .join( Dest, and_( Dest.dataset_id == Origin.dataset_id, Dest.trip_id == Origin.trip_id, Dest.stop_sequence > Origin.stop_sequence, ), ) .join(GtfsTrip, and_(GtfsTrip.dataset_id == Origin.dataset_id, GtfsTrip.trip_id == Origin.trip_id)) .join(GtfsRoute, and_(GtfsRoute.dataset_id == GtfsTrip.dataset_id, GtfsRoute.route_id == GtfsTrip.route_id)) .where(Origin.dataset_id == dataset_id, Origin.stop_id.in_(from_stop_ids), Dest.stop_id.in_(to_stop_ids)) .where(or_(Origin.departure_seconds.is_(None), Origin.departure_seconds >= earliest_departure)) .order_by(Origin.departure_seconds, Origin.departure_time, Dest.arrival_seconds, Dest.arrival_time, Origin.trip_id) .limit(MAX_DIRECT_ROWS) ) stmt = _where_trip_service_active(stmt, GtfsTrip, service_ids) candidates: list[dict] = [] seen: set[tuple[object, ...]] = set() for origin, dest, trip, route in db.execute(stmt).all(): dep_seconds = _departure_seconds(origin) arr_seconds = _arrival_seconds(dest) if dep_seconds is None or arr_seconds is None: continue if dep_seconds < earliest_departure or arr_seconds < dep_seconds: continue key = (route.route_id, route.short_name, origin.stop_id, dest.stop_id, dep_seconds, arr_seconds) if key in seen: continue seen.add(key) leg = _leg_payload(db, dataset_id, route, trip, origin, dest, stop_cache, osm_stop_cache) candidates.append(leg) if len(candidates) >= max(1, max_legs): break return sorted(candidates, key=lambda item: (item["arrival_seconds"], -(item["departure_seconds"] or -1))) @dataclass(frozen=True) class _FirstLegOption: departure_seconds: int arrival_seconds: int origin: GtfsStopTime dest: GtfsStopTime trip: GtfsTrip route: GtfsRoute @dataclass(frozen=True) class _SecondLegOption: canonical_stop_id: int departure_seconds: int arrival_seconds: int origin: GtfsStopTime dest: GtfsStopTime trip: GtfsTrip route: GtfsRoute @dataclass(frozen=True) class _OneTransferCandidate: arrival_seconds: int departure_seconds: int first_route: GtfsRoute first_trip: GtfsTrip first_origin: GtfsStopTime first_dest: GtfsStopTime second: _SecondLegOption final_walk: _RouterWalkBacklink | None = None @dataclass(frozen=True) class _AccessTransferCandidate: canonical_stop_id: int option: _FirstLegOption rank: int def _find_one_transfer_journeys( db: Session, first_dataset_id: int, second_dataset_id: int, first_service_ids: set[str] | None, second_service_ids: set[str] | None, from_stop_ids: tuple[str, ...], to_stop_ids: tuple[str, ...], origin_canonical_stop_id: int | None, target_canonical_stop_id: int | None, earliest_departure: int, latest_arrival: int | None, transfer_seconds: int, limit: int, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], ) -> list[dict]: if first_service_ids == set() or second_service_ids == set(): return [] if latest_arrival is not None and latest_arrival <= earliest_departure: return [] destination_groups = _destination_stop_groups_with_final_walks( db, dataset_id=second_dataset_id, to_stop_ids=to_stop_ids, target_canonical_stop_id=target_canonical_stop_id, ) second_legs: dict[int, list[_SecondLegOption]] = {} final_walk_by_canonical: dict[int, _RouterWalkBacklink] = {} for destination_stop_ids, group_walks in destination_groups: group_second_legs = _targeted_second_leg_options( db, second_dataset_id, second_service_ids, destination_stop_ids, earliest_departure, latest_arrival, ) for canonical_stop_id, options in group_second_legs.items(): second_legs.setdefault(canonical_stop_id, []).extend(options) final_walk_by_canonical.update(group_walks) if not second_legs: return [] second_dest_canonical = _canonical_ids_for_stop_ids( db, second_dataset_id, {option.dest.stop_id for options in second_legs.values() for option in options}, ) transfer_stop_ids_by_canonical = _gtfs_stop_ids_for_canonical_ids(db, first_dataset_id, set(second_legs)) candidates: list[_OneTransferCandidate] = [] seen: set[tuple[object, ...]] = set() second_leg_options = sorted( [ (canonical_stop_id, option) for canonical_stop_id, options in second_legs.items() for option in options ], key=lambda item: (item[1].arrival_seconds, -item[1].departure_seconds), ) latest_first_arrival_limit = max( ( option.departure_seconds - transfer_seconds for _, option in second_leg_options if option.departure_seconds - transfer_seconds >= earliest_departure ), default=earliest_departure, ) first_options_by_canonical = _first_leg_options_to_transfer_stops( db=db, dataset_id=first_dataset_id, service_ids=first_service_ids, from_stop_ids=from_stop_ids, transfer_stop_ids_by_canonical=transfer_stop_ids_by_canonical, earliest_departure=earliest_departure, latest_arrival=latest_first_arrival_limit, ) searched_second_legs = 0 best_candidate_arrival: int | None = None for canonical_stop_id, second in second_leg_options: if searched_second_legs >= MAX_BACKWARD_SECOND_LEG_OPTIONS and candidates: break if best_candidate_arrival is not None and candidates and second.arrival_seconds > best_candidate_arrival: break searched_second_legs += 1 transfer_stop_ids = transfer_stop_ids_by_canonical.get(canonical_stop_id) if not transfer_stop_ids: continue latest_first_arrival = second.departure_seconds - transfer_seconds if latest_first_arrival < earliest_departure: continue excluded_trip_id = second.trip.trip_id if first_dataset_id == second_dataset_id else None first = _best_first_leg_for_second( first_options_by_canonical.get(canonical_stop_id, []), latest_arrival=latest_first_arrival, excluded_trip_id=excluded_trip_id, ) if first is None: continue if origin_canonical_stop_id is not None and canonical_stop_id == origin_canonical_stop_id: continue final_walk_template = final_walk_by_canonical.get(second_dest_canonical.get(second.dest.stop_id)) final_walk = None candidate_arrival = second.arrival_seconds if final_walk_template is not None: if origin_canonical_stop_id is not None and final_walk_template.from_stop.id == origin_canonical_stop_id: continue candidate_arrival = second.arrival_seconds + _walking_transfer_seconds(final_walk_template.distance_m) if latest_arrival is not None and candidate_arrival >= latest_arrival: continue final_walk = _RouterWalkBacklink( previous_label=final_walk_template.previous_label, from_stop=final_walk_template.from_stop, to_stop=final_walk_template.to_stop, distance_m=final_walk_template.distance_m, departure_seconds=second.arrival_seconds, arrival_seconds=candidate_arrival, ) key = ( first_dataset_id, first.trip.trip_id, first.origin.stop_sequence, first.dest.stop_id, second_dataset_id, second.trip.trip_id, second.origin.stop_sequence, second.dest.stop_sequence, None if final_walk is None else final_walk.to_stop.stop_id, ) if key in seen: continue seen.add(key) best_candidate_arrival = candidate_arrival if best_candidate_arrival is None else min(best_candidate_arrival, candidate_arrival) candidates.append( _OneTransferCandidate( arrival_seconds=candidate_arrival, departure_seconds=first.departure_seconds, first_route=first.route, first_trip=first.trip, first_origin=first.origin, first_dest=first.dest, second=second, final_walk=final_walk, ) ) if len(candidates) >= MAX_TARGET_TRANSFER_CANDIDATES: break tightened_candidates = _latest_feeder_by_onward_leg(candidates) journeys: list[dict] = [] for candidate in sorted(tightened_candidates, key=_one_transfer_candidate_sort_key)[ : max(limit * 4, limit) ]: first_leg = _leg_payload( db, first_dataset_id, candidate.first_route, candidate.first_trip, candidate.first_origin, candidate.first_dest, stop_cache, osm_stop_cache, ) second_leg = _leg_payload( db, second_dataset_id, candidate.second.route, candidate.second.trip, candidate.second.origin, candidate.second.dest, stop_cache, osm_stop_cache, ) legs = [first_leg, second_leg] if candidate.final_walk is not None: legs.append(_walk_leg_payload(db, candidate.final_walk, second_dataset_id)) journeys.append(_journey_payload(legs)) return sorted(journeys, key=_journey_sort_key)[:limit] def _destination_stop_groups_with_final_walks( db: Session, dataset_id: int, to_stop_ids: tuple[str, ...], target_canonical_stop_id: int | None, ) -> list[tuple[tuple[str, ...], dict[int, _RouterWalkBacklink]]]: if target_canonical_stop_id is None: return [(to_stop_ids, {})] target_summary = _canonical_stop_summaries(db, dataset_id, {target_canonical_stop_id}).get(target_canonical_stop_id) if target_summary is None: return [(to_stop_ids, {})] nearby_rows = ( _walking_transfer_rows_postgres(db, dataset_id, (target_canonical_stop_id,)) if settings.is_postgresql_database else _walking_transfer_rows_sqlite(db, dataset_id, (target_canonical_stop_id,)) ) nearby_ids = [int(target_id) for _, target_id, _ in nearby_rows] if not nearby_ids: return [(to_stop_ids, {})] summaries = _canonical_stop_summaries(db, dataset_id, set(nearby_ids)) final_walk_by_canonical: dict[int, _RouterWalkBacklink] = {} for _, nearby_id, distance_m in nearby_rows: nearby_id = int(nearby_id) from_summary = summaries.get(nearby_id) if from_summary is None: continue final_walk_by_canonical[nearby_id] = _RouterWalkBacklink( previous_label=_RouterLabel(nearby_id, 0), from_stop=from_summary, to_stop=target_summary, distance_m=float(distance_m or 0), departure_seconds=0, arrival_seconds=0, ) stop_ids_by_canonical = _gtfs_stop_ids_for_canonical_ids(db, dataset_id, set(final_walk_by_canonical)) groups: list[tuple[tuple[str, ...], dict[int, _RouterWalkBacklink]]] = [(to_stop_ids, {})] for canonical_stop_id, stop_ids in stop_ids_by_canonical.items(): walk = final_walk_by_canonical.get(canonical_stop_id) if not stop_ids or walk is None: continue groups.append((stop_ids[:MAX_GROUP_STOP_IDS], {canonical_stop_id: walk})) return groups def _canonical_ids_for_stop_ids(db: Session, dataset_id: int, stop_ids: set[str]) -> dict[str, int]: if not stop_ids: return {} rows = db.execute( select(CanonicalStopLink.external_id, CanonicalStopLink.canonical_stop_id) .where( CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.dataset_id == dataset_id, CanonicalStopLink.external_id.in_(stop_ids), ) ).all() return {str(stop_id): int(canonical_stop_id) for stop_id, canonical_stop_id in rows} def _find_access_transfer_journeys( db: Session, from_selection: StopSelection, to_stop_id: int | str, earliest_departure: int, max_transfers: int, transfer_seconds: int, limit: int, source_ids: list[int] | None, service_date: date | None, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], ) -> list[dict]: journeys: list[dict] = [] for dataset_id, from_stop_ids in from_selection.stop_ids_by_dataset.items(): service_ids = _service_ids_by_dataset(db, [dataset_id], service_date).get(dataset_id) if service_ids == set(): continue candidates = _access_transfer_candidates( db=db, dataset_id=dataset_id, service_ids=service_ids, from_selection=from_selection, from_stop_ids=from_stop_ids, earliest_departure=earliest_departure, ) for candidate in candidates: access_leg = _leg_payload( db=db, dataset_id=dataset_id, route=candidate.option.route, trip=candidate.option.trip, origin=candidate.option.origin, dest=candidate.option.dest, stop_cache=stop_cache, osm_stop_cache=osm_stop_cache, ) onward_departure = format_gtfs_time(candidate.option.arrival_seconds + transfer_seconds) if onward_departure is None: continue try: onward = find_journeys( db=db, from_stop_id=_stop_place_token(candidate.canonical_stop_id, dataset_id), to_stop_id=to_stop_id, departure=onward_departure, max_transfers=max(0, max_transfers - 1), limit=limit, transfer_seconds=transfer_seconds, source_ids=source_ids, service_date=service_date, _allow_access_transfer=False, ) except ValueError: continue for onward_journey in onward.get("journeys", [])[:limit]: journeys.append(_prepend_access_leg_to_journey(access_leg, onward_journey)) if len(journeys) >= limit * 3: break if len(journeys) >= limit * 3: break return sorted(journeys, key=_journey_sort_key)[:limit] def _access_transfer_candidates( db: Session, dataset_id: int, service_ids: set[str] | None, from_selection: StopSelection, from_stop_ids: tuple[str, ...], earliest_departure: int, ) -> list[_AccessTransferCandidate]: boardings = _origin_boardings( db=db, dataset_id=dataset_id, service_ids=service_ids, stop_ids=from_stop_ids, earliest_departure=earliest_departure, latest_departure=earliest_departure + ACCESS_TRANSFER_MAX_SECONDS, ) if not boardings: return [] calls_by_trip = _stop_times_by_trip(db, dataset_id, sorted({boarding.trip.trip_id for boarding in boardings})) stop_to_canonical = _canonical_ids_for_trip_calls(db, dataset_id, calls_by_trip) canonical_ids = sorted(set(stop_to_canonical.values())) canonical_names = { int(canonical.id): canonical.name for canonical in db.scalars(select(CanonicalStop).where(CanonicalStop.id.in_(canonical_ids))).all() } candidates: dict[int, _AccessTransferCandidate] = {} for boarding in boardings: departure = _departure_seconds(boarding.call) if departure is None or departure < earliest_departure: continue for call in calls_by_trip.get(boarding.trip.trip_id, []): if call.stop_sequence <= boarding.call.stop_sequence: continue arrival = _arrival_seconds(call) if arrival is None or arrival < departure: continue if arrival - earliest_departure > ACCESS_TRANSFER_MAX_SECONDS: break canonical_stop_id = stop_to_canonical.get(call.stop_id) if canonical_stop_id is None or canonical_stop_id == from_selection.canonical_stop_id: continue stop_name = _stop_name_for_stop_id(db, dataset_id, call.stop_id) rank = _station_importance_rank(canonical_names.get(canonical_stop_id), stop_name) if rank > 1: continue option = _FirstLegOption( departure_seconds=departure, arrival_seconds=arrival, origin=boarding.call, dest=call, trip=boarding.trip, route=boarding.route, ) current = candidates.get(canonical_stop_id) candidate = _AccessTransferCandidate(canonical_stop_id=canonical_stop_id, option=option, rank=rank) if current is None or _access_transfer_sort_key(candidate) < _access_transfer_sort_key(current): candidates[canonical_stop_id] = candidate return sorted(candidates.values(), key=_access_transfer_sort_key)[:MAX_ACCESS_TRANSFER_CANDIDATES] def _access_transfer_sort_key(candidate: _AccessTransferCandidate) -> tuple[int, int, int, str]: return ( candidate.rank, candidate.option.arrival_seconds, candidate.option.arrival_seconds - candidate.option.departure_seconds, candidate.option.dest.stop_id, ) def _stop_name_for_stop_id(db: Session, dataset_id: int, stop_id: str) -> str | None: stop = db.scalar(select(GtfsStop).where(GtfsStop.dataset_id == dataset_id, GtfsStop.stop_id == stop_id)) return None if stop is None else stop.name def _prepend_access_leg_to_journey(access_leg: dict, onward_journey: dict) -> dict: access_payload = _journey_payload([access_leg]) access_features = access_payload.get("features") or {} onward_features = onward_journey.get("features") or {} features = _combine_via_features(access_features, onward_features, first_leg_count=1) legs = [access_payload["legs"][0], *(onward_journey.get("legs") or [])] departure = access_leg.get("departure_seconds") arrival = onward_journey.get("arrival_seconds") transit_legs = [leg for leg in legs if leg.get("mode") != "walk"] duration_seconds = None if departure is None or arrival is None else max(0, int(arrival) - int(departure)) return { "transfers": max(0, len(transit_legs) - 1), "departure_seconds": departure, "arrival_seconds": arrival, "departure_time": format_gtfs_time(departure), "arrival_time": format_gtfs_time(arrival), "departure_time_label": format_gtfs_time_label(departure), "arrival_time_label": format_gtfs_time_label(arrival), "duration_seconds": duration_seconds, "duration_minutes": duration_minutes_ceil(duration_seconds), "duration_label": format_duration_label(duration_seconds), "legs": legs, "features": feature_collection(features), "access_transfer_composed": True, } def _first_leg_options_to_transfer_stops( db: Session, dataset_id: int, service_ids: set[str] | None, from_stop_ids: tuple[str, ...], transfer_stop_ids_by_canonical: dict[int, tuple[str, ...]], earliest_departure: int, latest_arrival: int, ) -> dict[int, list[_FirstLegOption]]: if not transfer_stop_ids_by_canonical: return {} stop_to_canonical = { stop_id: canonical_stop_id for canonical_stop_id, stop_ids in transfer_stop_ids_by_canonical.items() for stop_id in stop_ids } if not stop_to_canonical: return {} boardings = _origin_boardings( db=db, dataset_id=dataset_id, service_ids=service_ids, stop_ids=from_stop_ids, earliest_departure=earliest_departure, latest_departure=latest_arrival, ) if not boardings: return {} calls_by_trip = _stop_times_by_trip(db, dataset_id, sorted({boarding.trip.trip_id for boarding in boardings})) grouped: dict[int, list[_FirstLegOption]] = {} seen: set[tuple[object, ...]] = set() for boarding in boardings: departure = _departure_seconds(boarding.call) if departure is None or departure < earliest_departure: continue calls = calls_by_trip.get(boarding.trip.trip_id, []) for call in calls: if call.stop_sequence <= boarding.call.stop_sequence: continue canonical_stop_id = stop_to_canonical.get(call.stop_id) if canonical_stop_id is None: continue arrival = _arrival_seconds(call) if arrival is None or arrival < departure or arrival > latest_arrival: continue key = (canonical_stop_id, boarding.trip.trip_id, boarding.call.stop_sequence, call.stop_sequence) if key in seen: continue seen.add(key) grouped.setdefault(canonical_stop_id, []).append( _FirstLegOption( departure_seconds=departure, arrival_seconds=arrival, origin=boarding.call, dest=call, trip=boarding.trip, route=boarding.route, ) ) for canonical_stop_id, options in grouped.items(): grouped[canonical_stop_id] = sorted( options, key=lambda option: (option.departure_seconds, option.arrival_seconds), reverse=True, )[:MAX_TRANSFER_BOARDINGS] return grouped def _best_first_leg_for_second( options: list[_FirstLegOption], latest_arrival: int, excluded_trip_id: str | None, ) -> _FirstLegOption | None: for option in options: if excluded_trip_id and option.trip.trip_id == excluded_trip_id: continue if option.arrival_seconds <= latest_arrival: return option return None def _latest_feeder_by_onward_leg(candidates: list[_OneTransferCandidate]) -> list[_OneTransferCandidate]: latest_by_second_leg: dict[tuple[object, ...], _OneTransferCandidate] = {} for candidate in candidates: key = ( candidate.second.canonical_stop_id, candidate.second.trip.dataset_id, candidate.second.trip.trip_id, candidate.second.origin.stop_sequence, candidate.second.dest.stop_sequence, candidate.second.departure_seconds, candidate.second.arrival_seconds, ) current = latest_by_second_leg.get(key) if current is None or _one_transfer_feeder_rank(candidate) > _one_transfer_feeder_rank(current): latest_by_second_leg[key] = candidate return list(latest_by_second_leg.values()) def _one_transfer_feeder_rank(candidate: _OneTransferCandidate) -> tuple[int, int]: first_arrival = _arrival_seconds(candidate.first_dest) or -1 return (candidate.departure_seconds, first_arrival) def _one_transfer_candidate_sort_key(candidate: _OneTransferCandidate) -> tuple[float, float, int]: return ( float(candidate.arrival_seconds), -float(candidate.departure_seconds), 1, ) def _latest_direct_leg_to_stops( db: Session, dataset_id: int, service_ids: set[str] | None, from_stop_ids: tuple[str, ...], to_stop_ids: tuple[str, ...], earliest_departure: int, latest_arrival: int, excluded_trip_id: str | None = None, ) -> _FirstLegOption | None: if not from_stop_ids or not to_stop_ids: return None if service_ids == set(): return None if uses_sidecar_stop_times(db, dataset_id): rows = _sidecar_latest_direct_leg_rows( db=db, dataset_id=dataset_id, service_ids=service_ids, from_stop_ids=from_stop_ids, to_stop_ids=to_stop_ids, earliest_departure=earliest_departure, latest_arrival=latest_arrival, excluded_trip_id=excluded_trip_id, ) for origin, dest, trip, route in rows: departure = _departure_seconds(origin) arrival = _arrival_seconds(dest) if departure is None or arrival is None: continue if departure < earliest_departure or arrival > latest_arrival or arrival < departure: continue return _FirstLegOption( departure_seconds=departure, arrival_seconds=arrival, origin=origin, dest=dest, trip=trip, route=route, ) return None Origin = aliased(GtfsStopTime) Dest = aliased(GtfsStopTime) stmt = ( select(Origin, Dest, GtfsTrip, GtfsRoute) .join( Dest, and_( Dest.dataset_id == Origin.dataset_id, Dest.trip_id == Origin.trip_id, Dest.stop_sequence > Origin.stop_sequence, ), ) .join(GtfsTrip, and_(GtfsTrip.dataset_id == Origin.dataset_id, GtfsTrip.trip_id == Origin.trip_id)) .join(GtfsRoute, and_(GtfsRoute.dataset_id == GtfsTrip.dataset_id, GtfsRoute.route_id == GtfsTrip.route_id)) .where( Origin.dataset_id == dataset_id, Origin.stop_id.in_(from_stop_ids), Dest.stop_id.in_(to_stop_ids), or_(Origin.departure_seconds.is_(None), Origin.departure_seconds >= earliest_departure), or_(Dest.arrival_seconds.is_(None), Dest.arrival_seconds <= latest_arrival), ) .order_by( Origin.departure_seconds.desc(), Origin.departure_time.desc(), Dest.arrival_seconds.desc(), Dest.arrival_time.desc(), Origin.trip_id, ) .limit(120) ) stmt = _where_trip_service_active(stmt, GtfsTrip, service_ids) if excluded_trip_id: stmt = stmt.where(GtfsTrip.trip_id != excluded_trip_id) for origin, dest, trip, route in db.execute(stmt).all(): departure = _departure_seconds(origin) arrival = _arrival_seconds(dest) if departure is None or arrival is None: continue if departure < earliest_departure or arrival > latest_arrival or arrival < departure: continue return _FirstLegOption( departure_seconds=departure, arrival_seconds=arrival, origin=origin, dest=dest, trip=trip, route=route, ) return None def _targeted_second_leg_options( db: Session, dataset_id: int, service_ids: set[str] | None, to_stop_ids: tuple[str, ...], earliest_departure: int, latest_arrival: int | None, ) -> dict[int, list[_SecondLegOption]]: if not to_stop_ids: return {} if service_ids == set(): return {} destination_rows = _destination_arrivals(db, dataset_id, service_ids, to_stop_ids, earliest_departure, latest_arrival) if not destination_rows: return {} calls_by_trip = _stop_times_by_trip(db, dataset_id, sorted({trip.trip_id for _, trip, _ in destination_rows})) stop_to_canonical = _canonical_ids_for_trip_calls(db, dataset_id, calls_by_trip) grouped: dict[int, list[_SecondLegOption]] = {} seen: set[tuple[object, ...]] = set() to_stop_id_set = set(to_stop_ids) for dest, trip, route in destination_rows: dest_arrival = _arrival_seconds(dest) if dest_arrival is None: continue for call in calls_by_trip.get(trip.trip_id, []): if call.stop_sequence >= dest.stop_sequence: break if call.stop_id in to_stop_id_set: continue departure = _departure_seconds(call) if departure is None or departure < earliest_departure or departure > dest_arrival: continue canonical_stop_id = stop_to_canonical.get(call.stop_id) if canonical_stop_id is None: continue key = (canonical_stop_id, trip.trip_id, call.stop_sequence, dest.stop_sequence) if key in seen: continue seen.add(key) grouped.setdefault(canonical_stop_id, []).append( _SecondLegOption( canonical_stop_id=canonical_stop_id, departure_seconds=departure, arrival_seconds=dest_arrival, origin=call, dest=dest, trip=trip, route=route, ) ) capped: dict[int, list[_SecondLegOption]] = {} for canonical_stop_id, options in grouped.items(): selected = sorted(options, key=lambda item: (item.departure_seconds, item.arrival_seconds))[ :MAX_TARGET_SECOND_LEGS_PER_STOP ] if selected: capped[canonical_stop_id] = selected return capped def _destination_arrivals( db: Session, dataset_id: int, service_ids: set[str] | None, stop_ids: tuple[str, ...], earliest_departure: int, latest_arrival: int | None, ) -> list[tuple[GtfsStopTime, GtfsTrip, GtfsRoute]]: if service_ids == set(): return [] if uses_sidecar_stop_times(db, dataset_id): rows = _sidecar_destination_arrival_rows( db=db, dataset_id=dataset_id, service_ids=service_ids, stop_ids=stop_ids, earliest_departure=earliest_departure, latest_arrival=latest_arrival, ) selected = [] for stop_time, trip, route in rows: arrival = _arrival_seconds(stop_time) if arrival is None or arrival < earliest_departure: continue if latest_arrival is not None and arrival >= latest_arrival: continue selected.append((stop_time, trip, route)) return selected stmt = ( select(GtfsStopTime, GtfsTrip, GtfsRoute) .join(GtfsTrip, and_(GtfsTrip.dataset_id == GtfsStopTime.dataset_id, GtfsTrip.trip_id == GtfsStopTime.trip_id)) .join(GtfsRoute, and_(GtfsRoute.dataset_id == GtfsTrip.dataset_id, GtfsRoute.route_id == GtfsTrip.route_id)) .where(GtfsStopTime.dataset_id == dataset_id, GtfsStopTime.stop_id.in_(stop_ids)) .where(or_(GtfsStopTime.arrival_seconds.is_(None), GtfsStopTime.arrival_seconds >= earliest_departure)) .order_by(GtfsStopTime.arrival_seconds, GtfsStopTime.arrival_time, GtfsStopTime.trip_id) .limit(MAX_TARGET_DESTINATION_ARRIVALS) ) stmt = _where_trip_service_active(stmt, GtfsTrip, service_ids) if latest_arrival is not None: stmt = stmt.where(or_(GtfsStopTime.arrival_seconds.is_(None), GtfsStopTime.arrival_seconds <= latest_arrival)) rows = [] for stop_time, trip, route in db.execute(stmt).all(): arrival = _arrival_seconds(stop_time) if arrival is None or arrival < earliest_departure: continue if latest_arrival is not None and arrival >= latest_arrival: continue rows.append((stop_time, trip, route)) return rows @dataclass(frozen=True) class _Boarding: call: GtfsStopTime trip: GtfsTrip route: GtfsRoute def _origin_boardings( db: Session, dataset_id: int, service_ids: set[str] | None, stop_ids: tuple[str, ...], earliest_departure: int, latest_departure: int | None = None, ) -> list[_Boarding]: if not stop_ids: return [] if service_ids == set(): return [] if uses_sidecar_stop_times(db, dataset_id): boardings: list[_Boarding] = [] for call, trip, route in _sidecar_boarding_rows( db=db, dataset_id=dataset_id, service_ids=service_ids, stop_ids=stop_ids, earliest_departure=earliest_departure, latest_departure=latest_departure, limit=MAX_DIRECT_ROWS, ): departure = _departure_seconds(call) if departure is None or departure < earliest_departure: continue if latest_departure is not None and departure >= latest_departure: continue boardings.append(_Boarding(call=call, trip=trip, route=route)) if len(boardings) >= MAX_TRANSFER_BOARDINGS: break return boardings stmt = ( select(GtfsStopTime, GtfsTrip, GtfsRoute) .join(GtfsTrip, and_(GtfsTrip.dataset_id == GtfsStopTime.dataset_id, GtfsTrip.trip_id == GtfsStopTime.trip_id)) .join(GtfsRoute, and_(GtfsRoute.dataset_id == GtfsTrip.dataset_id, GtfsRoute.route_id == GtfsTrip.route_id)) .where(GtfsStopTime.dataset_id == dataset_id, GtfsStopTime.stop_id.in_(stop_ids)) .where(or_(GtfsStopTime.departure_seconds.is_(None), GtfsStopTime.departure_seconds >= earliest_departure)) .order_by(GtfsStopTime.departure_seconds, GtfsStopTime.departure_time, GtfsStopTime.trip_id) .limit(MAX_DIRECT_ROWS) ) stmt = _where_trip_service_active(stmt, GtfsTrip, service_ids) if latest_departure is not None: stmt = stmt.where(or_(GtfsStopTime.departure_seconds.is_(None), GtfsStopTime.departure_seconds < latest_departure)) boardings: list[_Boarding] = [] for call, trip, route in db.execute(stmt).all(): departure = _departure_seconds(call) if departure is None or departure < earliest_departure: continue if latest_departure is not None and departure >= latest_departure: continue boardings.append(_Boarding(call=call, trip=trip, route=route)) if len(boardings) >= MAX_TRANSFER_BOARDINGS: break return boardings def _stop_times_by_trip(db: Session, dataset_id: int, trip_ids: list[str]) -> dict[str, list[GtfsStopTime]]: return storage_stop_times_by_trip(db, dataset_id, trip_ids) def _leg_payload( db: Session, dataset_id: int, route: GtfsRoute, trip: GtfsTrip, origin: GtfsStopTime, dest: GtfsStopTime, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], ) -> dict: from_stop = _stop_for_id(db, dataset_id, origin.stop_id, stop_cache) to_stop = _stop_for_id(db, dataset_id, dest.stop_id, stop_cache) departure_seconds = _departure_seconds(origin) arrival_seconds = _arrival_seconds(dest) linked_route_pattern = route_pattern_for_trip(db, route, trip) stops = _leg_stop_payloads( db=db, dataset_id=dataset_id, trip_id=trip.trip_id, start_sequence=origin.stop_sequence, end_sequence=dest.stop_sequence, stop_cache=stop_cache, osm_stop_cache=osm_stop_cache, ) geometry, geometry_source, route_pattern = _leg_geometry(db, linked_route_pattern, route, trip, from_stop, to_stop, stops) source = _source_payload_for_dataset_id(db, dataset_id) stop_count = len(stops) return { "dataset_id": dataset_id, "source_id": None if source is None else source["id"], "source_name": None if source is None else source["name"], "route_db_id": route.id, "route_id": route.route_id, "route_ref": route.short_name, "route_name": route.long_name, "mode": route.mode, "operator": route.operator_name, "trip_id": trip.trip_id, "route_pattern_id": None if route_pattern is None else route_pattern.id, "route_pattern_source": None if route_pattern is None else route_pattern.source_kind, "route_pattern_status": None if route_pattern is None else route_pattern.status, "from": _stop_payload(from_stop), "to": _stop_payload(to_stop), "departure_seconds": departure_seconds, "arrival_seconds": arrival_seconds, "departure_time": format_gtfs_time(departure_seconds), "arrival_time": format_gtfs_time(arrival_seconds), "departure_time_label": format_gtfs_time_label(departure_seconds), "arrival_time_label": format_gtfs_time_label(arrival_seconds), "stop_count": stop_count, "intermediate_stop_count": max(0, stop_count - 2), "geometry": geometry, "geometry_source": geometry_source, "stops": stops, } def _journey_payload(legs: list[dict]) -> dict: departure = legs[0]["departure_seconds"] arrival = legs[-1]["arrival_seconds"] duration_seconds = None if departure is None or arrival is None else max(0, int(arrival) - int(departure)) transit_legs = [leg for leg in legs if leg.get("mode") != "walk"] features = [] for index, leg in enumerate(legs, start=1): if leg["geometry"] is None: continue features.append( { "type": "Feature", "geometry": leg["geometry"], "properties": { "leg": index, "route_id": leg["route_id"], "route_ref": leg["route_ref"], "mode": leg["mode"], "trip_id": leg["trip_id"], "route_pattern_id": leg.get("route_pattern_id"), "route_pattern_source": leg.get("route_pattern_source"), "route_pattern_status": leg.get("route_pattern_status"), "geometry_source": leg["geometry_source"], }, } ) features.extend(_journey_stop_features(legs)) return { "transfers": max(0, len(transit_legs) - 1), "departure_seconds": departure, "arrival_seconds": arrival, "departure_time": format_gtfs_time(departure), "arrival_time": format_gtfs_time(arrival), "departure_time_label": format_gtfs_time_label(departure), "arrival_time_label": format_gtfs_time_label(arrival), "duration_seconds": duration_seconds, "duration_minutes": duration_minutes_ceil(duration_seconds), "duration_label": format_duration_label(duration_seconds), "legs": [_leg_public_payload(leg) for leg in legs], "features": feature_collection(features), } def _leg_public_payload(leg: dict) -> dict: return {key: value for key, value in leg.items() if key not in {"geometry", "departure_seconds", "arrival_seconds"}} def _walk_leg_payload(db: Session, backlink: _RouterWalkBacklink, dataset_id: int, *, route_geometry: bool = True) -> dict: geometry = None geometry_source = "walking_transfer" distance_m = round(float(backlink.distance_m or 0), 1) duration_seconds = max(0, int(backlink.arrival_seconds) - int(backlink.departure_seconds)) arrival_seconds = backlink.arrival_seconds if ( backlink.from_stop.lon is not None and backlink.from_stop.lat is not None and backlink.to_stop.lon is not None and backlink.to_stop.lat is not None ): if route_geometry: routed_geometry, routed_distance, routed_duration_seconds = _walk_geometry_from_routing(db, backlink.from_stop, backlink.to_stop) else: routed_geometry, routed_distance, routed_duration_seconds = None, 0.0, None if routed_geometry is not None: geometry = routed_geometry geometry_source = "routing_layer:walk" distance_m = routed_distance if routed_duration_seconds is not None: duration_seconds = max(0, int(math.ceil(routed_duration_seconds))) arrival_seconds = backlink.departure_seconds + duration_seconds if geometry is None: geometry = { "type": "LineString", "coordinates": [ [backlink.from_stop.lon, backlink.from_stop.lat], [backlink.to_stop.lon, backlink.to_stop.lat], ], } return { "dataset_id": dataset_id, "source_id": None, "source_name": None, "route_db_id": None, "route_id": "walk", "route_ref": "Walk", "route_name": "Walking transfer", "mode": "walk", "operator": None, "trip_id": None, "route_pattern_id": None, "route_pattern_source": None, "route_pattern_status": None, "from": _stop_payload(backlink.from_stop), "to": _stop_payload(backlink.to_stop), "departure_seconds": backlink.departure_seconds, "arrival_seconds": arrival_seconds, "departure_time": format_gtfs_time(backlink.departure_seconds), "arrival_time": format_gtfs_time(arrival_seconds), "departure_time_label": format_gtfs_time_label(backlink.departure_seconds), "arrival_time_label": format_gtfs_time_label(arrival_seconds), "distance_m": distance_m, "duration_seconds": duration_seconds, "geometry": geometry, "geometry_source": geometry_source, "stops": [ _canonical_walk_stop_payload(backlink.from_stop, 1), _canonical_walk_stop_payload(backlink.to_stop, 2), ], } def _walk_geometry_from_routing(db: Session, from_stop: StopSummary, to_stop: StopSummary) -> tuple[dict | None, float, float | None]: if from_stop.lon is None or from_stop.lat is None or to_stop.lon is None or to_stop.lat is None: return None, 0.0, None cache_key = ( round(float(from_stop.lon), 6), round(float(from_stop.lat), 6), round(float(to_stop.lon), 6), round(float(to_stop.lat), 6), ) cached = _walk_geometry_cache_get(cache_key) if cached is not None: return cached try: route = route_between_points( db, from_lon=float(from_stop.lon), from_lat=float(from_stop.lat), to_lon=float(to_stop.lon), to_lat=float(to_stop.lat), mode="walk", max_visited=5_000, ) except Exception: # noqa: BLE001 - routing graph may be unavailable during import return None, 0.0, None features = (route.get("features") or {}).get("features") if isinstance(route, dict) else None if not isinstance(features, list): return None, 0.0, None lines = [ feature.get("geometry") for feature in features if isinstance(feature, dict) and (feature.get("geometry") or {}).get("type") == "LineString" ] coordinates = [ geometry.get("coordinates") for geometry in lines if isinstance(geometry, dict) and len(geometry.get("coordinates") or []) >= 2 ] if not coordinates: return None, 0.0, None geometry = coordinates[0] if len(coordinates) == 1 else None duration_seconds = float(route.get("duration_seconds") or 0) if geometry is not None: result = ({"type": "LineString", "coordinates": geometry}, float(route.get("distance_m") or 0), duration_seconds) else: result = ({"type": "MultiLineString", "coordinates": coordinates}, float(route.get("distance_m") or 0), duration_seconds) _walk_geometry_cache_put(cache_key, result) return _copy_walk_geometry_cache_value(result) def _walk_geometry_cache_get(key: tuple[float, float, float, float]) -> tuple[dict | None, float, float | None] | None: now = time.monotonic() with _walk_geometry_cache_lock: cached = _walk_geometry_cache.get(key) if cached is None: return None expires_at, value = cached if expires_at <= now: _walk_geometry_cache.pop(key, None) return None return _copy_walk_geometry_cache_value(value) def _walk_geometry_cache_put(key: tuple[float, float, float, float], value: tuple[dict | None, float, float | None]) -> None: with _walk_geometry_cache_lock: _walk_geometry_cache[key] = (time.monotonic() + WALK_GEOMETRY_CACHE_TTL_SECONDS, _copy_walk_geometry_cache_value(value)) if len(_walk_geometry_cache) <= WALK_GEOMETRY_CACHE_MAX_ENTRIES: return oldest = sorted(_walk_geometry_cache.items(), key=lambda item: item[1][0])[ : len(_walk_geometry_cache) - WALK_GEOMETRY_CACHE_MAX_ENTRIES ] for old_key, _ in oldest: _walk_geometry_cache.pop(old_key, None) def _copy_walk_geometry_cache_value(value: tuple[dict | None, float, float | None]) -> tuple[dict | None, float, float | None]: geometry, distance_m, duration_seconds = value copied_geometry = None if geometry is None else json.loads(json.dumps(geometry)) return copied_geometry, distance_m, duration_seconds def _canonical_walk_stop_payload(stop: StopSummary, sequence: int) -> dict: payload = _stop_payload(stop) payload["stop_sequence"] = sequence is_external_location = is_location_token(stop.stop_id) payload["visual_source"] = "address" if is_external_location else "canonical_stop" payload["visual_lon"] = stop.lon payload["visual_lat"] = stop.lat payload["osm"] = None payload["canonical_stop"] = None if is_external_location else {"id": stop.id, "name": stop.name} return payload def _leg_geometry( db: Session, linked_route_pattern: RoutePattern | None, route: GtfsRoute, trip: GtfsTrip, from_stop: StopSummary, to_stop: StopSummary, fallback_stops: list[dict], ) -> tuple[dict | None, str, RoutePattern | None]: cache_key = _leg_geometry_cache_key(route, trip, linked_route_pattern, from_stop, to_stop) cached = _leg_geometry_cache_get(db, cache_key) if cached is not None: return cached route_layer_candidates: list[tuple[str, str | None, RoutePattern | None]] = [] gtfs_shape_candidates: list[tuple[str, str | None, RoutePattern | None]] = [] legacy_candidates: list[tuple[str, str | None, RoutePattern | None]] = [] if linked_route_pattern is not None: route_layer_candidates.append((f"route_layer:{linked_route_pattern.source_kind}", linked_route_pattern.geometry_geojson, linked_route_pattern)) route_layer_candidates.extend(_alternate_route_pattern_geometry_candidates(db, route, linked_route_pattern)) if trip.shape_id: shape_row = db.scalar( select(GtfsShape).where( GtfsShape.dataset_id == trip.dataset_id, GtfsShape.shape_id == trip.shape_id, ) ) if shape_row is not None: gtfs_shape_candidates.append(("gtfs_shape", shape_row.geometry_geojson, None)) legacy_candidates.append(("legacy_gtfs_route", route.geometry_geojson, None)) full_geometry_candidates = [*route_layer_candidates, *gtfs_shape_candidates] usable_route_layer_candidates = _usable_geometry_candidates(route_layer_candidates) for geometry_source, geometry_text, candidate_pattern in _usable_geometry_candidates(full_geometry_candidates): geometry = _validated_leg_geometry(geometry_text, from_stop, to_stop) if geometry is not None: return _leg_geometry_cache_put(cache_key, geometry, geometry_source, candidate_pattern) stop_coords = _stop_sequence_coords(fallback_stops, from_stop, to_stop) for geometry_source, geometry_text, candidate_pattern in usable_route_layer_candidates: stitched = _stitched_partial_geometry(geometry_text, stop_coords) if stitched is not None: return _leg_geometry_cache_put(cache_key, stitched, f"{geometry_source}:stitched", candidate_pattern) for geometry_source, geometry_text, candidate_pattern in _usable_geometry_candidates(legacy_candidates): geometry = _validated_leg_geometry(geometry_text, from_stop, to_stop) if geometry is not None: return _leg_geometry_cache_put(cache_key, geometry, geometry_source, candidate_pattern) for geometry_source, geometry_text, candidate_pattern in _usable_geometry_candidates(gtfs_shape_candidates): stitched = _stitched_partial_geometry(geometry_text, stop_coords) if stitched is not None: return _leg_geometry_cache_put(cache_key, stitched, f"{geometry_source}:stitched", candidate_pattern) fallback_geometry, fallback_source = _stop_sequence_fallback_geometry(stop_coords) if fallback_geometry is not None: return _leg_geometry_cache_put(cache_key, fallback_geometry, fallback_source, None) return _leg_geometry_cache_put(cache_key, None, "none", None) def _leg_geometry_cache_key( route: GtfsRoute, trip: GtfsTrip, linked_route_pattern: RoutePattern | None, from_stop: StopSummary, to_stop: StopSummary, ) -> tuple[object, ...]: return ( route.dataset_id, route.route_id, route.id, _geometry_text_fingerprint(route.geometry_geojson), trip.shape_id or "", None if linked_route_pattern is None else linked_route_pattern.id, _geometry_text_fingerprint(None if linked_route_pattern is None else linked_route_pattern.geometry_geojson), from_stop.id, from_stop.stop_id, to_stop.id, to_stop.stop_id, ) def _geometry_text_fingerprint(value: str | None) -> tuple[int, str, str]: if not value: return (0, "", "") text_value = str(value) return (len(text_value), text_value[:96], text_value[-96:]) def _leg_geometry_cache_get( db: Session, cache_key: tuple[object, ...], ) -> tuple[dict | None, str, RoutePattern | None] | None: now = time.monotonic() with _leg_geometry_cache_lock: cached = _leg_geometry_cache.get(cache_key) if cached is None: return None expires_at, geometry, geometry_source, route_pattern_id = cached if expires_at <= now: _leg_geometry_cache.pop(cache_key, None) return None pattern = db.get(RoutePattern, route_pattern_id) if route_pattern_id is not None else None return json.loads(json.dumps(geometry)) if geometry is not None else None, geometry_source, pattern def _leg_geometry_cache_put( cache_key: tuple[object, ...], geometry: dict | None, geometry_source: str, route_pattern: RoutePattern | None, ) -> tuple[dict | None, str, RoutePattern | None]: stored_geometry = json.loads(json.dumps(geometry)) if geometry is not None else None with _leg_geometry_cache_lock: _leg_geometry_cache[cache_key] = ( time.monotonic() + LEG_GEOMETRY_CACHE_TTL_SECONDS, stored_geometry, geometry_source, None if route_pattern is None else int(route_pattern.id), ) if len(_leg_geometry_cache) > LEG_GEOMETRY_CACHE_MAX_ENTRIES: oldest_keys = sorted( _leg_geometry_cache, key=lambda key: _leg_geometry_cache[key][0], )[: len(_leg_geometry_cache) - LEG_GEOMETRY_CACHE_MAX_ENTRIES] for oldest_key in oldest_keys: _leg_geometry_cache.pop(oldest_key, None) return geometry, geometry_source, route_pattern def _usable_geometry_candidates( candidates: list[tuple[str, str | None, RoutePattern | None]] ) -> list[tuple[str, str, RoutePattern | None]]: seen_geometry: set[str] = set() usable: list[tuple[str, str, RoutePattern | None]] = [] for geometry_source, geometry_text, candidate_pattern in candidates: if not geometry_text or geometry_text in seen_geometry: continue seen_geometry.add(geometry_text) usable.append((geometry_source, geometry_text, candidate_pattern)) return usable def _alternate_route_pattern_geometry_candidates( db: Session, route: GtfsRoute, linked_route_pattern: RoutePattern | None, ) -> list[tuple[str, str | None, RoutePattern | None]]: route_refs = [value for value in [route.short_name, route.route_id] if value] if not route_refs: return [] stmt = ( select(RoutePattern) .where(RoutePattern.route_ref.in_(route_refs)) .order_by( case((RoutePattern.source_kind == "osm", 0), else_=1), RoutePattern.confidence.desc(), RoutePattern.id, ) .limit(40) ) if route.mode: stmt = stmt.where(or_(RoutePattern.mode == route.mode, RoutePattern.mode.is_(None))) if linked_route_pattern is not None: stmt = stmt.where(RoutePattern.id != linked_route_pattern.id) return [ (f"route_layer:{pattern.source_kind}:alternate", pattern.geometry_geojson, pattern) for pattern in db.scalars(stmt).all() ] def _validated_leg_geometry(geometry_text: str, from_stop: StopSummary, to_stop: StopSummary) -> dict | None: full_geometry = json.loads(geometry_text) if from_stop.lon is None or from_stop.lat is None or to_stop.lon is None or to_stop.lat is None: return full_geometry try: segment = _segment_between_stops(shape(full_geometry), from_stop, to_stop) if segment is None or segment.is_empty or segment.length == 0: return None return mapping(segment) except Exception: # noqa: BLE001 - route geometry clipping should not break journey search return None def _stop_sequence_fallback_geometry( coords: list[tuple[float, float]], ) -> tuple[dict | None, str]: if len(coords) < 2: return None, "none" source = "stop_sequence_fallback" if len(coords) > 2 else "stop_straight_line_fallback" return mapping(LineString(coords)), source def _stop_sequence_coords( stops: list[dict], from_stop: StopSummary, to_stop: StopSummary, ) -> list[tuple[float, float]]: coords: list[tuple[float, float]] = [] for stop in stops: lon = _float_or_none(stop.get("visual_lon", stop.get("lon"))) lat = _float_or_none(stop.get("visual_lat", stop.get("lat"))) _append_coord(coords, lon, lat) if not stops: _append_coord(coords, from_stop.lon, from_stop.lat) _append_coord(coords, to_stop.lon, to_stop.lat) else: if _stop_payload_coord(stops[0]) is None: _prepend_coord(coords, from_stop.lon, from_stop.lat) if _stop_payload_coord(stops[-1]) is None: _append_coord(coords, to_stop.lon, to_stop.lat) if len(coords) < 2: _prepend_coord(coords, from_stop.lon, from_stop.lat) _append_coord(coords, to_stop.lon, to_stop.lat) return coords def _stitched_partial_geometry(geometry_text: str, stop_coords: list[tuple[float, float]]) -> dict | None: if len(stop_coords) < 2: return None try: geom = shape(json.loads(geometry_text)) except Exception: # noqa: BLE001 - invalid geometry should not break routing return None line = _stitchable_line_for_geometry(geom, stop_coords) if line is None or line.length == 0: return None matches = _stop_projection_matches(line, stop_coords) if not matches: return None first_match = matches[0] last_match = matches[-1] start_stop_index, start_measure, end_stop_index, end_measure = _partial_line_measure_range(line, stop_coords, matches) if start_stop_index is None or end_stop_index is None or start_measure is None or end_measure is None: return None if abs(end_measure - start_measure) <= 1e-12: return None route_segment = substring(line, min(start_measure, end_measure), max(start_measure, end_measure)) if route_segment.is_empty or route_segment.length == 0 or not isinstance(route_segment, LineString): return None if start_measure > end_measure: route_segment = LineString(list(route_segment.coords)[::-1]) coords: list[tuple[float, float]] = [] for coord in stop_coords[:start_stop_index]: _append_coord(coords, coord[0], coord[1]) for coord in route_segment.coords: _append_coord(coords, float(coord[0]), float(coord[1])) for coord in stop_coords[end_stop_index + 1 :]: _append_coord(coords, coord[0], coord[1]) if len(coords) < 2: return None if len(coords) == len(stop_coords) and all(_coords_equal(left, right) for left, right in zip(coords, stop_coords)): return None return mapping(LineString(coords)) def _stitchable_line_for_geometry(geom, stop_coords: list[tuple[float, float]]) -> LineString | None: if isinstance(geom, LineString): return geom if not isinstance(geom, MultiLineString): return None merged = linemerge(geom) if isinstance(merged, LineString): return merged if not isinstance(merged, MultiLineString): return None stop_points = [Point(coord) for coord in stop_coords] def score(line: LineString) -> tuple[int, float, float]: distances = [line.distance(point) for point in stop_points] near_count = sum(distance <= LEG_GEOMETRY_MAX_STOP_DISTANCE_DEG for distance in distances) return (near_count, -sum(distances), line.length) best = max(merged.geoms, key=score, default=None) if best is None or score(best)[0] == 0: return None return best def _stop_projection_matches(line: LineString, stop_coords: list[tuple[float, float]]) -> list[tuple[int, float, float]]: matches = [] for index, coord in enumerate(stop_coords): point = Point(coord) distance = line.distance(point) if distance <= LEG_GEOMETRY_MAX_STOP_DISTANCE_DEG: matches.append((index, line.project(point), distance)) return matches def _partial_line_measure_range( line: LineString, stop_coords: list[tuple[float, float]], matches: list[tuple[int, float, float]], ) -> tuple[int | None, float | None, int | None, float | None]: first_match = matches[0] last_match = matches[-1] direction = _projection_direction(matches) start_index = first_match[0] end_index = last_match[0] start_measure = first_match[1] end_measure = last_match[1] if direction is None: only_index, only_measure, _ = first_match if only_index < len(stop_coords) - 1: endpoint = _line_endpoint_toward(line, only_measure, stop_coords[only_index + 1]) if endpoint is None: return None, None, None, None start_index = only_index end_index = only_index start_measure = only_measure end_measure = endpoint elif only_index > 0: endpoint = _line_endpoint_toward(line, only_measure, stop_coords[only_index - 1]) if endpoint is None: return None, None, None, None start_index = only_index end_index = only_index start_measure = endpoint end_measure = only_measure else: return None, None, None, None elif direction > 0: if start_index > 0: endpoint = _line_endpoint_toward(line, start_measure, stop_coords[start_index - 1], preferred="before") if endpoint is not None: start_measure = endpoint if end_index < len(stop_coords) - 1: endpoint = _line_endpoint_toward(line, end_measure, stop_coords[end_index + 1], preferred="after") if endpoint is not None: end_measure = endpoint else: if end_index < len(stop_coords) - 1: endpoint = _line_endpoint_toward(line, end_measure, stop_coords[end_index + 1], preferred="before") if endpoint is not None: end_measure = endpoint if start_index > 0: endpoint = _line_endpoint_toward(line, start_measure, stop_coords[start_index - 1], preferred="after") if endpoint is not None: start_measure = endpoint return start_index, start_measure, end_index, end_measure def _projection_direction(matches: list[tuple[int, float, float]]) -> int | None: if len(matches) < 2: return None first = matches[0][1] last = matches[-1][1] if abs(last - first) <= 1e-12: return None return 1 if last > first else -1 def _line_endpoint_toward( line: LineString, from_measure: float, target_coord: tuple[float, float], preferred: str | None = None, ) -> float | None: target = Point(target_coord) candidates = [] if preferred in {None, "before"} and from_measure > 1e-12: candidates.append(0.0) if preferred in {None, "after"} and from_measure < line.length - 1e-12: candidates.append(float(line.length)) if not candidates: return None projected_point = line.interpolate(from_measure) projected_distance = projected_point.distance(target) endpoint = min(candidates, key=lambda measure: line.interpolate(measure).distance(target)) if line.interpolate(endpoint).distance(target) >= projected_distance: return None return endpoint def _coords_equal(left: tuple[float, float], right: tuple[float, float]) -> bool: return abs(left[0] - right[0]) < 1e-12 and abs(left[1] - right[1]) < 1e-12 def _append_coord(coords: list[tuple[float, float]], lon: float | None, lat: float | None) -> None: if lon is None or lat is None: return coord = (float(lon), float(lat)) if coords and abs(coords[-1][0] - coord[0]) < 1e-12 and abs(coords[-1][1] - coord[1]) < 1e-12: return coords.append(coord) def _prepend_coord(coords: list[tuple[float, float]], lon: float | None, lat: float | None) -> None: if lon is None or lat is None: return coord = (float(lon), float(lat)) if coords and abs(coords[0][0] - coord[0]) < 1e-12 and abs(coords[0][1] - coord[1]) < 1e-12: return coords.insert(0, coord) def _stop_payload_coord(stop: dict) -> tuple[float, float] | None: lon = _float_or_none(stop.get("visual_lon", stop.get("lon"))) lat = _float_or_none(stop.get("visual_lat", stop.get("lat"))) if lon is None or lat is None: return None return (lon, lat) def _float_or_none(value) -> float | None: if value is None: return None try: return float(value) except (TypeError, ValueError): return None def _segment_between_stops(geom, from_stop: StopSummary, to_stop: StopSummary) -> LineString | None: start_point = Point(from_stop.lon, from_stop.lat) end_point = Point(to_stop.lon, to_stop.lat) if geom.distance(start_point) > LEG_GEOMETRY_MAX_STOP_DISTANCE_DEG: return None if geom.distance(end_point) > LEG_GEOMETRY_MAX_STOP_DISTANCE_DEG: return None if isinstance(geom, LineString): return _substring_for_points(geom, start_point, end_point) if isinstance(geom, MultiLineString): merged = linemerge(geom) if isinstance(merged, LineString): return _substring_for_points(merged, start_point, end_point) if isinstance(merged, MultiLineString): path = _network_path_for_points(merged, start_point, end_point) if path is not None: return path line = _best_line_for_points(merged, start_point, end_point) if line is not None: return _substring_for_points(line, start_point, end_point) return None def _substring_for_points(line: LineString, start_point: Point, end_point: Point) -> LineString | None: if line.length == 0: return None start = line.project(start_point) end = line.project(end_point) if abs(start - end) <= 1e-12: return None segment = substring(line, min(start, end), max(start, end)) if segment.is_empty or segment.length == 0: return None if start > end and isinstance(segment, LineString): segment = LineString(list(segment.coords)[::-1]) return segment if isinstance(segment, LineString) else None def _network_path_for_points(geom: MultiLineString, start_point: Point, end_point: Point) -> LineString | None: nodes: dict[tuple[float, float], tuple[float, float]] = {} graph: dict[tuple[float, float], list[tuple[tuple[float, float], float]]] = {} def key(coord) -> tuple[float, float]: return (round(float(coord[0]), 6), round(float(coord[1]), 6)) def add_node(coord) -> tuple[float, float]: node = key(coord) nodes.setdefault(node, (float(coord[0]), float(coord[1]))) graph.setdefault(node, []) return node for line in geom.geoms: coords = list(line.coords) for left, right in zip(coords, coords[1:]): left_key = add_node(left) right_key = add_node(right) weight = Point(nodes[left_key]).distance(Point(nodes[right_key])) if weight == 0: continue graph[left_key].append((right_key, weight)) graph[right_key].append((left_key, weight)) if not nodes: return None start_key = _nearest_graph_node(nodes, start_point) end_key = _nearest_graph_node(nodes, end_point) if start_key is None or end_key is None: return None path_keys = _shortest_path(graph, start_key, end_key) if not path_keys: return None coords = [(start_point.x, start_point.y)] coords.extend(nodes[node] for node in path_keys) coords.append((end_point.x, end_point.y)) deduped = [] for coord in coords: if not deduped or Point(deduped[-1]).distance(Point(coord)) > 1e-10: deduped.append(coord) if len(deduped) < 2: return None return LineString(deduped) def _nearest_graph_node(nodes: dict[tuple[float, float], tuple[float, float]], point: Point) -> tuple[float, float] | None: if not nodes: return None return min(nodes, key=lambda node: Point(nodes[node]).distance(point)) def _shortest_path( graph: dict[tuple[float, float], list[tuple[tuple[float, float], float]]], start: tuple[float, float], end: tuple[float, float], ) -> list[tuple[float, float]] | None: unvisited = {start} distances = {start: 0.0} previous: dict[tuple[float, float], tuple[float, float]] = {} visited: set[tuple[float, float]] = set() while unvisited: current = min(unvisited, key=lambda node: distances.get(node, float("inf"))) unvisited.remove(current) if current == end: break visited.add(current) for neighbor, weight in graph.get(current, []): if neighbor in visited: continue candidate = distances[current] + weight if candidate < distances.get(neighbor, float("inf")): distances[neighbor] = candidate previous[neighbor] = current unvisited.add(neighbor) if end not in distances: return None path = [end] while path[-1] != start: parent = previous.get(path[-1]) if parent is None: return None path.append(parent) path.reverse() return path def _best_line_for_points(geom: MultiLineString, start: Point, end: Point) -> LineString | None: return min(geom.geoms, key=lambda line: line.distance(start) + line.distance(end), default=None) def _leg_stop_payloads( db: Session, dataset_id: int, trip_id: str, start_sequence: int, end_sequence: int, stop_cache: dict[tuple[int, str], StopSummary], osm_stop_cache: dict[tuple[int, str], dict], ) -> list[dict]: rows = stop_times_for_trip_range(db, dataset_id, trip_id, start_sequence, end_sequence) stops = [] for row in rows: stop = _stop_for_id(db, dataset_id, row.stop_id, stop_cache) stops.append(_visual_stop_payload(db, stop, row.stop_sequence, osm_stop_cache)) return stops def _visual_stop_payload(db: Session, stop: StopSummary, stop_sequence: int, osm_stop_cache: dict[tuple[int, str], dict]) -> dict: payload = _stop_payload(stop) payload["stop_sequence"] = stop_sequence payload["visual_source"] = "gtfs" payload["visual_lon"] = stop.lon payload["visual_lat"] = stop.lat payload["osm"] = None payload["canonical_stop"] = None canonical = _canonical_visual_stop(db, stop) if canonical is not None: payload["visual_source"] = "canonical_stop" payload["visual_lon"] = canonical["lon"] payload["visual_lat"] = canonical["lat"] payload["canonical_stop"] = { "id": canonical["id"], "name": canonical["name"], } if canonical["name"]: payload["name"] = canonical["name"] return payload cache_key = (stop.dataset_id, stop.stop_id) if cache_key not in osm_stop_cache: osm_stop_cache[cache_key] = _nearest_osm_stop(db, stop) or {} osm = osm_stop_cache[cache_key] if osm: payload["visual_source"] = "osm" payload["visual_lon"] = osm["lon"] payload["visual_lat"] = osm["lat"] payload["osm"] = { "id": osm["id"], "dataset_id": osm["dataset_id"], "osm_type": osm["osm_type"], "osm_id": osm["osm_id"], "name": osm["name"], "distance_m": osm["distance_m"], } return payload def _canonical_visual_stop(db: Session, stop: StopSummary) -> dict | None: if not stop.id: return None link = db.scalar( select(CanonicalStopLink) .where(CanonicalStopLink.object_type == "gtfs_stop", CanonicalStopLink.object_id == stop.id) .order_by(CanonicalStopLink.id) ) if link is None: return None canonical = db.get(CanonicalStop, link.canonical_stop_id) if canonical is None or canonical.lon is None or canonical.lat is None: return None return { "id": canonical.id, "name": canonical.name, "lon": canonical.lon, "lat": canonical.lat, } def _nearest_osm_stop(db: Session, stop: StopSummary) -> dict | None: if stop.lon is None or stop.lat is None: return None active_osm_dataset_ids = [ row[0] for row in db.execute(select(Dataset.id).where(Dataset.is_active.is_(True), Dataset.kind == "osm_geojson")).all() ] if not active_osm_dataset_ids: return None min_lon = stop.lon - OSM_STOP_MATCH_RADIUS_DEG max_lon = stop.lon + OSM_STOP_MATCH_RADIUS_DEG min_lat = stop.lat - OSM_STOP_MATCH_RADIUS_DEG max_lat = stop.lat + OSM_STOP_MATCH_RADIUS_DEG candidates = query_osm_features( db, active_osm_dataset_ids, kinds=["stop", "station", "terminal"], bbox=(min_lon, min_lat, max_lon, max_lat), limit=80, ) point = Point(stop.lon, stop.lat) best = None for candidate in candidates: if not candidate.geometry_geojson: continue try: geom = shape(json.loads(candidate.geometry_geojson)) except Exception: # noqa: BLE001 - ignore malformed feature geometry in visual stop matching continue representative = geom if isinstance(geom, Point) else geom.representative_point() distance_deg = representative.distance(point) if best is None or distance_deg < best["distance_deg"]: best = { "id": candidate.id, "dataset_id": candidate.dataset_id, "osm_type": candidate.osm_type, "osm_id": candidate.osm_id, "name": candidate.name, "lon": representative.x, "lat": representative.y, "distance_deg": distance_deg, "distance_m": round(distance_deg * 111_320, 1), } if best is None or best["distance_deg"] > OSM_STOP_MATCH_RADIUS_DEG: return None best.pop("distance_deg", None) return best def _journey_stop_features(legs: list[dict]) -> list[dict]: features_by_key: dict[str, dict] = {} for leg_index, leg in enumerate(legs, start=1): stops = leg.get("stops", []) for stop_index, stop in enumerate(stops): lon = stop.get("visual_lon") lat = stop.get("visual_lat") if lon is None or lat is None: continue role = "passed" if leg_index == 1 and stop_index == 0: role = "start" elif leg_index == len(legs) and stop_index == len(stops) - 1: role = "end" elif (stop_index == len(stops) - 1 and leg_index < len(legs)) or (stop_index == 0 and leg_index > 1): role = "transfer" key = f"{stop['dataset_id']}:{stop['stop_id']}:{round(float(lon), 6)}:{round(float(lat), 6)}" current = features_by_key.get(key) if current is not None and _stop_role_rank(current["properties"]["role"]) >= _stop_role_rank(role): continue features_by_key[key] = { "type": "Feature", "geometry": {"type": "Point", "coordinates": [lon, lat]}, "properties": { "feature_type": "journey_stop", "role": role, "leg": leg_index, "route_ref": leg.get("route_ref"), "mode": leg.get("mode"), "stop_id": stop.get("stop_id"), "name": stop.get("name"), "visual_source": stop.get("visual_source"), "canonical_stop_id": (stop.get("canonical_stop") or {}).get("id"), "osm_id": (stop.get("osm") or {}).get("osm_id"), }, } return list(features_by_key.values()) def _stop_role_rank(role: str) -> int: return {"passed": 0, "transfer": 1, "start": 2, "end": 2}.get(role, 0) def _arrival_seconds(stop_time: GtfsStopTime) -> int | None: return stop_time.arrival_seconds if stop_time.arrival_seconds is not None else parse_gtfs_time(stop_time.arrival_time or stop_time.departure_time) def _departure_seconds(stop_time: GtfsStopTime) -> int | None: return stop_time.departure_seconds if stop_time.departure_seconds is not None else parse_gtfs_time(stop_time.departure_time or stop_time.arrival_time) def _stop_for_id(db: Session, dataset_id: int, stop_id: str, stop_cache: dict[tuple[int, str], StopSummary]) -> StopSummary: key = (dataset_id, stop_id) if key in stop_cache: return stop_cache[key] summary = _stop_summary_for_stop_id(db, dataset_id, stop_id) stop_cache[key] = summary return summary def _source_payload_for_dataset_id(db: Session, dataset_id: int) -> dict | None: row = db.execute( select(Source.id, Source.name) .join(Dataset, Dataset.source_id == Source.id) .where(Dataset.id == dataset_id) ).first() if row is None: return None source_id, source_name = row return {"id": source_id, "name": source_name, "dataset_id": dataset_id} def _stop_summary_for_stop_id(db: Session, dataset_id: int, stop_id: str) -> StopSummary: stop = db.scalar(select(GtfsStop).where(GtfsStop.dataset_id == dataset_id, GtfsStop.stop_id == stop_id)) if stop is None: return StopSummary(id=0, dataset_id=dataset_id, stop_id=stop_id, name=stop_id, lat=None, lon=None) return _stop_summary(stop) def _stop_summary(stop: GtfsStop) -> StopSummary: return StopSummary( id=stop.id, dataset_id=stop.dataset_id, stop_id=stop.stop_id, name=stop.name, lat=stop.lat, lon=stop.lon, ) def _stop_payload(stop: StopSummary) -> dict: return { "id": stop.id, "dataset_id": stop.dataset_id, "stop_id": stop.stop_id, "name": stop.name, "lat": stop.lat, "lon": stop.lon, } def _active_gtfs_dataset_ids(db: Session, source_ids: Optional[list[int]] = None) -> list[int]: stmt = select(Dataset.id).where(Dataset.is_active.is_(True), Dataset.kind == "gtfs") if source_ids: stmt = stmt.where(Dataset.source_id.in_(source_ids)) return [row[0] for row in db.execute(stmt).all()] def _journey_leg_signature(leg: dict) -> str: return "|".join( str(part or "") for part in [ leg.get("dataset_id"), leg.get("route_id"), leg.get("route_ref"), leg.get("from", {}).get("name") or leg.get("from", {}).get("stop_id"), leg.get("to", {}).get("name") or leg.get("to", {}).get("stop_id"), leg.get("departure_seconds") or leg.get("departure_time"), leg.get("arrival_seconds") or leg.get("arrival_time"), ] )