from __future__ import annotations import json import sqlite3 from contextlib import contextmanager from pathlib import Path from typing import Iterator, Sequence from sqlalchemy import and_, func, insert, not_, or_, select, text from sqlalchemy.dialects.postgresql import insert as postgresql_insert from sqlalchemy.orm import Session from app.config import settings from app.models import Dataset, OsmFeature from app.spatial import refresh_postgis_geometries OSM_STORAGE_METADATA_KEY = "osm_storage" OSM_STORAGE_MAIN = "main" OSM_STORAGE_SIDECAR_FEATURES = "sidecar_features" SQLITE_IN_CHUNK_SIZE = 800 OSM_SIDECAR_ROUTE_SCOPE_INDEXES = ["ix_osm_sidecar_scope_bbox"] OSM_FEATURE_COLUMNS = [ "dataset_id", "osm_type", "osm_id", "kind", "mode", "route_scope", "name", "ref", "operator", "network", "geometry_geojson", "min_lon", "min_lat", "max_lon", "max_lat", "tags_json", "route_key", "operator_key", ] def effective_osm_feature_storage(value: str | None = None) -> str: configured = str(value or settings.osm_feature_storage or OSM_STORAGE_SIDECAR_FEATURES).strip().lower() if configured in {OSM_STORAGE_MAIN, "main", "main_db", "postgres", "postgresql"}: return OSM_STORAGE_MAIN if settings.is_postgresql_database and not settings.postgres_use_sidecars: return OSM_STORAGE_MAIN return OSM_STORAGE_SIDECAR_FEATURES class MissingOsmSidecar(FileNotFoundError): pass def dataset_metadata(dataset: Dataset) -> dict: try: metadata = json.loads(dataset.metadata_json or "{}") except json.JSONDecodeError: return {} return metadata if isinstance(metadata, dict) else {} def features_are_sidecar(dataset: Dataset | None) -> bool: if dataset is None: return False storage = dataset_metadata(dataset).get(OSM_STORAGE_METADATA_KEY) if not isinstance(storage, dict): return False tables = storage.get("tables") if isinstance(tables, dict): return tables.get("osm_features") == "sidecar" return storage.get("mode") == OSM_STORAGE_SIDECAR_FEATURES def sidecar_path(dataset: Dataset | None) -> Path | None: if dataset is None: return None storage = dataset_metadata(dataset).get(OSM_STORAGE_METADATA_KEY) if not isinstance(storage, dict): return None value = storage.get("sidecar_path") if not value: return None return Path(str(value)) def dataset_sidecar_paths(dataset: Dataset) -> list[Path]: path = sidecar_path(dataset) return [] if path is None else [path] def missing_sidecar_paths(dataset: Dataset | None) -> list[str]: if not features_are_sidecar(dataset): return [] path = sidecar_path(dataset) if path is None or path.exists(): return [] return [str(path)] @contextmanager def sidecar_connection(dataset: Dataset) -> Iterator[sqlite3.Connection]: path = sidecar_path(dataset) if path is None: raise MissingOsmSidecar(f"dataset #{dataset.id} does not reference an OSM sidecar") if not path.exists(): raise MissingOsmSidecar(f"OSM sidecar does not exist: {path}") connection = sqlite3.connect(f"file:{path}?mode=ro", uri=True) connection.row_factory = sqlite3.Row try: yield connection finally: connection.close() @contextmanager def writable_sidecar_connection(dataset: Dataset) -> Iterator[sqlite3.Connection]: path = sidecar_path(dataset) if path is None: raise MissingOsmSidecar(f"dataset #{dataset.id} does not reference an OSM sidecar") if not path.exists(): raise MissingOsmSidecar(f"OSM sidecar does not exist: {path}") connection = sqlite3.connect(path) connection.row_factory = sqlite3.Row try: connection.execute(f"PRAGMA busy_timeout={int(settings.sqlite_busy_timeout_ms)}") connection.execute("PRAGMA synchronous=NORMAL") yield connection finally: connection.close() def create_osm_sidecar(dataset: Dataset, rows: Sequence[dict[str, object]], *, source_hash: str | None = None) -> dict: path = _new_sidecar_path(dataset, source_hash or dataset.sha256) path.parent.mkdir(parents=True, exist_ok=True) if path.exists(): path.unlink() connection = sqlite3.connect(path) try: connection.execute("PRAGMA journal_mode=OFF") connection.execute("PRAGMA synchronous=OFF") _create_schema(connection) deduped_rows, duplicate_count = dedupe_osm_feature_rows(rows) inserted = 0 counts = {"route": 0, "stop": 0, "station": 0, "terminal": 0, "infra": 0, "feature": 0} insert_sql = f""" INSERT INTO osm_features ({", ".join(["id", *OSM_FEATURE_COLUMNS])}) VALUES ({", ".join(["?"] * (len(OSM_FEATURE_COLUMNS) + 1))}) """ batch = [] for index, row in enumerate(deduped_rows, start=1): kind = str(row.get("kind") or "feature") counts[kind] = counts.get(kind, 0) + 1 batch.append((index, *[row.get(column) for column in OSM_FEATURE_COLUMNS])) if len(batch) >= 5000: connection.executemany(insert_sql, batch) inserted += len(batch) batch.clear() if batch: connection.executemany(insert_sql, batch) inserted += len(batch) connection.commit() _create_indexes(connection) connection.commit() finally: connection.close() return { "mode": OSM_STORAGE_SIDECAR_FEATURES, "tables": {"osm_features": "sidecar"}, "sidecar_path": str(path), "features": inserted, "duplicate_features_skipped": duplicate_count, "counts": counts, } def ensure_osm_sidecar_schema(connection: sqlite3.Connection) -> None: columns = _sidecar_columns(connection) if "route_scope" not in columns: connection.execute("ALTER TABLE osm_features ADD COLUMN route_scope TEXT") connection.commit() def drop_osm_sidecar_route_scope_indexes(connection: sqlite3.Connection) -> None: for index_name in OSM_SIDECAR_ROUTE_SCOPE_INDEXES: connection.execute(f"DROP INDEX IF EXISTS {index_name}") def rebuild_osm_sidecar_indexes(connection: sqlite3.Connection) -> None: _create_indexes(connection) def osm_feature_count(session: Session, dataset_id: int, *, kind: str | Sequence[str] | None = None) -> int: dataset = session.get(Dataset, dataset_id) if features_are_sidecar(dataset): kinds = _as_list(kind) sql = "SELECT COUNT(*) FROM osm_features" params: list[object] = [] if kinds: placeholders = ", ".join(["?"] * len(kinds)) sql += f" WHERE kind IN ({placeholders})" params.extend(kinds) try: with sidecar_connection(dataset) as connection: return int(connection.execute(sql, params).fetchone()[0] or 0) except MissingOsmSidecar: return 0 stmt = select(func.count()).select_from(OsmFeature).where(OsmFeature.dataset_id == dataset_id) kinds = _as_list(kind) if kinds: stmt = stmt.where(OsmFeature.kind.in_(kinds)) return int(session.scalar(stmt) or 0) def osm_feature_bbox( session: Session, dataset_ids: Sequence[int], *, kinds: Sequence[str] | None = None, ) -> tuple[float | None, float | None, float | None, float | None]: if not dataset_ids: return (None, None, None, None) datasets = { dataset.id: dataset for dataset in session.scalars(select(Dataset).where(Dataset.id.in_([int(value) for value in dataset_ids]))).all() } boxes: list[tuple[float, float, float, float]] = [] main_dataset_ids = [dataset_id for dataset_id, dataset in datasets.items() if not features_are_sidecar(dataset)] if main_dataset_ids: stmt = select(func.min(OsmFeature.min_lon), func.min(OsmFeature.min_lat), func.max(OsmFeature.max_lon), func.max(OsmFeature.max_lat)).where( OsmFeature.dataset_id.in_(main_dataset_ids) ) if kinds: stmt = stmt.where(OsmFeature.kind.in_(list(kinds))) row = session.execute(stmt).one() if None not in row: boxes.append((float(row[0]), float(row[1]), float(row[2]), float(row[3]))) for dataset in datasets.values(): if not features_are_sidecar(dataset): continue where = [] params: list[object] = [] if kinds: placeholders = ", ".join(["?"] * len(kinds)) where.append(f"kind IN ({placeholders})") params.extend(list(kinds)) sql = "SELECT MIN(min_lon), MIN(min_lat), MAX(max_lon), MAX(max_lat) FROM osm_features" if where: sql += " WHERE " + " AND ".join(where) try: with sidecar_connection(dataset) as connection: row = connection.execute(sql, params).fetchone() if row is not None and None not in row: boxes.append((float(row[0]), float(row[1]), float(row[2]), float(row[3]))) except MissingOsmSidecar: continue if not boxes: return (None, None, None, None) return ( min(box[0] for box in boxes), min(box[1] for box in boxes), max(box[2] for box in boxes), max(box[3] for box in boxes), ) def query_osm_features( session: Session, dataset_ids: Sequence[int], *, kinds: Sequence[str] | None = None, modes: Sequence[str] | None = None, bbox: tuple[float, float, float, float] | None = None, geometry_required: bool | None = None, search: str | None = None, route_key: str | None = None, route_scopes: Sequence[str] | None = None, ref: str | None = None, osm_type: str | None = None, osm_id: str | None = None, limit: int | None = None, offset: int | None = None, prefer_materialized_ids: bool = True, ) -> list[OsmFeature]: if not dataset_ids: return [] datasets = { dataset.id: dataset for dataset in session.scalars(select(Dataset).where(Dataset.id.in_([int(value) for value in dataset_ids]))).all() } materialized_ids = _materialized_ids_by_identity(session, list(datasets)) if prefer_materialized_ids else {} rows: list[OsmFeature] = [] main_dataset_ids = [dataset_id for dataset_id, dataset in datasets.items() if not features_are_sidecar(dataset)] if main_dataset_ids: stmt = select(OsmFeature).where(OsmFeature.dataset_id.in_(main_dataset_ids)) stmt = _apply_main_filters( stmt, kinds=kinds, modes=modes, bbox=bbox, geometry_required=geometry_required, search=search, route_key=route_key, route_scopes=route_scopes, ref=ref, osm_type=osm_type, osm_id=osm_id, ) if offset: stmt = stmt.offset(max(0, int(offset))) rows.extend( session.scalars( stmt.order_by(OsmFeature.kind, OsmFeature.mode, OsmFeature.ref, OsmFeature.name, OsmFeature.id).limit(limit) ).all() ) for dataset_id, dataset in datasets.items(): if not features_are_sidecar(dataset): continue rows.extend( _query_sidecar_features( dataset, kinds=kinds, modes=modes, bbox=bbox, geometry_required=geometry_required, search=search, route_key=route_key, route_scopes=route_scopes, ref=ref, osm_type=osm_type, osm_id=osm_id, limit=limit, offset=offset, materialized_ids=materialized_ids, ) ) rows.sort(key=lambda row: (row.kind or "", row.mode or "", row.ref or "", row.name or "", int(row.id or 0))) if limit is not None: return rows[: max(1, int(limit))] return rows def get_osm_feature(session: Session, feature_id: int) -> OsmFeature | None: return session.get(OsmFeature, feature_id) def osm_feature_identity_key(feature: OsmFeature) -> str: return f"{feature.dataset_id}|{feature.osm_type}|{feature.osm_id}" def osm_feature_public_id(feature: OsmFeature) -> int | str | None: if getattr(feature, "_osm_sidecar_source", False): return osm_feature_identity_key(feature) return feature.id def resolve_osm_feature(session: Session, value: int | str) -> OsmFeature | None: int_value = _safe_int(value) if int_value is not None: feature = session.get(OsmFeature, int_value) if feature is not None: return feature parsed = parse_osm_feature_identity_key(str(value)) if parsed is None: return None dataset_id, osm_type, osm_id = parsed existing = session.scalar( select(OsmFeature).where( OsmFeature.dataset_id == dataset_id, OsmFeature.osm_type == osm_type, OsmFeature.osm_id == osm_id, ) ) if existing is not None: return existing dataset = session.get(Dataset, dataset_id) if not features_are_sidecar(dataset): return None try: with sidecar_connection(dataset) as connection: select_columns = ", ".join(_sidecar_select_columns(_sidecar_columns(connection))) row = connection.execute( f""" SELECT id, {select_columns} FROM osm_features WHERE dataset_id = ? AND osm_type = ? AND osm_id = ? """, (dataset_id, osm_type, osm_id), ).fetchone() except MissingOsmSidecar: return None if row is None: return None return _feature_from_row(row, {}) def parse_osm_feature_identity_key(value: str) -> tuple[int, str, str] | None: parts = value.split("|", 2) if len(parts) != 3: return None dataset_id = _safe_int(parts[0]) if dataset_id is None: return None osm_type = parts[1].strip() osm_id = parts[2].strip() if not osm_type or not osm_id: return None return dataset_id, osm_type, osm_id def ensure_main_osm_feature(session: Session, feature: OsmFeature) -> OsmFeature: existing = session.scalar( select(OsmFeature).where( OsmFeature.dataset_id == feature.dataset_id, OsmFeature.osm_type == feature.osm_type, OsmFeature.osm_id == feature.osm_id, ) ) if existing is not None: return existing values = dict( dataset_id=feature.dataset_id, osm_type=feature.osm_type, osm_id=feature.osm_id, kind=feature.kind, mode=feature.mode, route_scope=feature.route_scope, name=feature.name, ref=feature.ref, operator=feature.operator, network=feature.network, geometry_geojson=feature.geometry_geojson, min_lon=feature.min_lon, min_lat=feature.min_lat, max_lon=feature.max_lon, max_lat=feature.max_lat, tags_json=feature.tags_json, route_key=feature.route_key, operator_key=feature.operator_key, ) if settings.is_postgresql_database: session.execute( postgresql_insert(OsmFeature) .values(**values) .on_conflict_do_nothing(index_elements=["dataset_id", "osm_type", "osm_id"]) ) else: session.execute(insert(OsmFeature).values(**values).prefix_with("OR IGNORE")) session.flush() refresh_postgis_geometries(session, dataset_id=feature.dataset_id, tables=["osm_features"]) existing = session.scalar( select(OsmFeature).where( OsmFeature.dataset_id == feature.dataset_id, OsmFeature.osm_type == feature.osm_type, OsmFeature.osm_id == feature.osm_id, ) ) if existing is None: raise RuntimeError(f"Could not materialize OSM feature {feature.dataset_id}:{feature.osm_type}:{feature.osm_id}") return existing def materialize_osm_features(session: Session, features: Sequence[OsmFeature]) -> list[OsmFeature]: return [ensure_main_osm_feature(session, feature) for feature in features] def _new_sidecar_path(dataset: Dataset, source_hash: str | None) -> Path: suffix = (source_hash or dataset.sha256 or str(dataset.id))[:12] return settings.data_dir / "sidecars" / f"source_{dataset.source_id}" / f"osm_dataset_{dataset.id}_{suffix}.sqlite" def dedupe_osm_feature_rows(rows: Sequence[dict[str, object]]) -> tuple[list[dict[str, object]], int]: selected: dict[tuple[int, str, str], dict[str, object]] = {} for row in rows: key = (int(row["dataset_id"]), str(row["osm_type"]), str(row["osm_id"])) current = selected.get(key) if current is None or _feature_row_preference(row) < _feature_row_preference(current): selected[key] = dict(row) return list(selected.values()), max(0, len(rows) - len(selected)) def _feature_row_preference(row: dict[str, object]) -> tuple[int, int, int]: kind_rank = { "route": 0, "station": 1, "terminal": 2, "stop": 3, "infra": 4, "feature": 5, }.get(str(row.get("kind") or "feature"), 6) has_geometry = 0 if row.get("geometry_geojson") else 1 geometry_size = -len(str(row.get("geometry_geojson") or "")) return (kind_rank, has_geometry, geometry_size) def _create_schema(connection: sqlite3.Connection) -> None: connection.execute( """ CREATE TABLE osm_features ( id INTEGER PRIMARY KEY, dataset_id INTEGER NOT NULL, osm_type TEXT NOT NULL, osm_id TEXT NOT NULL, kind TEXT NOT NULL, mode TEXT, route_scope TEXT, name TEXT, ref TEXT, operator TEXT, network TEXT, geometry_geojson TEXT, min_lon REAL, min_lat REAL, max_lon REAL, max_lat REAL, tags_json TEXT, route_key TEXT, operator_key TEXT, UNIQUE(dataset_id, osm_type, osm_id) ) """ ) def _create_indexes(connection: sqlite3.Connection) -> None: statements = [ "CREATE INDEX IF NOT EXISTS ix_osm_sidecar_kind_mode_bbox ON osm_features (kind, mode, min_lon, max_lon, min_lat, max_lat)", "CREATE INDEX IF NOT EXISTS ix_osm_sidecar_scope_bbox ON osm_features (kind, mode, route_scope, min_lon, max_lon, min_lat, max_lat)", "CREATE INDEX IF NOT EXISTS ix_osm_sidecar_route_key ON osm_features (route_key)", "CREATE INDEX IF NOT EXISTS ix_osm_sidecar_ref ON osm_features (ref)", "CREATE INDEX IF NOT EXISTS ix_osm_sidecar_identity ON osm_features (dataset_id, osm_type, osm_id)", "CREATE INDEX IF NOT EXISTS ix_osm_sidecar_kind_ref_mode ON osm_features (kind, ref, mode)", ] for statement in statements: connection.execute(statement) def _apply_main_filters(stmt, *, kinds, modes, bbox, geometry_required, search, route_key, route_scopes, ref, osm_type, osm_id): if kinds: stmt = stmt.where(OsmFeature.kind.in_(list(kinds))) if modes: stmt = stmt.where(OsmFeature.mode.in_(list(modes))) if route_scopes: stmt = stmt.where(_main_route_scope_condition([str(scope) for scope in route_scopes])) if bbox: min_lon, min_lat, max_lon, max_lat = bbox if settings.is_postgresql_database: stmt = stmt.where( text( """ ( osm_features.geom && ST_MakeEnvelope(:bbox_min_lon, :bbox_min_lat, :bbox_max_lon, :bbox_max_lat, 4326) OR ( osm_features.geom IS NULL AND osm_features.min_lon <= :bbox_max_lon AND osm_features.max_lon >= :bbox_min_lon AND osm_features.min_lat <= :bbox_max_lat AND osm_features.max_lat >= :bbox_min_lat ) ) """ ) ).params( bbox_min_lon=min_lon, bbox_min_lat=min_lat, bbox_max_lon=max_lon, bbox_max_lat=max_lat, ) else: stmt = stmt.where(OsmFeature.min_lon <= max_lon, OsmFeature.max_lon >= min_lon, OsmFeature.min_lat <= max_lat, OsmFeature.max_lat >= min_lat) if geometry_required is True: stmt = stmt.where(OsmFeature.geometry_geojson.is_not(None)) elif geometry_required is False: stmt = stmt.where(OsmFeature.geometry_geojson.is_(None)) if search: if settings.is_postgresql_database: stmt = stmt.where( text( """ ( LOWER(COALESCE(osm_features.ref, '')) LIKE :search_pattern OR LOWER(COALESCE(osm_features.name, '')) LIKE :search_pattern OR LOWER(COALESCE(osm_features.tags_json, '')) LIKE :search_pattern ) """ ) ).params(search_pattern=f"%{search.lower()}%") else: pattern = f"%{search}%" stmt = stmt.where( (OsmFeature.ref.ilike(pattern)) | (OsmFeature.name.ilike(pattern)) | (OsmFeature.tags_json.ilike(pattern)) ) if route_key: stmt = stmt.where(OsmFeature.route_key == route_key) if ref: stmt = stmt.where(OsmFeature.ref == ref) if osm_type: stmt = stmt.where(OsmFeature.osm_type == osm_type) if osm_id: stmt = stmt.where(OsmFeature.osm_id == osm_id) return stmt def _main_route_scope_condition(route_scopes: list[str]): fallback = _main_route_scope_fallback_condition(route_scopes) stored = OsmFeature.route_scope.in_(route_scopes) if "local" in route_scopes: non_local_bus_fallback = _main_route_scope_fallback_condition(["long_distance", "regional"]) stored = and_(stored, not_(and_(OsmFeature.mode.in_(["bus", "trolleybus"]), non_local_bus_fallback))) return or_(stored, fallback) def _main_route_scope_fallback_condition(route_scopes: list[str]): ref = func.upper(func.coalesce(OsmFeature.ref, "")) name = func.upper(func.coalesce(OsmFeature.name, "")) network = func.upper(func.coalesce(OsmFeature.network, "")) tags = func.lower(func.coalesce(OsmFeature.tags_json, "")) train_long_distance = and_( OsmFeature.mode == "train", or_( ref.like("ICE%"), ref.like("IC%"), ref.like("EC%"), ref.like("ECE%"), ref.like("EN%"), ref.like("NJ%"), ref.like("RJ%"), ref.like("RJX%"), ref.like("TGV%"), ref.like("THA%"), ref.like("FLX%"), name.like("%INTERCITY%"), name.like("%EUROCITY%"), name.like("%NIGHTJET%"), name.like("%FLIXTRAIN%"), tags.like('%"service":"long_distance"%'), tags.like('%"train":"long_distance"%'), tags.like('%"train":"high_speed"%'), tags.like('%"train":"intercity"%'), ), ) bus_long_distance = and_( OsmFeature.mode.in_(["bus", "trolleybus"]), or_( name.like("%FLIXBUS%"), network.like("%FLIXBUS%"), name.like("%EUROLINES%"), network.like("%EUROLINES%"), name.like("%INTERCITYBUS%"), name.like("%IC BUS%"), name.like("%FERNBUS%"), tags.like('%"service":"long_distance"%'), tags.like('%"bus":"long_distance"%'), tags.like('%"bus":"intercity"%'), tags.like('%"network:type":"long_distance"%'), ), ) long_distance = or_(OsmFeature.mode == "coach", train_long_distance, bus_long_distance) bus_regional = and_( OsmFeature.mode.in_(["bus", "trolleybus"]), not_(bus_long_distance), or_( name.like("%REGIONALBUS%"), name.like("%REGIOBUS%"), name.like("%REGIONAL BUS%"), name.like("%REGIONALVERKEHR%"), network.like("%REGIONALBUS%"), network.like("%REGIOBUS%"), network.like("%REGIONALVERKEHR%"), tags.like('%"service":"regional"%'), tags.like('%"bus":"regional"%'), tags.like('%"bus":"interurban"%'), tags.like('%"network:type":"regional"%'), ), ) local = or_( OsmFeature.mode.in_(["tram", "light_rail", "subway", "ferry", "funicular", "aerialway", "monorail"]), and_(OsmFeature.mode.in_(["bus", "trolleybus"]), not_(or_(bus_long_distance, bus_regional))), and_( OsmFeature.mode == "train", or_(ref.like("S%"), name.like("%S-BAHN%"), network.like("%S-BAHN%"), tags.like('%"train":"commuter"%')), ), ) train_regional = and_( OsmFeature.mode == "train", not_(train_long_distance), or_( ref.like("IRE%"), ref.like("RE%"), ref.like("RB%"), ref.like("RER%"), ref.like("TER%"), ref.like("REX%"), ref.like("MEX%"), ref.like("ALX%"), ref.like("WFB%"), ref.like("R%"), name.like("%REGIONAL%"), name.like("%REGIO%"), tags.like('%"service":"regional"%'), tags.like('%"train":"regional"%'), ), ) regional = or_(train_regional, bus_regional) conditions = [] if "long_distance" in route_scopes: conditions.append(long_distance) if "regional" in route_scopes: conditions.append(regional) if "local" in route_scopes: conditions.append(local) if "unknown" in route_scopes: conditions.append(and_(OsmFeature.mode == "train", not_(or_(long_distance, regional, local)))) return or_(*conditions) if conditions else OsmFeature.route_scope.is_(None) def _query_sidecar_features( dataset: Dataset, *, kinds: Sequence[str] | None, modes: Sequence[str] | None, bbox: tuple[float, float, float, float] | None, geometry_required: bool | None, search: str | None, route_key: str | None, route_scopes: Sequence[str] | None, ref: str | None, osm_type: str | None, osm_id: str | None, limit: int | None, offset: int | None, materialized_ids: dict[tuple[int, str, str], int], ) -> list[OsmFeature]: where = [] params: list[object] = [] try: with sidecar_connection(dataset) as connection: available_columns = _sidecar_columns(connection) if kinds: placeholders = ", ".join(["?"] * len(kinds)) where.append(f"kind IN ({placeholders})") params.extend(list(kinds)) if modes: placeholders = ", ".join(["?"] * len(modes)) where.append(f"mode IN ({placeholders})") params.extend(list(modes)) if bbox: min_lon, min_lat, max_lon, max_lat = bbox where.extend(["min_lon <= ?", "max_lon >= ?", "min_lat <= ?", "max_lat >= ?"]) params.extend([max_lon, min_lon, max_lat, min_lat]) if geometry_required is True: where.append("geometry_geojson IS NOT NULL") elif geometry_required is False: where.append("geometry_geojson IS NULL") if search: where.append("(LOWER(COALESCE(ref, '')) LIKE ? OR LOWER(COALESCE(name, '')) LIKE ? OR LOWER(COALESCE(tags_json, '')) LIKE ?)") pattern = f"%{search.lower()}%" params.extend([pattern, pattern, pattern]) if route_key: where.append("route_key = ?") params.append(route_key) if route_scopes: condition, condition_params = _sidecar_route_scope_condition([str(scope) for scope in route_scopes], has_route_scope="route_scope" in available_columns) where.append(condition) params.extend(condition_params) if ref: where.append("ref = ?") params.append(ref) if osm_type: where.append("osm_type = ?") params.append(osm_type) if osm_id: where.append("osm_id = ?") params.append(osm_id) select_columns = ", ".join(_sidecar_select_columns(available_columns)) sql = f"SELECT id, {select_columns} FROM osm_features" if where: sql += " WHERE " + " AND ".join(where) sql += " ORDER BY kind, mode, ref, name, id" if limit is not None: sql += " LIMIT ?" params.append(max(1, int(limit))) if offset: if limit is None: sql += " LIMIT -1" sql += " OFFSET ?" params.append(max(0, int(offset))) return [_feature_from_row(row, materialized_ids) for row in connection.execute(sql, params).fetchall()] except MissingOsmSidecar: return [] def _sidecar_columns(connection: sqlite3.Connection) -> set[str]: return {str(row["name"]) for row in connection.execute("PRAGMA table_info(osm_features)").fetchall()} def _sidecar_select_columns(available_columns: set[str]) -> list[str]: return [column if column in available_columns else f"NULL AS {column}" for column in OSM_FEATURE_COLUMNS] def _sidecar_route_scope_condition(route_scopes: list[str], *, has_route_scope: bool) -> tuple[str, list[object]]: fallback_sql, fallback_params = _sidecar_route_scope_fallback_condition(route_scopes) if has_route_scope: placeholders = ", ".join(["?"] * len(route_scopes)) stored_sql = f"route_scope IN ({placeholders})" params: list[object] = [*route_scopes] if "local" in route_scopes: non_local_sql, non_local_params = _sidecar_route_scope_fallback_condition(["long_distance", "regional"]) stored_sql = f"({stored_sql} AND NOT (mode IN ('bus', 'trolleybus') AND {non_local_sql}))" params.extend(non_local_params) return f"({stored_sql} OR {fallback_sql})", [*params, *fallback_params] return fallback_sql, fallback_params def _sidecar_route_scope_fallback_condition(route_scopes: list[str]) -> tuple[str, list[object]]: train_long_distance = """( mode = 'train' AND ( UPPER(COALESCE(ref, '')) LIKE 'ICE%' OR UPPER(COALESCE(ref, '')) LIKE 'IC%' OR UPPER(COALESCE(ref, '')) LIKE 'EC%' OR UPPER(COALESCE(ref, '')) LIKE 'ECE%' OR UPPER(COALESCE(ref, '')) LIKE 'EN%' OR UPPER(COALESCE(ref, '')) LIKE 'NJ%' OR UPPER(COALESCE(ref, '')) LIKE 'RJ%' OR UPPER(COALESCE(ref, '')) LIKE 'RJX%' OR UPPER(COALESCE(ref, '')) LIKE 'TGV%' OR UPPER(COALESCE(ref, '')) LIKE 'THA%' OR UPPER(COALESCE(ref, '')) LIKE 'FLX%' OR UPPER(COALESCE(name, '')) LIKE '%INTERCITY%' OR UPPER(COALESCE(name, '')) LIKE '%EUROCITY%' OR UPPER(COALESCE(name, '')) LIKE '%NIGHTJET%' OR UPPER(COALESCE(name, '')) LIKE '%FLIXTRAIN%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"long_distance"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"long_distance"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"high_speed"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"intercity"%' ) )""" bus_long_distance = """( mode IN ('bus', 'trolleybus') AND ( UPPER(COALESCE(name, '')) LIKE '%FLIXBUS%' OR UPPER(COALESCE(network, '')) LIKE '%FLIXBUS%' OR UPPER(COALESCE(name, '')) LIKE '%EUROLINES%' OR UPPER(COALESCE(network, '')) LIKE '%EUROLINES%' OR UPPER(COALESCE(name, '')) LIKE '%INTERCITYBUS%' OR UPPER(COALESCE(name, '')) LIKE '%IC BUS%' OR UPPER(COALESCE(name, '')) LIKE '%FERNBUS%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"long_distance"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"long_distance"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"intercity"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"network:type":"long_distance"%' ) )""" long_distance = f"(mode = 'coach' OR {train_long_distance} OR {bus_long_distance})" bus_regional = f"""( mode IN ('bus', 'trolleybus') AND NOT {bus_long_distance} AND ( UPPER(COALESCE(name, '')) LIKE '%REGIONALBUS%' OR UPPER(COALESCE(name, '')) LIKE '%REGIOBUS%' OR UPPER(COALESCE(name, '')) LIKE '%REGIONAL BUS%' OR UPPER(COALESCE(name, '')) LIKE '%REGIONALVERKEHR%' OR UPPER(COALESCE(network, '')) LIKE '%REGIONALBUS%' OR UPPER(COALESCE(network, '')) LIKE '%REGIOBUS%' OR UPPER(COALESCE(network, '')) LIKE '%REGIONALVERKEHR%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"regional"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"regional"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"interurban"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"network:type":"regional"%' ) )""" train_regional = f"""( mode = 'train' AND NOT {train_long_distance} AND ( UPPER(COALESCE(ref, '')) LIKE 'IRE%' OR UPPER(COALESCE(ref, '')) LIKE 'RE%' OR UPPER(COALESCE(ref, '')) LIKE 'RB%' OR UPPER(COALESCE(ref, '')) LIKE 'RER%' OR UPPER(COALESCE(ref, '')) LIKE 'TER%' OR UPPER(COALESCE(ref, '')) LIKE 'REX%' OR UPPER(COALESCE(ref, '')) LIKE 'MEX%' OR UPPER(COALESCE(ref, '')) LIKE 'ALX%' OR UPPER(COALESCE(ref, '')) LIKE 'WFB%' OR UPPER(COALESCE(ref, '')) LIKE 'R%' OR UPPER(COALESCE(name, '')) LIKE '%REGIONAL%' OR UPPER(COALESCE(name, '')) LIKE '%REGIO%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"regional"%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"regional"%' ) )""" regional = f"({train_regional} OR {bus_regional})" local = f"""( mode IN ('tram', 'light_rail', 'subway', 'ferry', 'funicular', 'aerialway', 'monorail') OR (mode IN ('bus', 'trolleybus') AND NOT ({bus_long_distance} OR {bus_regional})) OR ( mode = 'train' AND ( UPPER(COALESCE(ref, '')) LIKE 'S%' OR UPPER(COALESCE(name, '')) LIKE '%S-BAHN%' OR UPPER(COALESCE(network, '')) LIKE '%S-BAHN%' OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"commuter"%' ) ) )""" parts = [] if "long_distance" in route_scopes: parts.append(long_distance) if "regional" in route_scopes: parts.append(regional) if "local" in route_scopes: parts.append(local) if "unknown" in route_scopes: parts.append(f"(mode = 'train' AND NOT ({long_distance} OR {regional} OR {local}))") return "(" + " OR ".join(parts or ["0"]) + ")", [] def _feature_from_row(row: sqlite3.Row, materialized_ids: dict[tuple[int, str, str], int]) -> OsmFeature: dataset_id = int(row["dataset_id"]) osm_type = str(row["osm_type"]) osm_id = str(row["osm_id"]) feature_id = materialized_ids.get((dataset_id, osm_type, osm_id), int(row["id"])) feature = OsmFeature( id=feature_id, dataset_id=dataset_id, osm_type=osm_type, osm_id=osm_id, kind=str(row["kind"]), mode=row["mode"], route_scope=row["route_scope"], name=row["name"], ref=row["ref"], operator=row["operator"], network=row["network"], geometry_geojson=row["geometry_geojson"], min_lon=row["min_lon"], min_lat=row["min_lat"], max_lon=row["max_lon"], max_lat=row["max_lat"], tags_json=row["tags_json"], route_key=row["route_key"], operator_key=row["operator_key"], ) setattr(feature, "_osm_sidecar_source", True) setattr(feature, "_osm_sidecar_row_id", int(row["id"])) return feature def _materialized_ids_by_identity(session: Session, dataset_ids: Sequence[int]) -> dict[tuple[int, str, str], int]: if not dataset_ids: return {} rows = session.execute( select(OsmFeature.dataset_id, OsmFeature.osm_type, OsmFeature.osm_id, OsmFeature.id).where(OsmFeature.dataset_id.in_(dataset_ids)) ).all() return {(int(dataset_id), str(osm_type), str(osm_id)): int(feature_id) for dataset_id, osm_type, osm_id, feature_id in rows} def _as_list(value: str | Sequence[str] | None) -> list[str]: if value is None: return [] if isinstance(value, str): return [value] return [str(item) for item in value] def _safe_int(value: object) -> int | None: try: return int(value) # type: ignore[arg-type] except (TypeError, ValueError): return None