Alpha stage commit
This commit is contained in:
981
app/osm_storage.py
Normal file
981
app/osm_storage.py
Normal file
@@ -0,0 +1,981 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence
|
||||
|
||||
from sqlalchemy import and_, func, insert, not_, or_, select, text
|
||||
from sqlalchemy.dialects.postgresql import insert as postgresql_insert
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, OsmFeature
|
||||
from app.spatial import refresh_postgis_geometries
|
||||
|
||||
|
||||
OSM_STORAGE_METADATA_KEY = "osm_storage"
|
||||
OSM_STORAGE_MAIN = "main"
|
||||
OSM_STORAGE_SIDECAR_FEATURES = "sidecar_features"
|
||||
SQLITE_IN_CHUNK_SIZE = 800
|
||||
OSM_SIDECAR_ROUTE_SCOPE_INDEXES = ["ix_osm_sidecar_scope_bbox"]
|
||||
OSM_FEATURE_COLUMNS = [
|
||||
"dataset_id",
|
||||
"osm_type",
|
||||
"osm_id",
|
||||
"kind",
|
||||
"mode",
|
||||
"route_scope",
|
||||
"name",
|
||||
"ref",
|
||||
"operator",
|
||||
"network",
|
||||
"geometry_geojson",
|
||||
"min_lon",
|
||||
"min_lat",
|
||||
"max_lon",
|
||||
"max_lat",
|
||||
"tags_json",
|
||||
"route_key",
|
||||
"operator_key",
|
||||
]
|
||||
|
||||
|
||||
def effective_osm_feature_storage(value: str | None = None) -> str:
|
||||
configured = str(value or settings.osm_feature_storage or OSM_STORAGE_SIDECAR_FEATURES).strip().lower()
|
||||
if configured in {OSM_STORAGE_MAIN, "main", "main_db", "postgres", "postgresql"}:
|
||||
return OSM_STORAGE_MAIN
|
||||
if settings.is_postgresql_database and not settings.postgres_use_sidecars:
|
||||
return OSM_STORAGE_MAIN
|
||||
return OSM_STORAGE_SIDECAR_FEATURES
|
||||
|
||||
|
||||
class MissingOsmSidecar(FileNotFoundError):
|
||||
pass
|
||||
|
||||
|
||||
def dataset_metadata(dataset: Dataset) -> dict:
|
||||
try:
|
||||
metadata = json.loads(dataset.metadata_json or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return metadata if isinstance(metadata, dict) else {}
|
||||
|
||||
|
||||
def features_are_sidecar(dataset: Dataset | None) -> bool:
|
||||
if dataset is None:
|
||||
return False
|
||||
storage = dataset_metadata(dataset).get(OSM_STORAGE_METADATA_KEY)
|
||||
if not isinstance(storage, dict):
|
||||
return False
|
||||
tables = storage.get("tables")
|
||||
if isinstance(tables, dict):
|
||||
return tables.get("osm_features") == "sidecar"
|
||||
return storage.get("mode") == OSM_STORAGE_SIDECAR_FEATURES
|
||||
|
||||
|
||||
def sidecar_path(dataset: Dataset | None) -> Path | None:
|
||||
if dataset is None:
|
||||
return None
|
||||
storage = dataset_metadata(dataset).get(OSM_STORAGE_METADATA_KEY)
|
||||
if not isinstance(storage, dict):
|
||||
return None
|
||||
value = storage.get("sidecar_path")
|
||||
if not value:
|
||||
return None
|
||||
return Path(str(value))
|
||||
|
||||
|
||||
def dataset_sidecar_paths(dataset: Dataset) -> list[Path]:
|
||||
path = sidecar_path(dataset)
|
||||
return [] if path is None else [path]
|
||||
|
||||
|
||||
def missing_sidecar_paths(dataset: Dataset | None) -> list[str]:
|
||||
if not features_are_sidecar(dataset):
|
||||
return []
|
||||
path = sidecar_path(dataset)
|
||||
if path is None or path.exists():
|
||||
return []
|
||||
return [str(path)]
|
||||
|
||||
|
||||
@contextmanager
|
||||
def sidecar_connection(dataset: Dataset) -> Iterator[sqlite3.Connection]:
|
||||
path = sidecar_path(dataset)
|
||||
if path is None:
|
||||
raise MissingOsmSidecar(f"dataset #{dataset.id} does not reference an OSM sidecar")
|
||||
if not path.exists():
|
||||
raise MissingOsmSidecar(f"OSM sidecar does not exist: {path}")
|
||||
connection = sqlite3.connect(f"file:{path}?mode=ro", uri=True)
|
||||
connection.row_factory = sqlite3.Row
|
||||
try:
|
||||
yield connection
|
||||
finally:
|
||||
connection.close()
|
||||
|
||||
|
||||
@contextmanager
|
||||
def writable_sidecar_connection(dataset: Dataset) -> Iterator[sqlite3.Connection]:
|
||||
path = sidecar_path(dataset)
|
||||
if path is None:
|
||||
raise MissingOsmSidecar(f"dataset #{dataset.id} does not reference an OSM sidecar")
|
||||
if not path.exists():
|
||||
raise MissingOsmSidecar(f"OSM sidecar does not exist: {path}")
|
||||
connection = sqlite3.connect(path)
|
||||
connection.row_factory = sqlite3.Row
|
||||
try:
|
||||
connection.execute(f"PRAGMA busy_timeout={int(settings.sqlite_busy_timeout_ms)}")
|
||||
connection.execute("PRAGMA synchronous=NORMAL")
|
||||
yield connection
|
||||
finally:
|
||||
connection.close()
|
||||
|
||||
|
||||
def create_osm_sidecar(dataset: Dataset, rows: Sequence[dict[str, object]], *, source_hash: str | None = None) -> dict:
|
||||
path = _new_sidecar_path(dataset, source_hash or dataset.sha256)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if path.exists():
|
||||
path.unlink()
|
||||
connection = sqlite3.connect(path)
|
||||
try:
|
||||
connection.execute("PRAGMA journal_mode=OFF")
|
||||
connection.execute("PRAGMA synchronous=OFF")
|
||||
_create_schema(connection)
|
||||
deduped_rows, duplicate_count = dedupe_osm_feature_rows(rows)
|
||||
inserted = 0
|
||||
counts = {"route": 0, "stop": 0, "station": 0, "terminal": 0, "infra": 0, "feature": 0}
|
||||
insert_sql = f"""
|
||||
INSERT INTO osm_features
|
||||
({", ".join(["id", *OSM_FEATURE_COLUMNS])})
|
||||
VALUES
|
||||
({", ".join(["?"] * (len(OSM_FEATURE_COLUMNS) + 1))})
|
||||
"""
|
||||
batch = []
|
||||
for index, row in enumerate(deduped_rows, start=1):
|
||||
kind = str(row.get("kind") or "feature")
|
||||
counts[kind] = counts.get(kind, 0) + 1
|
||||
batch.append((index, *[row.get(column) for column in OSM_FEATURE_COLUMNS]))
|
||||
if len(batch) >= 5000:
|
||||
connection.executemany(insert_sql, batch)
|
||||
inserted += len(batch)
|
||||
batch.clear()
|
||||
if batch:
|
||||
connection.executemany(insert_sql, batch)
|
||||
inserted += len(batch)
|
||||
connection.commit()
|
||||
_create_indexes(connection)
|
||||
connection.commit()
|
||||
finally:
|
||||
connection.close()
|
||||
return {
|
||||
"mode": OSM_STORAGE_SIDECAR_FEATURES,
|
||||
"tables": {"osm_features": "sidecar"},
|
||||
"sidecar_path": str(path),
|
||||
"features": inserted,
|
||||
"duplicate_features_skipped": duplicate_count,
|
||||
"counts": counts,
|
||||
}
|
||||
|
||||
|
||||
def ensure_osm_sidecar_schema(connection: sqlite3.Connection) -> None:
|
||||
columns = _sidecar_columns(connection)
|
||||
if "route_scope" not in columns:
|
||||
connection.execute("ALTER TABLE osm_features ADD COLUMN route_scope TEXT")
|
||||
connection.commit()
|
||||
|
||||
|
||||
def drop_osm_sidecar_route_scope_indexes(connection: sqlite3.Connection) -> None:
|
||||
for index_name in OSM_SIDECAR_ROUTE_SCOPE_INDEXES:
|
||||
connection.execute(f"DROP INDEX IF EXISTS {index_name}")
|
||||
|
||||
|
||||
def rebuild_osm_sidecar_indexes(connection: sqlite3.Connection) -> None:
|
||||
_create_indexes(connection)
|
||||
|
||||
|
||||
def osm_feature_count(session: Session, dataset_id: int, *, kind: str | Sequence[str] | None = None) -> int:
|
||||
dataset = session.get(Dataset, dataset_id)
|
||||
if features_are_sidecar(dataset):
|
||||
kinds = _as_list(kind)
|
||||
sql = "SELECT COUNT(*) FROM osm_features"
|
||||
params: list[object] = []
|
||||
if kinds:
|
||||
placeholders = ", ".join(["?"] * len(kinds))
|
||||
sql += f" WHERE kind IN ({placeholders})"
|
||||
params.extend(kinds)
|
||||
try:
|
||||
with sidecar_connection(dataset) as connection:
|
||||
return int(connection.execute(sql, params).fetchone()[0] or 0)
|
||||
except MissingOsmSidecar:
|
||||
return 0
|
||||
stmt = select(func.count()).select_from(OsmFeature).where(OsmFeature.dataset_id == dataset_id)
|
||||
kinds = _as_list(kind)
|
||||
if kinds:
|
||||
stmt = stmt.where(OsmFeature.kind.in_(kinds))
|
||||
return int(session.scalar(stmt) or 0)
|
||||
|
||||
|
||||
def osm_feature_bbox(
|
||||
session: Session,
|
||||
dataset_ids: Sequence[int],
|
||||
*,
|
||||
kinds: Sequence[str] | None = None,
|
||||
) -> tuple[float | None, float | None, float | None, float | None]:
|
||||
if not dataset_ids:
|
||||
return (None, None, None, None)
|
||||
datasets = {
|
||||
dataset.id: dataset
|
||||
for dataset in session.scalars(select(Dataset).where(Dataset.id.in_([int(value) for value in dataset_ids]))).all()
|
||||
}
|
||||
boxes: list[tuple[float, float, float, float]] = []
|
||||
main_dataset_ids = [dataset_id for dataset_id, dataset in datasets.items() if not features_are_sidecar(dataset)]
|
||||
if main_dataset_ids:
|
||||
stmt = select(func.min(OsmFeature.min_lon), func.min(OsmFeature.min_lat), func.max(OsmFeature.max_lon), func.max(OsmFeature.max_lat)).where(
|
||||
OsmFeature.dataset_id.in_(main_dataset_ids)
|
||||
)
|
||||
if kinds:
|
||||
stmt = stmt.where(OsmFeature.kind.in_(list(kinds)))
|
||||
row = session.execute(stmt).one()
|
||||
if None not in row:
|
||||
boxes.append((float(row[0]), float(row[1]), float(row[2]), float(row[3])))
|
||||
for dataset in datasets.values():
|
||||
if not features_are_sidecar(dataset):
|
||||
continue
|
||||
where = []
|
||||
params: list[object] = []
|
||||
if kinds:
|
||||
placeholders = ", ".join(["?"] * len(kinds))
|
||||
where.append(f"kind IN ({placeholders})")
|
||||
params.extend(list(kinds))
|
||||
sql = "SELECT MIN(min_lon), MIN(min_lat), MAX(max_lon), MAX(max_lat) FROM osm_features"
|
||||
if where:
|
||||
sql += " WHERE " + " AND ".join(where)
|
||||
try:
|
||||
with sidecar_connection(dataset) as connection:
|
||||
row = connection.execute(sql, params).fetchone()
|
||||
if row is not None and None not in row:
|
||||
boxes.append((float(row[0]), float(row[1]), float(row[2]), float(row[3])))
|
||||
except MissingOsmSidecar:
|
||||
continue
|
||||
if not boxes:
|
||||
return (None, None, None, None)
|
||||
return (
|
||||
min(box[0] for box in boxes),
|
||||
min(box[1] for box in boxes),
|
||||
max(box[2] for box in boxes),
|
||||
max(box[3] for box in boxes),
|
||||
)
|
||||
|
||||
|
||||
def query_osm_features(
|
||||
session: Session,
|
||||
dataset_ids: Sequence[int],
|
||||
*,
|
||||
kinds: Sequence[str] | None = None,
|
||||
modes: Sequence[str] | None = None,
|
||||
bbox: tuple[float, float, float, float] | None = None,
|
||||
geometry_required: bool | None = None,
|
||||
search: str | None = None,
|
||||
route_key: str | None = None,
|
||||
route_scopes: Sequence[str] | None = None,
|
||||
ref: str | None = None,
|
||||
osm_type: str | None = None,
|
||||
osm_id: str | None = None,
|
||||
limit: int | None = None,
|
||||
offset: int | None = None,
|
||||
prefer_materialized_ids: bool = True,
|
||||
) -> list[OsmFeature]:
|
||||
if not dataset_ids:
|
||||
return []
|
||||
datasets = {
|
||||
dataset.id: dataset
|
||||
for dataset in session.scalars(select(Dataset).where(Dataset.id.in_([int(value) for value in dataset_ids]))).all()
|
||||
}
|
||||
materialized_ids = _materialized_ids_by_identity(session, list(datasets)) if prefer_materialized_ids else {}
|
||||
rows: list[OsmFeature] = []
|
||||
main_dataset_ids = [dataset_id for dataset_id, dataset in datasets.items() if not features_are_sidecar(dataset)]
|
||||
if main_dataset_ids:
|
||||
stmt = select(OsmFeature).where(OsmFeature.dataset_id.in_(main_dataset_ids))
|
||||
stmt = _apply_main_filters(
|
||||
stmt,
|
||||
kinds=kinds,
|
||||
modes=modes,
|
||||
bbox=bbox,
|
||||
geometry_required=geometry_required,
|
||||
search=search,
|
||||
route_key=route_key,
|
||||
route_scopes=route_scopes,
|
||||
ref=ref,
|
||||
osm_type=osm_type,
|
||||
osm_id=osm_id,
|
||||
)
|
||||
if offset:
|
||||
stmt = stmt.offset(max(0, int(offset)))
|
||||
rows.extend(
|
||||
session.scalars(
|
||||
stmt.order_by(OsmFeature.kind, OsmFeature.mode, OsmFeature.ref, OsmFeature.name, OsmFeature.id).limit(limit)
|
||||
).all()
|
||||
)
|
||||
for dataset_id, dataset in datasets.items():
|
||||
if not features_are_sidecar(dataset):
|
||||
continue
|
||||
rows.extend(
|
||||
_query_sidecar_features(
|
||||
dataset,
|
||||
kinds=kinds,
|
||||
modes=modes,
|
||||
bbox=bbox,
|
||||
geometry_required=geometry_required,
|
||||
search=search,
|
||||
route_key=route_key,
|
||||
route_scopes=route_scopes,
|
||||
ref=ref,
|
||||
osm_type=osm_type,
|
||||
osm_id=osm_id,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
materialized_ids=materialized_ids,
|
||||
)
|
||||
)
|
||||
rows.sort(key=lambda row: (row.kind or "", row.mode or "", row.ref or "", row.name or "", int(row.id or 0)))
|
||||
if limit is not None:
|
||||
return rows[: max(1, int(limit))]
|
||||
return rows
|
||||
|
||||
|
||||
def get_osm_feature(session: Session, feature_id: int) -> OsmFeature | None:
|
||||
return session.get(OsmFeature, feature_id)
|
||||
|
||||
|
||||
def osm_feature_identity_key(feature: OsmFeature) -> str:
|
||||
return f"{feature.dataset_id}|{feature.osm_type}|{feature.osm_id}"
|
||||
|
||||
|
||||
def osm_feature_public_id(feature: OsmFeature) -> int | str | None:
|
||||
if getattr(feature, "_osm_sidecar_source", False):
|
||||
return osm_feature_identity_key(feature)
|
||||
return feature.id
|
||||
|
||||
|
||||
def resolve_osm_feature(session: Session, value: int | str) -> OsmFeature | None:
|
||||
int_value = _safe_int(value)
|
||||
if int_value is not None:
|
||||
feature = session.get(OsmFeature, int_value)
|
||||
if feature is not None:
|
||||
return feature
|
||||
parsed = parse_osm_feature_identity_key(str(value))
|
||||
if parsed is None:
|
||||
return None
|
||||
dataset_id, osm_type, osm_id = parsed
|
||||
existing = session.scalar(
|
||||
select(OsmFeature).where(
|
||||
OsmFeature.dataset_id == dataset_id,
|
||||
OsmFeature.osm_type == osm_type,
|
||||
OsmFeature.osm_id == osm_id,
|
||||
)
|
||||
)
|
||||
if existing is not None:
|
||||
return existing
|
||||
dataset = session.get(Dataset, dataset_id)
|
||||
if not features_are_sidecar(dataset):
|
||||
return None
|
||||
try:
|
||||
with sidecar_connection(dataset) as connection:
|
||||
select_columns = ", ".join(_sidecar_select_columns(_sidecar_columns(connection)))
|
||||
row = connection.execute(
|
||||
f"""
|
||||
SELECT id, {select_columns}
|
||||
FROM osm_features
|
||||
WHERE dataset_id = ?
|
||||
AND osm_type = ?
|
||||
AND osm_id = ?
|
||||
""",
|
||||
(dataset_id, osm_type, osm_id),
|
||||
).fetchone()
|
||||
except MissingOsmSidecar:
|
||||
return None
|
||||
if row is None:
|
||||
return None
|
||||
return _feature_from_row(row, {})
|
||||
|
||||
|
||||
def parse_osm_feature_identity_key(value: str) -> tuple[int, str, str] | None:
|
||||
parts = value.split("|", 2)
|
||||
if len(parts) != 3:
|
||||
return None
|
||||
dataset_id = _safe_int(parts[0])
|
||||
if dataset_id is None:
|
||||
return None
|
||||
osm_type = parts[1].strip()
|
||||
osm_id = parts[2].strip()
|
||||
if not osm_type or not osm_id:
|
||||
return None
|
||||
return dataset_id, osm_type, osm_id
|
||||
|
||||
|
||||
def ensure_main_osm_feature(session: Session, feature: OsmFeature) -> OsmFeature:
|
||||
existing = session.scalar(
|
||||
select(OsmFeature).where(
|
||||
OsmFeature.dataset_id == feature.dataset_id,
|
||||
OsmFeature.osm_type == feature.osm_type,
|
||||
OsmFeature.osm_id == feature.osm_id,
|
||||
)
|
||||
)
|
||||
if existing is not None:
|
||||
return existing
|
||||
values = dict(
|
||||
dataset_id=feature.dataset_id,
|
||||
osm_type=feature.osm_type,
|
||||
osm_id=feature.osm_id,
|
||||
kind=feature.kind,
|
||||
mode=feature.mode,
|
||||
route_scope=feature.route_scope,
|
||||
name=feature.name,
|
||||
ref=feature.ref,
|
||||
operator=feature.operator,
|
||||
network=feature.network,
|
||||
geometry_geojson=feature.geometry_geojson,
|
||||
min_lon=feature.min_lon,
|
||||
min_lat=feature.min_lat,
|
||||
max_lon=feature.max_lon,
|
||||
max_lat=feature.max_lat,
|
||||
tags_json=feature.tags_json,
|
||||
route_key=feature.route_key,
|
||||
operator_key=feature.operator_key,
|
||||
)
|
||||
if settings.is_postgresql_database:
|
||||
session.execute(
|
||||
postgresql_insert(OsmFeature)
|
||||
.values(**values)
|
||||
.on_conflict_do_nothing(index_elements=["dataset_id", "osm_type", "osm_id"])
|
||||
)
|
||||
else:
|
||||
session.execute(insert(OsmFeature).values(**values).prefix_with("OR IGNORE"))
|
||||
session.flush()
|
||||
refresh_postgis_geometries(session, dataset_id=feature.dataset_id, tables=["osm_features"])
|
||||
existing = session.scalar(
|
||||
select(OsmFeature).where(
|
||||
OsmFeature.dataset_id == feature.dataset_id,
|
||||
OsmFeature.osm_type == feature.osm_type,
|
||||
OsmFeature.osm_id == feature.osm_id,
|
||||
)
|
||||
)
|
||||
if existing is None:
|
||||
raise RuntimeError(f"Could not materialize OSM feature {feature.dataset_id}:{feature.osm_type}:{feature.osm_id}")
|
||||
return existing
|
||||
|
||||
|
||||
def materialize_osm_features(session: Session, features: Sequence[OsmFeature]) -> list[OsmFeature]:
|
||||
return [ensure_main_osm_feature(session, feature) for feature in features]
|
||||
|
||||
|
||||
def _new_sidecar_path(dataset: Dataset, source_hash: str | None) -> Path:
|
||||
suffix = (source_hash or dataset.sha256 or str(dataset.id))[:12]
|
||||
return settings.data_dir / "sidecars" / f"source_{dataset.source_id}" / f"osm_dataset_{dataset.id}_{suffix}.sqlite"
|
||||
|
||||
|
||||
def dedupe_osm_feature_rows(rows: Sequence[dict[str, object]]) -> tuple[list[dict[str, object]], int]:
|
||||
selected: dict[tuple[int, str, str], dict[str, object]] = {}
|
||||
for row in rows:
|
||||
key = (int(row["dataset_id"]), str(row["osm_type"]), str(row["osm_id"]))
|
||||
current = selected.get(key)
|
||||
if current is None or _feature_row_preference(row) < _feature_row_preference(current):
|
||||
selected[key] = dict(row)
|
||||
return list(selected.values()), max(0, len(rows) - len(selected))
|
||||
|
||||
|
||||
def _feature_row_preference(row: dict[str, object]) -> tuple[int, int, int]:
|
||||
kind_rank = {
|
||||
"route": 0,
|
||||
"station": 1,
|
||||
"terminal": 2,
|
||||
"stop": 3,
|
||||
"infra": 4,
|
||||
"feature": 5,
|
||||
}.get(str(row.get("kind") or "feature"), 6)
|
||||
has_geometry = 0 if row.get("geometry_geojson") else 1
|
||||
geometry_size = -len(str(row.get("geometry_geojson") or ""))
|
||||
return (kind_rank, has_geometry, geometry_size)
|
||||
|
||||
|
||||
def _create_schema(connection: sqlite3.Connection) -> None:
|
||||
connection.execute(
|
||||
"""
|
||||
CREATE TABLE osm_features (
|
||||
id INTEGER PRIMARY KEY,
|
||||
dataset_id INTEGER NOT NULL,
|
||||
osm_type TEXT NOT NULL,
|
||||
osm_id TEXT NOT NULL,
|
||||
kind TEXT NOT NULL,
|
||||
mode TEXT,
|
||||
route_scope TEXT,
|
||||
name TEXT,
|
||||
ref TEXT,
|
||||
operator TEXT,
|
||||
network TEXT,
|
||||
geometry_geojson TEXT,
|
||||
min_lon REAL,
|
||||
min_lat REAL,
|
||||
max_lon REAL,
|
||||
max_lat REAL,
|
||||
tags_json TEXT,
|
||||
route_key TEXT,
|
||||
operator_key TEXT,
|
||||
UNIQUE(dataset_id, osm_type, osm_id)
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def _create_indexes(connection: sqlite3.Connection) -> None:
|
||||
statements = [
|
||||
"CREATE INDEX IF NOT EXISTS ix_osm_sidecar_kind_mode_bbox ON osm_features (kind, mode, min_lon, max_lon, min_lat, max_lat)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_osm_sidecar_scope_bbox ON osm_features (kind, mode, route_scope, min_lon, max_lon, min_lat, max_lat)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_osm_sidecar_route_key ON osm_features (route_key)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_osm_sidecar_ref ON osm_features (ref)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_osm_sidecar_identity ON osm_features (dataset_id, osm_type, osm_id)",
|
||||
"CREATE INDEX IF NOT EXISTS ix_osm_sidecar_kind_ref_mode ON osm_features (kind, ref, mode)",
|
||||
]
|
||||
for statement in statements:
|
||||
connection.execute(statement)
|
||||
|
||||
|
||||
def _apply_main_filters(stmt, *, kinds, modes, bbox, geometry_required, search, route_key, route_scopes, ref, osm_type, osm_id):
|
||||
if kinds:
|
||||
stmt = stmt.where(OsmFeature.kind.in_(list(kinds)))
|
||||
if modes:
|
||||
stmt = stmt.where(OsmFeature.mode.in_(list(modes)))
|
||||
if route_scopes:
|
||||
stmt = stmt.where(_main_route_scope_condition([str(scope) for scope in route_scopes]))
|
||||
if bbox:
|
||||
min_lon, min_lat, max_lon, max_lat = bbox
|
||||
if settings.is_postgresql_database:
|
||||
stmt = stmt.where(
|
||||
text(
|
||||
"""
|
||||
(
|
||||
osm_features.geom && ST_MakeEnvelope(:bbox_min_lon, :bbox_min_lat, :bbox_max_lon, :bbox_max_lat, 4326)
|
||||
OR (
|
||||
osm_features.geom IS NULL
|
||||
AND osm_features.min_lon <= :bbox_max_lon
|
||||
AND osm_features.max_lon >= :bbox_min_lon
|
||||
AND osm_features.min_lat <= :bbox_max_lat
|
||||
AND osm_features.max_lat >= :bbox_min_lat
|
||||
)
|
||||
)
|
||||
"""
|
||||
)
|
||||
).params(
|
||||
bbox_min_lon=min_lon,
|
||||
bbox_min_lat=min_lat,
|
||||
bbox_max_lon=max_lon,
|
||||
bbox_max_lat=max_lat,
|
||||
)
|
||||
else:
|
||||
stmt = stmt.where(OsmFeature.min_lon <= max_lon, OsmFeature.max_lon >= min_lon, OsmFeature.min_lat <= max_lat, OsmFeature.max_lat >= min_lat)
|
||||
if geometry_required is True:
|
||||
stmt = stmt.where(OsmFeature.geometry_geojson.is_not(None))
|
||||
elif geometry_required is False:
|
||||
stmt = stmt.where(OsmFeature.geometry_geojson.is_(None))
|
||||
if search:
|
||||
if settings.is_postgresql_database:
|
||||
stmt = stmt.where(
|
||||
text(
|
||||
"""
|
||||
(
|
||||
LOWER(COALESCE(osm_features.ref, '')) LIKE :search_pattern
|
||||
OR LOWER(COALESCE(osm_features.name, '')) LIKE :search_pattern
|
||||
OR LOWER(COALESCE(osm_features.tags_json, '')) LIKE :search_pattern
|
||||
)
|
||||
"""
|
||||
)
|
||||
).params(search_pattern=f"%{search.lower()}%")
|
||||
else:
|
||||
pattern = f"%{search}%"
|
||||
stmt = stmt.where(
|
||||
(OsmFeature.ref.ilike(pattern))
|
||||
| (OsmFeature.name.ilike(pattern))
|
||||
| (OsmFeature.tags_json.ilike(pattern))
|
||||
)
|
||||
if route_key:
|
||||
stmt = stmt.where(OsmFeature.route_key == route_key)
|
||||
if ref:
|
||||
stmt = stmt.where(OsmFeature.ref == ref)
|
||||
if osm_type:
|
||||
stmt = stmt.where(OsmFeature.osm_type == osm_type)
|
||||
if osm_id:
|
||||
stmt = stmt.where(OsmFeature.osm_id == osm_id)
|
||||
return stmt
|
||||
|
||||
|
||||
def _main_route_scope_condition(route_scopes: list[str]):
|
||||
fallback = _main_route_scope_fallback_condition(route_scopes)
|
||||
stored = OsmFeature.route_scope.in_(route_scopes)
|
||||
if "local" in route_scopes:
|
||||
non_local_bus_fallback = _main_route_scope_fallback_condition(["long_distance", "regional"])
|
||||
stored = and_(stored, not_(and_(OsmFeature.mode.in_(["bus", "trolleybus"]), non_local_bus_fallback)))
|
||||
return or_(stored, fallback)
|
||||
|
||||
|
||||
def _main_route_scope_fallback_condition(route_scopes: list[str]):
|
||||
ref = func.upper(func.coalesce(OsmFeature.ref, ""))
|
||||
name = func.upper(func.coalesce(OsmFeature.name, ""))
|
||||
network = func.upper(func.coalesce(OsmFeature.network, ""))
|
||||
tags = func.lower(func.coalesce(OsmFeature.tags_json, ""))
|
||||
train_long_distance = and_(
|
||||
OsmFeature.mode == "train",
|
||||
or_(
|
||||
ref.like("ICE%"),
|
||||
ref.like("IC%"),
|
||||
ref.like("EC%"),
|
||||
ref.like("ECE%"),
|
||||
ref.like("EN%"),
|
||||
ref.like("NJ%"),
|
||||
ref.like("RJ%"),
|
||||
ref.like("RJX%"),
|
||||
ref.like("TGV%"),
|
||||
ref.like("THA%"),
|
||||
ref.like("FLX%"),
|
||||
name.like("%INTERCITY%"),
|
||||
name.like("%EUROCITY%"),
|
||||
name.like("%NIGHTJET%"),
|
||||
name.like("%FLIXTRAIN%"),
|
||||
tags.like('%"service":"long_distance"%'),
|
||||
tags.like('%"train":"long_distance"%'),
|
||||
tags.like('%"train":"high_speed"%'),
|
||||
tags.like('%"train":"intercity"%'),
|
||||
),
|
||||
)
|
||||
bus_long_distance = and_(
|
||||
OsmFeature.mode.in_(["bus", "trolleybus"]),
|
||||
or_(
|
||||
name.like("%FLIXBUS%"),
|
||||
network.like("%FLIXBUS%"),
|
||||
name.like("%EUROLINES%"),
|
||||
network.like("%EUROLINES%"),
|
||||
name.like("%INTERCITYBUS%"),
|
||||
name.like("%IC BUS%"),
|
||||
name.like("%FERNBUS%"),
|
||||
tags.like('%"service":"long_distance"%'),
|
||||
tags.like('%"bus":"long_distance"%'),
|
||||
tags.like('%"bus":"intercity"%'),
|
||||
tags.like('%"network:type":"long_distance"%'),
|
||||
),
|
||||
)
|
||||
long_distance = or_(OsmFeature.mode == "coach", train_long_distance, bus_long_distance)
|
||||
bus_regional = and_(
|
||||
OsmFeature.mode.in_(["bus", "trolleybus"]),
|
||||
not_(bus_long_distance),
|
||||
or_(
|
||||
name.like("%REGIONALBUS%"),
|
||||
name.like("%REGIOBUS%"),
|
||||
name.like("%REGIONAL BUS%"),
|
||||
name.like("%REGIONALVERKEHR%"),
|
||||
network.like("%REGIONALBUS%"),
|
||||
network.like("%REGIOBUS%"),
|
||||
network.like("%REGIONALVERKEHR%"),
|
||||
tags.like('%"service":"regional"%'),
|
||||
tags.like('%"bus":"regional"%'),
|
||||
tags.like('%"bus":"interurban"%'),
|
||||
tags.like('%"network:type":"regional"%'),
|
||||
),
|
||||
)
|
||||
local = or_(
|
||||
OsmFeature.mode.in_(["tram", "light_rail", "subway", "ferry", "funicular", "aerialway", "monorail"]),
|
||||
and_(OsmFeature.mode.in_(["bus", "trolleybus"]), not_(or_(bus_long_distance, bus_regional))),
|
||||
and_(
|
||||
OsmFeature.mode == "train",
|
||||
or_(ref.like("S%"), name.like("%S-BAHN%"), network.like("%S-BAHN%"), tags.like('%"train":"commuter"%')),
|
||||
),
|
||||
)
|
||||
train_regional = and_(
|
||||
OsmFeature.mode == "train",
|
||||
not_(train_long_distance),
|
||||
or_(
|
||||
ref.like("IRE%"),
|
||||
ref.like("RE%"),
|
||||
ref.like("RB%"),
|
||||
ref.like("RER%"),
|
||||
ref.like("TER%"),
|
||||
ref.like("REX%"),
|
||||
ref.like("MEX%"),
|
||||
ref.like("ALX%"),
|
||||
ref.like("WFB%"),
|
||||
ref.like("R%"),
|
||||
name.like("%REGIONAL%"),
|
||||
name.like("%REGIO%"),
|
||||
tags.like('%"service":"regional"%'),
|
||||
tags.like('%"train":"regional"%'),
|
||||
),
|
||||
)
|
||||
regional = or_(train_regional, bus_regional)
|
||||
conditions = []
|
||||
if "long_distance" in route_scopes:
|
||||
conditions.append(long_distance)
|
||||
if "regional" in route_scopes:
|
||||
conditions.append(regional)
|
||||
if "local" in route_scopes:
|
||||
conditions.append(local)
|
||||
if "unknown" in route_scopes:
|
||||
conditions.append(and_(OsmFeature.mode == "train", not_(or_(long_distance, regional, local))))
|
||||
return or_(*conditions) if conditions else OsmFeature.route_scope.is_(None)
|
||||
|
||||
|
||||
def _query_sidecar_features(
|
||||
dataset: Dataset,
|
||||
*,
|
||||
kinds: Sequence[str] | None,
|
||||
modes: Sequence[str] | None,
|
||||
bbox: tuple[float, float, float, float] | None,
|
||||
geometry_required: bool | None,
|
||||
search: str | None,
|
||||
route_key: str | None,
|
||||
route_scopes: Sequence[str] | None,
|
||||
ref: str | None,
|
||||
osm_type: str | None,
|
||||
osm_id: str | None,
|
||||
limit: int | None,
|
||||
offset: int | None,
|
||||
materialized_ids: dict[tuple[int, str, str], int],
|
||||
) -> list[OsmFeature]:
|
||||
where = []
|
||||
params: list[object] = []
|
||||
try:
|
||||
with sidecar_connection(dataset) as connection:
|
||||
available_columns = _sidecar_columns(connection)
|
||||
if kinds:
|
||||
placeholders = ", ".join(["?"] * len(kinds))
|
||||
where.append(f"kind IN ({placeholders})")
|
||||
params.extend(list(kinds))
|
||||
if modes:
|
||||
placeholders = ", ".join(["?"] * len(modes))
|
||||
where.append(f"mode IN ({placeholders})")
|
||||
params.extend(list(modes))
|
||||
if bbox:
|
||||
min_lon, min_lat, max_lon, max_lat = bbox
|
||||
where.extend(["min_lon <= ?", "max_lon >= ?", "min_lat <= ?", "max_lat >= ?"])
|
||||
params.extend([max_lon, min_lon, max_lat, min_lat])
|
||||
if geometry_required is True:
|
||||
where.append("geometry_geojson IS NOT NULL")
|
||||
elif geometry_required is False:
|
||||
where.append("geometry_geojson IS NULL")
|
||||
if search:
|
||||
where.append("(LOWER(COALESCE(ref, '')) LIKE ? OR LOWER(COALESCE(name, '')) LIKE ? OR LOWER(COALESCE(tags_json, '')) LIKE ?)")
|
||||
pattern = f"%{search.lower()}%"
|
||||
params.extend([pattern, pattern, pattern])
|
||||
if route_key:
|
||||
where.append("route_key = ?")
|
||||
params.append(route_key)
|
||||
if route_scopes:
|
||||
condition, condition_params = _sidecar_route_scope_condition([str(scope) for scope in route_scopes], has_route_scope="route_scope" in available_columns)
|
||||
where.append(condition)
|
||||
params.extend(condition_params)
|
||||
if ref:
|
||||
where.append("ref = ?")
|
||||
params.append(ref)
|
||||
if osm_type:
|
||||
where.append("osm_type = ?")
|
||||
params.append(osm_type)
|
||||
if osm_id:
|
||||
where.append("osm_id = ?")
|
||||
params.append(osm_id)
|
||||
select_columns = ", ".join(_sidecar_select_columns(available_columns))
|
||||
sql = f"SELECT id, {select_columns} FROM osm_features"
|
||||
if where:
|
||||
sql += " WHERE " + " AND ".join(where)
|
||||
sql += " ORDER BY kind, mode, ref, name, id"
|
||||
if limit is not None:
|
||||
sql += " LIMIT ?"
|
||||
params.append(max(1, int(limit)))
|
||||
if offset:
|
||||
if limit is None:
|
||||
sql += " LIMIT -1"
|
||||
sql += " OFFSET ?"
|
||||
params.append(max(0, int(offset)))
|
||||
return [_feature_from_row(row, materialized_ids) for row in connection.execute(sql, params).fetchall()]
|
||||
except MissingOsmSidecar:
|
||||
return []
|
||||
|
||||
|
||||
def _sidecar_columns(connection: sqlite3.Connection) -> set[str]:
|
||||
return {str(row["name"]) for row in connection.execute("PRAGMA table_info(osm_features)").fetchall()}
|
||||
|
||||
|
||||
def _sidecar_select_columns(available_columns: set[str]) -> list[str]:
|
||||
return [column if column in available_columns else f"NULL AS {column}" for column in OSM_FEATURE_COLUMNS]
|
||||
|
||||
|
||||
def _sidecar_route_scope_condition(route_scopes: list[str], *, has_route_scope: bool) -> tuple[str, list[object]]:
|
||||
fallback_sql, fallback_params = _sidecar_route_scope_fallback_condition(route_scopes)
|
||||
if has_route_scope:
|
||||
placeholders = ", ".join(["?"] * len(route_scopes))
|
||||
stored_sql = f"route_scope IN ({placeholders})"
|
||||
params: list[object] = [*route_scopes]
|
||||
if "local" in route_scopes:
|
||||
non_local_sql, non_local_params = _sidecar_route_scope_fallback_condition(["long_distance", "regional"])
|
||||
stored_sql = f"({stored_sql} AND NOT (mode IN ('bus', 'trolleybus') AND {non_local_sql}))"
|
||||
params.extend(non_local_params)
|
||||
return f"({stored_sql} OR {fallback_sql})", [*params, *fallback_params]
|
||||
return fallback_sql, fallback_params
|
||||
|
||||
|
||||
def _sidecar_route_scope_fallback_condition(route_scopes: list[str]) -> tuple[str, list[object]]:
|
||||
train_long_distance = """(
|
||||
mode = 'train'
|
||||
AND (
|
||||
UPPER(COALESCE(ref, '')) LIKE 'ICE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'IC%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'EC%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'ECE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'EN%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'NJ%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RJ%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RJX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'TGV%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'THA%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'FLX%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%INTERCITY%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%EUROCITY%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%NIGHTJET%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%FLIXTRAIN%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"high_speed"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"intercity"%'
|
||||
)
|
||||
)"""
|
||||
bus_long_distance = """(
|
||||
mode IN ('bus', 'trolleybus')
|
||||
AND (
|
||||
UPPER(COALESCE(name, '')) LIKE '%FLIXBUS%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%FLIXBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%EUROLINES%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%EUROLINES%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%INTERCITYBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%IC BUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%FERNBUS%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"intercity"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"network:type":"long_distance"%'
|
||||
)
|
||||
)"""
|
||||
long_distance = f"(mode = 'coach' OR {train_long_distance} OR {bus_long_distance})"
|
||||
bus_regional = f"""(
|
||||
mode IN ('bus', 'trolleybus')
|
||||
AND NOT {bus_long_distance}
|
||||
AND (
|
||||
UPPER(COALESCE(name, '')) LIKE '%REGIONALBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIOBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIONAL BUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIONALVERKEHR%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%REGIONALBUS%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%REGIOBUS%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%REGIONALVERKEHR%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"regional"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"regional"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"interurban"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"network:type":"regional"%'
|
||||
)
|
||||
)"""
|
||||
train_regional = f"""(
|
||||
mode = 'train'
|
||||
AND NOT {train_long_distance}
|
||||
AND (
|
||||
UPPER(COALESCE(ref, '')) LIKE 'IRE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RB%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RER%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'TER%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'REX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'MEX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'ALX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'WFB%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'R%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIONAL%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIO%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"regional"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"regional"%'
|
||||
)
|
||||
)"""
|
||||
regional = f"({train_regional} OR {bus_regional})"
|
||||
local = f"""(
|
||||
mode IN ('tram', 'light_rail', 'subway', 'ferry', 'funicular', 'aerialway', 'monorail')
|
||||
OR (mode IN ('bus', 'trolleybus') AND NOT ({bus_long_distance} OR {bus_regional}))
|
||||
OR (
|
||||
mode = 'train'
|
||||
AND (
|
||||
UPPER(COALESCE(ref, '')) LIKE 'S%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%S-BAHN%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%S-BAHN%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"commuter"%'
|
||||
)
|
||||
)
|
||||
)"""
|
||||
parts = []
|
||||
if "long_distance" in route_scopes:
|
||||
parts.append(long_distance)
|
||||
if "regional" in route_scopes:
|
||||
parts.append(regional)
|
||||
if "local" in route_scopes:
|
||||
parts.append(local)
|
||||
if "unknown" in route_scopes:
|
||||
parts.append(f"(mode = 'train' AND NOT ({long_distance} OR {regional} OR {local}))")
|
||||
return "(" + " OR ".join(parts or ["0"]) + ")", []
|
||||
|
||||
|
||||
def _feature_from_row(row: sqlite3.Row, materialized_ids: dict[tuple[int, str, str], int]) -> OsmFeature:
|
||||
dataset_id = int(row["dataset_id"])
|
||||
osm_type = str(row["osm_type"])
|
||||
osm_id = str(row["osm_id"])
|
||||
feature_id = materialized_ids.get((dataset_id, osm_type, osm_id), int(row["id"]))
|
||||
feature = OsmFeature(
|
||||
id=feature_id,
|
||||
dataset_id=dataset_id,
|
||||
osm_type=osm_type,
|
||||
osm_id=osm_id,
|
||||
kind=str(row["kind"]),
|
||||
mode=row["mode"],
|
||||
route_scope=row["route_scope"],
|
||||
name=row["name"],
|
||||
ref=row["ref"],
|
||||
operator=row["operator"],
|
||||
network=row["network"],
|
||||
geometry_geojson=row["geometry_geojson"],
|
||||
min_lon=row["min_lon"],
|
||||
min_lat=row["min_lat"],
|
||||
max_lon=row["max_lon"],
|
||||
max_lat=row["max_lat"],
|
||||
tags_json=row["tags_json"],
|
||||
route_key=row["route_key"],
|
||||
operator_key=row["operator_key"],
|
||||
)
|
||||
setattr(feature, "_osm_sidecar_source", True)
|
||||
setattr(feature, "_osm_sidecar_row_id", int(row["id"]))
|
||||
return feature
|
||||
|
||||
|
||||
def _materialized_ids_by_identity(session: Session, dataset_ids: Sequence[int]) -> dict[tuple[int, str, str], int]:
|
||||
if not dataset_ids:
|
||||
return {}
|
||||
rows = session.execute(
|
||||
select(OsmFeature.dataset_id, OsmFeature.osm_type, OsmFeature.osm_id, OsmFeature.id).where(OsmFeature.dataset_id.in_(dataset_ids))
|
||||
).all()
|
||||
return {(int(dataset_id), str(osm_type), str(osm_id)): int(feature_id) for dataset_id, osm_type, osm_id, feature_id in rows}
|
||||
|
||||
|
||||
def _as_list(value: str | Sequence[str] | None) -> list[str]:
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, str):
|
||||
return [value]
|
||||
return [str(item) for item in value]
|
||||
|
||||
|
||||
def _safe_int(value: object) -> int | None:
|
||||
try:
|
||||
return int(value) # type: ignore[arg-type]
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
Reference in New Issue
Block a user