Files
meubility-workbench/app/osm_classification.py
2026-07-01 23:29:51 +02:00

112 lines
4.1 KiB
Python

from __future__ import annotations
import json
import re
from typing import Mapping
# Scope labels produced by the classifier.
LOCAL_SCOPE = "local"
REGIONAL_SCOPE = "regional"
LONG_DISTANCE_SCOPE = "long_distance"
UNKNOWN_SCOPE = "unknown"

# Identifier for the current rule set.
OSM_ROUTE_SCOPE_CLASSIFIER_VERSION = "route_scope_v2"

# Route modes grouped by how they are classified when no explicit tag decides.
BUS_MODES = {
    "bus",
    "trolleybus",
}
LOCAL_MODES = {
    "tram",
    "light_rail",
    "subway",
    "ferry",
    "funicular",
    "aerialway",
    "monorail",
}
LONG_DISTANCE_MODES = {
    "coach",
}

# Lower-cased tag values that settle the scope outright.
LONG_DISTANCE_SERVICE_VALUES = {
    "high_speed",
    "long_distance",
    "intercity",
    "international",
    "night",
    "sleeper",
}
REGIONAL_SERVICE_VALUES = {
    "regional",
    "interurban",
    "commuter",
    "branch",
    "suburban",
}
LOCAL_SERVICE_VALUES = {
    "local",
    "urban",
    "city",
    "subway",
    "tram",
    "light_rail",
    "s-bahn",
    "sbahn",
}

# Line-reference prefixes. Each pattern accepts the prefix at the very start
# of the text, followed either by a word boundary ("ICE 5") or directly by a
# digit ("ICE5").
_LONG_DISTANCE_PREFIXES = "ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB"
_REGIONAL_PREFIXES = "IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R"

LONG_DISTANCE_PREFIX_RE = re.compile(
    rf"^({_LONG_DISTANCE_PREFIXES})\b|^({_LONG_DISTANCE_PREFIXES})\d"
)
REGIONAL_PREFIX_RE = re.compile(
    rf"^({_REGIONAL_PREFIXES})\b|^({_REGIONAL_PREFIXES})\d"
)
LOCAL_TRAIN_PREFIX_RE = re.compile(r"^(S|S-BAHN)\b|^S\d")
# Whole-word matcher for the "IC BUS" marker. A plain substring test would
# also fire on names such as "PUBLIC BUS" or "ELECTRIC BUS" and wrongly
# promote ordinary city buses to long-distance.
_IC_BUS_RE = re.compile(r"\bIC BUS\b")


def infer_osm_route_scope(
    *,
    mode: str | None,
    ref: str | None = None,
    name: str | None = None,
    network: str | None = None,
    tags: Mapping[str, object] | str | None = None,
) -> str | None:
    """Classify a public-transport route into a display scope.

    OSM tagging varies by country and operator, so this intentionally combines
    explicit service tags with conservative reference-prefix heuristics.

    The checks run in this order and the first hit wins:

    1. Explicit tag values (``service``, ``train``, ``bus``, ``passenger``,
       ``network:type``, ``route_scope``): long-distance, then local, then
       regional.
    2. The mode alone, for modes listed in ``LOCAL_MODES`` or
       ``LONG_DISTANCE_MODES``.
    3. Buses: operator/brand markers in the combined ref/name/network text;
       a bus with no marker is local.
    4. Trains: reference prefixes and keywords in the combined text; a train
       with no match is ``UNKNOWN_SCOPE``.

    Args:
        mode: Route mode such as ``"bus"`` or ``"train"``; compared
            case-insensitively after stripping whitespace.
        ref: Line reference, if known.
        name: Route name, if known.
        network: Network name, if known.
        tags: Route tags as a mapping or as a JSON object string.

    Returns:
        One of the ``*_SCOPE`` constants, or ``None`` when the mode is neither
        a bus mode, ``"train"``, nor one of the modes classified by mode alone
        and no explicit tag value decided the scope.
    """
    normalized_mode = (mode or "").strip().lower()
    tags_dict = _tags_dict(tags)

    # Explicit tagging outranks every heuristic below.
    explicit_values = {
        str(raw).strip().lower()
        for raw in (
            tags_dict.get(key)
            for key in ("service", "train", "bus", "passenger", "network:type", "route_scope")
        )
        if raw
    }
    if explicit_values & LONG_DISTANCE_SERVICE_VALUES:
        return LONG_DISTANCE_SCOPE
    if explicit_values & LOCAL_SERVICE_VALUES:
        return LOCAL_SCOPE
    if explicit_values & REGIONAL_SERVICE_VALUES:
        return REGIONAL_SCOPE

    # Modes whose scope does not depend on the individual line.
    if normalized_mode in LOCAL_MODES:
        return LOCAL_SCOPE
    if normalized_mode in LONG_DISTANCE_MODES:
        return LONG_DISTANCE_SCOPE

    # Upper-cased ref/name/network text used by the keyword heuristics.
    text = _classification_text(ref, name, network, tags_dict)

    if normalized_mode in BUS_MODES:
        if _IC_BUS_RE.search(text) or any(
            marker in text
            for marker in ("FLIXBUS", "EUROLINES", "INTERCITYBUS", "LONG DISTANCE", "FERNBUS")
        ):
            return LONG_DISTANCE_SCOPE
        if any(
            marker in text
            for marker in ("REGIONALBUS", "REGIOBUS", "REGIONAL BUS", "REGIONALVERKEHR", "REGIONAL VERKEHR")
        ):
            return REGIONAL_SCOPE
        # Buses without any long-distance or regional marker are local.
        return LOCAL_SCOPE

    if normalized_mode == "train":
        if LONG_DISTANCE_PREFIX_RE.search(text) or any(
            marker in text for marker in ("INTERCITY", "EUROCITY", "NIGHTJET", "FLIXTRAIN")
        ):
            return LONG_DISTANCE_SCOPE
        if LOCAL_TRAIN_PREFIX_RE.search(text) or "S-BAHN" in text or "SBAHN" in text:
            return LOCAL_SCOPE
        # "REGIO" is a substring of REGIONAL, REGIONALBAHN and REGIONALEXPRESS,
        # so this single test covers all of those spellings.
        if REGIONAL_PREFIX_RE.search(text) or "REGIO" in text:
            return REGIONAL_SCOPE
        return UNKNOWN_SCOPE

    return None
def infer_osm_route_scope_from_tags(
    mode: str | None,
    ref: str | None,
    name: str | None,
    network: str | None,
    tags_json: str | None,
) -> str | None:
    """Positional-argument adapter around ``infer_osm_route_scope``.

    Forwards every argument unchanged, passing ``tags_json`` as the ``tags``
    argument (a JSON object string or ``None``).

    NOTE(review): presumably intended for callers that can only pass
    positional arguments (for example a registered SQL function); confirm
    against the call sites.

    Returns:
        Whatever ``infer_osm_route_scope`` returns for the same inputs.
    """
    return infer_osm_route_scope(
        mode=mode,
        ref=ref,
        name=name,
        network=network,
        tags=tags_json,
    )
def _tags_dict(tags: Mapping[str, object] | str | None) -> dict[str, object]:
if isinstance(tags, str):
try:
data = json.loads(tags or "{}")
except json.JSONDecodeError:
return {}
return data if isinstance(data, dict) else {}
if isinstance(tags, Mapping):
return dict(tags)
return {}
def _classification_text(ref: str | None, name: str | None, network: str | None, tags: Mapping[str, object]) -> str:
parts = [
ref or "",
name or "",
network or "",
str(tags.get("ref") or ""),
str(tags.get("name") or ""),
str(tags.get("network") or ""),
str(tags.get("network:short") or ""),
]
return " ".join(parts).strip().upper().replace("_", " ")