from __future__ import annotations

import json
import re
from typing import Mapping

# Scope labels returned by the classifier.
LOCAL_SCOPE = "local"
REGIONAL_SCOPE = "regional"
LONG_DISTANCE_SCOPE = "long_distance"
UNKNOWN_SCOPE = "unknown"

OSM_ROUTE_SCOPE_CLASSIFIER_VERSION = "route_scope_v2"

# Modes are compared after strip() + lower().
BUS_MODES = {"bus", "trolleybus"}
LOCAL_MODES = {"tram", "light_rail", "subway", "ferry", "funicular", "aerialway", "monorail"}
LONG_DISTANCE_MODES = {"coach"}

# Explicit tag values (lower-cased) that decide the scope on their own.
LONG_DISTANCE_SERVICE_VALUES = {
    "high_speed",
    "long_distance",
    "intercity",
    "international",
    "night",
    "sleeper",
}
REGIONAL_SERVICE_VALUES = {"regional", "interurban", "commuter", "branch", "suburban"}
LOCAL_SERVICE_VALUES = {"local", "urban", "city", "subway", "tram", "light_rail", "s-bahn", "sbahn"}

# Prefix patterns applied to the upper-cased classification text. Each one
# accepts the prefix followed by a word boundary ("ICE 1", "RE-5") or
# directly by a digit ("ICE1", "RE5").
LONG_DISTANCE_PREFIX_RE = re.compile(
    r"^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\b"
    r"|^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\d"
)
REGIONAL_PREFIX_RE = re.compile(
    r"^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\b"
    r"|^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\d"
)
LOCAL_TRAIN_PREFIX_RE = re.compile(r"^(S|S-BAHN)\b|^S\d")

# Tag keys whose values are compared against the *_SERVICE_VALUES sets.
_SCOPE_TAG_KEYS = ("service", "train", "bus", "passenger", "network:type", "route_scope")

# Substring markers searched for in the upper-cased classification text.
_LONG_DISTANCE_BUS_MARKERS = (
    "FLIXBUS",
    "EUROLINES",
    "INTERCITYBUS",
    "IC BUS",
    "LONG DISTANCE",
    "FERNBUS",
)
_REGIONAL_BUS_MARKERS = (
    "REGIONALBUS",
    "REGIOBUS",
    "REGIONAL BUS",
    "REGIONALVERKEHR",
    "REGIONAL VERKEHR",
)
_LONG_DISTANCE_TRAIN_MARKERS = ("INTERCITY", "EUROCITY", "NIGHTJET", "FLIXTRAIN")
_LOCAL_TRAIN_MARKERS = ("S-BAHN", "SBAHN")
_REGIONAL_TRAIN_MARKERS = ("REGIONAL", "REGIO", "REGIONALBAHN", "REGIONALEXPRESS")


def infer_osm_route_scope(
    *,
    mode: str | None,
    ref: str | None = None,
    name: str | None = None,
    network: str | None = None,
    tags: Mapping[str, object] | str | None = None,
) -> str | None:
    """Classify a public-transport route into a display scope.

    OSM tagging varies by country and operator, so this intentionally
    combines explicit service tags with conservative reference-prefix
    heuristics.

    Checks run in this order and the first hit wins:

    1. Explicit tag values (long distance, then local, then regional).
    2. The mode alone, for modes in ``LOCAL_MODES`` / ``LONG_DISTANCE_MODES``.
    3. For bus modes: name/network markers, defaulting to local.
    4. For ``train``: prefix patterns and markers, defaulting to unknown.

    Args:
        mode: Transport mode; compared case-insensitively after stripping.
        ref: Route reference such as ``"ICE 1"`` or ``"RE5"``.
        name: Route name.
        network: Network name.
        tags: Tag mapping, or a JSON string encoding one. Anything that does
            not decode to a JSON object is treated as "no tags".

    Returns:
        One of the ``*_SCOPE`` constants, or ``None`` when the mode is not
        handled by any rule.
    """
    normalized_mode = (mode or "").strip().lower()
    tags_dict = _tags_dict(tags)

    # Explicit tagging beats every heuristic below.
    values = _scope_tag_values(tags_dict)
    if values & LONG_DISTANCE_SERVICE_VALUES:
        return LONG_DISTANCE_SCOPE
    if values & LOCAL_SERVICE_VALUES:
        return LOCAL_SCOPE
    if values & REGIONAL_SERVICE_VALUES:
        return REGIONAL_SCOPE

    if normalized_mode in LOCAL_MODES:
        return LOCAL_SCOPE
    if normalized_mode in LONG_DISTANCE_MODES:
        return LONG_DISTANCE_SCOPE

    text = _classification_text(ref, name, network, tags_dict)

    if normalized_mode in BUS_MODES:
        if _contains_any(text, _LONG_DISTANCE_BUS_MARKERS):
            return LONG_DISTANCE_SCOPE
        if _contains_any(text, _REGIONAL_BUS_MARKERS):
            return REGIONAL_SCOPE
        # Buses without a recognised marker are treated as local.
        return LOCAL_SCOPE

    if normalized_mode == "train":
        if LONG_DISTANCE_PREFIX_RE.search(text) or _contains_any(text, _LONG_DISTANCE_TRAIN_MARKERS):
            return LONG_DISTANCE_SCOPE
        # Local is tested before regional so "S..." refs and S-Bahn names are
        # not swallowed by the broader "REGIO" substring marker.
        if LOCAL_TRAIN_PREFIX_RE.search(text) or _contains_any(text, _LOCAL_TRAIN_MARKERS):
            return LOCAL_SCOPE
        if REGIONAL_PREFIX_RE.search(text) or _contains_any(text, _REGIONAL_TRAIN_MARKERS):
            return REGIONAL_SCOPE
        return UNKNOWN_SCOPE

    return None


def infer_osm_route_scope_from_tags(
    mode: str | None,
    ref: str | None,
    name: str | None,
    network: str | None,
    tags_json: str | None,
) -> str | None:
    """Positional wrapper around :func:`infer_osm_route_scope`.

    ``tags_json`` is forwarded as the ``tags`` argument, so it is parsed as a
    JSON object and ignored when it is missing or malformed.
    """
    return infer_osm_route_scope(mode=mode, ref=ref, name=name, network=network, tags=tags_json)


def _tags_dict(tags: Mapping[str, object] | str | None) -> dict[str, object]:
    """Return *tags* as a plain dict.

    A string is decoded as JSON; invalid JSON or a non-object payload yields
    an empty dict. A mapping is shallow-copied. Anything else yields an empty
    dict.
    """
    if isinstance(tags, str):
        try:
            data = json.loads(tags or "{}")
        except json.JSONDecodeError:
            return {}
        return data if isinstance(data, dict) else {}
    if isinstance(tags, Mapping):
        return dict(tags)
    return {}


def _scope_tag_values(tags: Mapping[str, object]) -> set[str]:
    """Collect the normalized values of the scope-relevant tags.

    A value may hold several entries separated by ``;`` (for example
    ``"regional;long_distance"``); each entry is stripped and lower-cased so
    it can be matched against the ``*_SERVICE_VALUES`` sets individually.
    """
    values: set[str] = set()
    for key in _SCOPE_TAG_KEYS:
        raw = tags.get(key)
        if not raw:
            continue
        for part in str(raw).lower().split(";"):
            part = part.strip()
            if part:
                values.add(part)
    return values


def _contains_any(text: str, markers: tuple[str, ...]) -> bool:
    """Return True when any of *markers* occurs as a substring of *text*."""
    return any(marker in text for marker in markers)


def _classification_text(
    ref: str | None,
    name: str | None,
    network: str | None,
    tags: Mapping[str, object],
) -> str:
    """Build the upper-cased text that the prefix and marker heuristics scan.

    The explicit ``ref``/``name``/``network`` arguments come first, followed
    by the ``ref``, ``name``, ``network`` and ``network:short`` tags. Fields
    are joined with single spaces, underscores become spaces, and the result
    is stripped, so the first non-empty field is what the ``^``-anchored
    prefix patterns see.
    """
    fields = (
        ref,
        name,
        network,
        tags.get("ref"),
        tags.get("name"),
        tags.get("network"),
        tags.get("network:short"),
    )
    # Every field goes through str() so a non-string value (e.g. a numeric
    # ref) cannot make join() raise.
    return " ".join(str(field or "") for field in fields).strip().upper().replace("_", " ")