112 lines
4.1 KiB
Python
112 lines
4.1 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
from typing import Mapping
|
|
|
|
|
|
# Scope labels produced by the classifier.
LOCAL_SCOPE = "local"
REGIONAL_SCOPE = "regional"
LONG_DISTANCE_SCOPE = "long_distance"
UNKNOWN_SCOPE = "unknown"

# Identifier for this rule set (presumably bumped whenever the rules change,
# so stored classifications can be told apart -- confirm with callers).
OSM_ROUTE_SCOPE_CLASSIFIER_VERSION = "route_scope_v2"

# Modes whose scope is decided by marker substrings (buses) or implied
# directly by the mode itself (local / long-distance modes).
BUS_MODES = {"bus", "trolleybus"}
LOCAL_MODES = {
    "tram",
    "light_rail",
    "subway",
    "ferry",
    "funicular",
    "aerialway",
    "monorail",
}
LONG_DISTANCE_MODES = {"coach"}

# Lower-cased tag values that name a scope explicitly.
LONG_DISTANCE_SERVICE_VALUES = {"high_speed", "long_distance", "intercity", "international", "night", "sleeper"}
REGIONAL_SERVICE_VALUES = {
    "regional",
    "interurban",
    "commuter",
    "branch",
    "suburban",
}
LOCAL_SERVICE_VALUES = {
    "local",
    "urban",
    "city",
    "subway",
    "tram",
    "light_rail",
    "s-bahn",
    "sbahn",
}

# Line-prefix patterns, anchored at the start of the upper-cased
# classification text. Each prefix must be followed either by a word boundary
# ("ICE 5") or directly by a digit ("ICE5"); "\b" alone would miss the latter
# because a letter followed by a digit is not a word boundary.
LONG_DISTANCE_PREFIX_RE = re.compile(
    r"^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\b|^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\d"
)
REGIONAL_PREFIX_RE = re.compile(
    r"^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\b|^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\d"
)
LOCAL_TRAIN_PREFIX_RE = re.compile(r"^(S|S-BAHN)\b|^S\d")
|
|
|
|
|
|
def infer_osm_route_scope(
    *,
    mode: str | None,
    ref: str | None = None,
    name: str | None = None,
    network: str | None = None,
    tags: Mapping[str, object] | str | None = None,
) -> str | None:
    """Classify a public-transport route into a display scope.

    OSM tagging varies by country and operator, so explicit service tags are
    trusted first and conservative reference-prefix heuristics are only used
    as a fallback.

    Decision order (first hit wins):

    1. Explicit values of the ``service``, ``train``, ``bus``, ``passenger``,
       ``network:type`` and ``route_scope`` tags, checked for long-distance,
       then local, then regional. This step applies whatever the mode is.
    2. Modes that imply a scope on their own (``LOCAL_MODES``,
       ``LONG_DISTANCE_MODES``).
    3. Bus modes: marker substrings in the classification text, defaulting to
       the local scope.
    4. ``train``: line-prefix patterns and marker substrings, defaulting to
       the unknown scope.

    Args:
        mode: Transport mode; compared after trimming and lower-casing.
        ref: Line reference of the route.
        name: Route name.
        network: Network the route belongs to.
        tags: Extra tags, either as a mapping or as JSON object text.

    Returns:
        One of the ``*_SCOPE`` constants, or ``None`` when no explicit tag
        matches and the mode is not one this classifier handles.
    """
    mode_key = (mode or "").strip().lower()
    parsed_tags = _tags_dict(tags)

    # Collect the normalized values of every scope-bearing tag that is set.
    explicit = set()
    for tag_key in ("service", "train", "bus", "passenger", "network:type", "route_scope"):
        raw_value = parsed_tags.get(tag_key)
        if raw_value:
            explicit.add(str(raw_value).strip().lower())

    # Order matters: long-distance outranks local, which outranks regional.
    tag_rules = (
        (LONG_DISTANCE_SCOPE, LONG_DISTANCE_SERVICE_VALUES),
        (LOCAL_SCOPE, LOCAL_SERVICE_VALUES),
        (REGIONAL_SCOPE, REGIONAL_SERVICE_VALUES),
    )
    for scope, accepted_values in tag_rules:
        if not explicit.isdisjoint(accepted_values):
            return scope

    if mode_key in LOCAL_MODES:
        return LOCAL_SCOPE
    if mode_key in LONG_DISTANCE_MODES:
        return LONG_DISTANCE_SCOPE

    # Upper-cased free text used by the marker and prefix heuristics below.
    haystack = _classification_text(ref, name, network, parsed_tags)

    def mentions(*markers: str) -> bool:
        """Return True when any marker occurs as a substring of the text."""
        return any(marker in haystack for marker in markers)

    if mode_key in BUS_MODES:
        if mentions("FLIXBUS", "EUROLINES", "INTERCITYBUS", "IC BUS", "LONG DISTANCE", "FERNBUS"):
            return LONG_DISTANCE_SCOPE
        if mentions("REGIONALBUS", "REGIOBUS", "REGIONAL BUS", "REGIONALVERKEHR", "REGIONAL VERKEHR"):
            return REGIONAL_SCOPE
        # Buses without a long-distance or regional marker count as local.
        return LOCAL_SCOPE

    if mode_key != "train":
        return None

    if LONG_DISTANCE_PREFIX_RE.search(haystack) or mentions("INTERCITY", "EUROCITY", "NIGHTJET", "FLIXTRAIN"):
        return LONG_DISTANCE_SCOPE
    if LOCAL_TRAIN_PREFIX_RE.search(haystack) or mentions("S-BAHN", "SBahn".upper()):
        return LOCAL_SCOPE
    if REGIONAL_PREFIX_RE.search(haystack) or mentions("REGIONAL", "REGIO", "REGIONALBAHN", "REGIONALEXPRESS"):
        return REGIONAL_SCOPE
    # A train that matched none of the heuristics is reported as unknown.
    return UNKNOWN_SCOPE
|
|
|
|
|
|
def infer_osm_route_scope_from_tags(
    mode: str | None,
    ref: str | None,
    name: str | None,
    network: str | None,
    tags_json: str | None,
) -> str | None:
    """Positional-argument wrapper around ``infer_osm_route_scope``.

    All five arguments are forwarded by keyword; ``tags_json`` is passed on
    as ``tags``. The result is returned unchanged.
    """
    return infer_osm_route_scope(
        mode=mode,
        ref=ref,
        name=name,
        network=network,
        tags=tags_json,
    )
|
|
|
|
|
|
def _tags_dict(tags: Mapping[str, object] | str | None) -> dict[str, object]:
|
|
if isinstance(tags, str):
|
|
try:
|
|
data = json.loads(tags or "{}")
|
|
except json.JSONDecodeError:
|
|
return {}
|
|
return data if isinstance(data, dict) else {}
|
|
if isinstance(tags, Mapping):
|
|
return dict(tags)
|
|
return {}
|
|
|
|
|
|
def _classification_text(ref: str | None, name: str | None, network: str | None, tags: Mapping[str, object]) -> str:
|
|
parts = [
|
|
ref or "",
|
|
name or "",
|
|
network or "",
|
|
str(tags.get("ref") or ""),
|
|
str(tags.get("name") or ""),
|
|
str(tags.get("network") or ""),
|
|
str(tags.get("network:short") or ""),
|
|
]
|
|
return " ".join(parts).strip().upper().replace("_", " ")
|