Alpha stage commit
This commit is contained in:
111
app/osm_classification.py
Normal file
111
app/osm_classification.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Mapping
|
||||
|
||||
|
||||
# Scope labels returned by infer_osm_route_scope().
LOCAL_SCOPE = "local"
REGIONAL_SCOPE = "regional"
LONG_DISTANCE_SCOPE = "long_distance"
UNKNOWN_SCOPE = "unknown"

# Identifier of the current rule set.
# NOTE(review): presumably bumped whenever the rules below change so that
# stored classifications can be recomputed -- confirm against callers.
OSM_ROUTE_SCOPE_CLASSIFIER_VERSION = "route_scope_v2"

# Modes that fall back to LOCAL_SCOPE unless the route text carries a
# long-distance or regional marker.
BUS_MODES = {
    "bus",
    "trolleybus",
}

# Modes classified as local on the mode alone (after explicit tags are checked).
LOCAL_MODES = {
    "tram",
    "light_rail",
    "subway",
    "ferry",
    "funicular",
    "aerialway",
    "monorail",
}

# Modes classified as long distance on the mode alone.
LONG_DISTANCE_MODES = {
    "coach",
}

# Explicit tag values, compared lower-cased against the "service", "train",
# "bus", "passenger", "network:type" and "route_scope" tags.
LONG_DISTANCE_SERVICE_VALUES = {
    "high_speed",
    "long_distance",
    "intercity",
    "international",
    "night",
    "sleeper",
}
REGIONAL_SERVICE_VALUES = {
    "regional",
    "interurban",
    "commuter",
    "branch",
    "suburban",
}
LOCAL_SERVICE_VALUES = {
    "local",
    "urban",
    "city",
    "subway",
    "tram",
    "light_rail",
    "s-bahn",
    "sbahn",
}

# Anchored line-prefix patterns for train routes. A prefix counts when it is
# followed either by a word boundary ("ICE 5") or directly by a digit ("ICE5").
# They are searched against upper-cased text.
LONG_DISTANCE_PREFIX_RE = re.compile(
    r"^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\b|^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\d"
)
REGIONAL_PREFIX_RE = re.compile(
    r"^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\b|^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\d"
)
LOCAL_TRAIN_PREFIX_RE = re.compile(
    r"^(S|S-BAHN)\b|^S\d"
)
|
||||
|
||||
|
||||
# "IC BUS" only marks a long-distance bus when it starts on a word boundary.
# A plain substring test also fires inside "PUBLIC BUS", "ELECTRIC BUS", etc.
_IC_BUS_MARKER_RE = re.compile(r"\bIC BUS")


def infer_osm_route_scope(
    *,
    mode: str | None,
    ref: str | None = None,
    name: str | None = None,
    network: str | None = None,
    tags: Mapping[str, object] | str | None = None,
) -> str | None:
    """Classify a public-transport route into a display scope.

    OSM tagging varies by country and operator, so this intentionally combines
    explicit service tags with conservative reference-prefix heuristics.

    Rules are evaluated in this order; the first match wins:

    1. Explicit values of the ``service``, ``train``, ``bus``, ``passenger``,
       ``network:type`` and ``route_scope`` tags (long distance, then local,
       then regional).
    2. The mode alone, for ``LOCAL_MODES`` and ``LONG_DISTANCE_MODES``.
    3. For ``BUS_MODES``: operator/product markers in the route text,
       defaulting to local.
    4. For ``"train"``: line prefixes and product markers in the route text,
       defaulting to unknown.

    NOTE(review): the prefix patterns are anchored with ``^``, so they only
    inspect the start of the combined text, i.e. the first non-empty value
    among ``ref``, ``name``, ``network`` and then the tag-derived fields.
    Confirm that a ref present only in ``tags`` should not be prefix-matched
    when ``name`` is also given.

    Args:
        mode: Route mode such as ``"bus"`` or ``"train"``; compared
            case-insensitively after stripping whitespace.
        ref: Route reference (line number/code), if known.
        name: Route name, if known.
        network: Network name, if known.
        tags: Raw OSM tags, either as a mapping or as a JSON object string.

    Returns:
        ``LONG_DISTANCE_SCOPE``, ``LOCAL_SCOPE`` or ``REGIONAL_SCOPE`` when a
        rule matches; ``UNKNOWN_SCOPE`` for a train that matches no rule;
        ``None`` when no explicit tag matches and the mode is not one of the
        handled modes.
    """
    normalized_mode = (mode or "").strip().lower()
    tags_dict = _tags_dict(tags)

    # Collect explicit classification values. OSM packs several values into
    # one tag as "a;b", so split on ";" -- a single value is unaffected.
    explicit_values = set()
    for key in ("service", "train", "bus", "passenger", "network:type", "route_scope"):
        raw_value = tags_dict.get(key)
        if not raw_value:
            continue
        explicit_values.update(
            piece.strip().lower() for piece in str(raw_value).split(";")
        )

    if explicit_values & LONG_DISTANCE_SERVICE_VALUES:
        return LONG_DISTANCE_SCOPE
    if explicit_values & LOCAL_SERVICE_VALUES:
        return LOCAL_SCOPE
    if explicit_values & REGIONAL_SERVICE_VALUES:
        return REGIONAL_SCOPE

    if normalized_mode in LOCAL_MODES:
        return LOCAL_SCOPE
    if normalized_mode in LONG_DISTANCE_MODES:
        return LONG_DISTANCE_SCOPE

    # Upper-cased, underscore-free concatenation of ref/name/network fields.
    text = _classification_text(ref, name, network, tags_dict)

    if normalized_mode in BUS_MODES:
        if _IC_BUS_MARKER_RE.search(text) or any(
            marker in text
            for marker in ("FLIXBUS", "EUROLINES", "INTERCITYBUS", "LONG DISTANCE", "FERNBUS")
        ):
            return LONG_DISTANCE_SCOPE
        if any(
            marker in text
            for marker in (
                "REGIONALBUS",
                "REGIOBUS",
                "REGIONAL BUS",
                "REGIONALVERKEHR",
                "REGIONAL VERKEHR",
            )
        ):
            return REGIONAL_SCOPE
        # Buses without any marker are treated as local services.
        return LOCAL_SCOPE

    if normalized_mode == "train":
        if LONG_DISTANCE_PREFIX_RE.search(text) or any(
            marker in text
            for marker in ("INTERCITY", "EUROCITY", "NIGHTJET", "FLIXTRAIN")
        ):
            return LONG_DISTANCE_SCOPE
        if LOCAL_TRAIN_PREFIX_RE.search(text) or "S-BAHN" in text or "SBAHN" in text:
            return LOCAL_SCOPE
        # "REGIO" also covers REGIONAL, REGIONALBAHN and REGIONALEXPRESS.
        if REGIONAL_PREFIX_RE.search(text) or "REGIO" in text:
            return REGIONAL_SCOPE
        return UNKNOWN_SCOPE

    return None
|
||||
|
||||
|
||||
def infer_osm_route_scope_from_tags(
    mode: str | None,
    ref: str | None,
    name: str | None,
    network: str | None,
    tags_json: str | None,
) -> str | None:
    """Positional-argument adapter for ``infer_osm_route_scope``.

    Forwards every argument by keyword; ``tags_json`` is passed through as the
    ``tags`` argument and the result is returned unchanged.
    """
    forwarded = {
        "mode": mode,
        "ref": ref,
        "name": name,
        "network": network,
        "tags": tags_json,
    }
    return infer_osm_route_scope(**forwarded)
|
||||
|
||||
|
||||
def _tags_dict(tags: Mapping[str, object] | str | None) -> dict[str, object]:
|
||||
if isinstance(tags, str):
|
||||
try:
|
||||
data = json.loads(tags or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return data if isinstance(data, dict) else {}
|
||||
if isinstance(tags, Mapping):
|
||||
return dict(tags)
|
||||
return {}
|
||||
|
||||
|
||||
def _classification_text(ref: str | None, name: str | None, network: str | None, tags: Mapping[str, object]) -> str:
|
||||
parts = [
|
||||
ref or "",
|
||||
name or "",
|
||||
network or "",
|
||||
str(tags.get("ref") or ""),
|
||||
str(tags.get("name") or ""),
|
||||
str(tags.get("network") or ""),
|
||||
str(tags.get("network:short") or ""),
|
||||
]
|
||||
return " ".join(parts).strip().upper().replace("_", " ")
|
||||
Reference in New Issue
Block a user