Alpha stage commit
This commit is contained in:
20
.env.example
Normal file
20
.env.example
Normal file
@@ -0,0 +1,20 @@
|
||||
# SQLite is the default for the prototype. It keeps the project runnable without Docker.
|
||||
DATABASE_URL=sqlite:///./data/workbench.sqlite
|
||||
# For large imports, use PostgreSQL/PostGIS instead:
|
||||
# DATABASE_URL=postgresql://USER:PASSWORD@localhost:5432/meubility
|
||||
# POSTGRES_USE_SIDECARS=false
|
||||
DATA_DIR=./data
|
||||
GTFS_STOP_TIMES_IMPORT_LIMIT=250000
|
||||
|
||||
# Start separate queue worker processes from the API server lifespan.
|
||||
# Workers survive normal server restarts by default; stale leases are recovered.
|
||||
QUEUE_WORKER_AUTOSTART=true
|
||||
QUEUE_WORKER_COUNT=1
|
||||
QUEUE_WORKER_POLL_INTERVAL_SECONDS=2
|
||||
QUEUE_JOB_LEASE_SECONDS=7200
|
||||
QUEUE_WORKER_STOP_ON_SHUTDOWN=false
|
||||
|
||||
# Chunk sizes for queued data-preparation jobs.
|
||||
ROUTE_MATCHING_BATCH_SIZE=100
|
||||
ROUTE_LAYER_OSM_ROUTE_BATCH_SIZE=1000
|
||||
ROUTE_LAYER_OSM_STOP_BATCH_SIZE=5000
|
||||
330
.gitignore
vendored
330
.gitignore
vendored
@@ -1,328 +1,8 @@
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
#uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# ---> Node
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Uncomment the public line below if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
.cache
|
||||
|
||||
# vitepress build output
|
||||
**/.vitepress/dist
|
||||
|
||||
# vitepress cache directory
|
||||
**/.vitepress/cache
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
|
||||
# ---> VisualStudioCode
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
/data/*
|
||||
!/data/.gitkeep
|
||||
*.sqlite
|
||||
*.db
|
||||
.DS_Store
|
||||
|
||||
17
Dockerfile
Normal file
17
Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
DATA_DIR=/app/data \
|
||||
DATABASE_URL=sqlite:////app/data/workbench.sqlite
|
||||
|
||||
COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY app ./app
|
||||
COPY README.md MVP_ROADMAP.md ./
|
||||
RUN mkdir -p /app/data
|
||||
|
||||
EXPOSE 8000
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
220
MVP_ROADMAP.md
Normal file
220
MVP_ROADMAP.md
Normal file
@@ -0,0 +1,220 @@
|
||||
# MVP roadmap
|
||||
|
||||
Last updated: 2026-07-01
|
||||
|
||||
See also `docs/backlog.md` for the prioritized engineering backlog, caveats, and open optimization list.
|
||||
|
||||
## Objective
|
||||
|
||||
Build an internal management workbench that turns public mobility data into a normalized, auditable, coverage-scored dataset for a future traveller-facing web/native app.
|
||||
|
||||
The workbench stays distinct from the public app. Its users are data engineers, analysts, and operations staff who need to ingest, inspect, link, correct, route against, and publish mobility data.
|
||||
|
||||
## Current prototype: implemented
|
||||
|
||||
The repository has moved beyond the original SQLite/Berlin prototype. The current development path is Germany-scale and PostGIS-first, while SQLite remains useful as a legacy/test fallback.
|
||||
|
||||
Implemented:
|
||||
|
||||
```text
|
||||
source registry and source catalog
|
||||
local source cache
|
||||
job queue with job events and worker process
|
||||
PostgreSQL/PostGIS runtime support with SQLite fallback
|
||||
GTFS static importer for large national feeds
|
||||
OSM PBF import path for Germany-scale extracts
|
||||
OSM address index and address-aware journey endpoints
|
||||
canonical stop/station linking from GTFS and OSM
|
||||
automatic GTFS <-> OSM route matching
|
||||
manual route and canonical-stop rule persistence
|
||||
visual route-layer builder from OSM routes and GTFS shapes
|
||||
walk/drive routing layer from OSM-derived routing graph
|
||||
progressive journey-search API and UI polling
|
||||
map right-click "from here" / "to here"
|
||||
management UI with map, sources, stats, jobs, matches, search, and journeys
|
||||
separate GTFS Harmonization and Mapping Data source modules in the UI
|
||||
generic job-details overlay with phase timeline, event log, and queue snapshot
|
||||
QA dashboard skeleton for source/import/link/route/publication health
|
||||
GTFS harmonization concept and service-boundary decision
|
||||
CLI commands
|
||||
tests and syntax checks for changed modules
|
||||
```
|
||||
|
||||
Recent fixes:
|
||||
|
||||
```text
|
||||
PostgreSQL startup avoids unnecessary DDL when PostGIS columns/indexes already exist.
|
||||
Queue route-layer rebuild can be claimed by a real worker instead of staying queued behind a stale worker pid.
|
||||
Timetable routing no longer requires visual route-pattern trip links.
|
||||
Walk-leg route geometry has a short-lived in-process cache.
|
||||
Address search is bbox-aware without being bbox-limited.
|
||||
Job rows expose a details overlay that polls job events only while open.
|
||||
Journey routing consumes the active harmonized GTFS snapshot instead of a raw feed picker.
|
||||
```
|
||||
|
||||
## Current prototype: known limits
|
||||
|
||||
The app can import and inspect Germany-scale OSM and GTFS, but the routing and route-layer rebuild paths are still prototype-grade.
|
||||
|
||||
Important limits:
|
||||
|
||||
```text
|
||||
journey search is not yet based on RAPTOR or CSA (Connection Scan Algorithm)
|
||||
address endpoints can multiply transit searches through several nearby access/egress stops
|
||||
progressive transfer stages still recompute too much
|
||||
route-layer rebuild is coarse-grained and rewrites derived link tables
|
||||
visual route-pattern links are not yet incrementally updated
|
||||
canonical stop extraction is CPU/memory heavy on national feeds
|
||||
route geometry cannot yet classify temporary GTFS detours as separate variants
|
||||
local-transport-only routing is not a first-class query mode
|
||||
route-search caches are process-local and not persisted
|
||||
Alembic migrations are still missing
|
||||
```
|
||||
|
||||
## MVP 1: stable Germany data workbench
|
||||
|
||||
### Backend
|
||||
|
||||
- Add proper Alembic migrations for PostgreSQL and keep SQLite test support.
|
||||
- Add source-run history and dataset-version comparison.
|
||||
- Make route-layer rebuild incremental: update only affected matches/patterns/stops.
|
||||
- Keep old route-layer tables readable while a rebuild prepares replacement rows.
|
||||
- Add source health checks: download success, hash change, feed freshness, calendar validity.
|
||||
- Expand the QA dashboard into drill-down review queues for source health, GTFS validation, canonical stop conflicts, route conflicts, and publication blockers.
|
||||
- Add GTFS validation summary reports: service dates, route direction coverage, stop coordinate outliers, bad stop_times, missing shapes.
|
||||
- Add database maintenance jobs: analyze, vacuum, stale job recovery, orphan cleanup.
|
||||
- Add durable cache tables for journey stages, nearest stops, address access candidates, and common station-to-station searches.
|
||||
|
||||
### Routing
|
||||
|
||||
- Replace the demo round-expansion router with a GTFS-appropriate algorithm such as RAPTOR or CSA.
|
||||
- Precompute transfer graph edges: station-internal transfers, nearby walking transfers, and access/egress stop candidates.
|
||||
- Add routing profiles:
|
||||
|
||||
```text
|
||||
fastest public transport
|
||||
fewest transfers
|
||||
local transport only / Deutschlandticket-like
|
||||
walk only
|
||||
drive
|
||||
car comparison
|
||||
```
|
||||
|
||||
- Treat access/egress walking as access legs, not as public-transport transfers.
|
||||
- Add bounded hub-aware long-distance routing for city-to-city requests: local access to likely hubs, long-distance/regional trunk, local egress.
|
||||
- Add arrive-by search and better stop conditions for "good enough" results.
|
||||
- Add route diagnostics that explain why a route was found or pruned.
|
||||
|
||||
### Frontend
|
||||
|
||||
- Add source detail page.
|
||||
- Add dataset detail page.
|
||||
- Add match-review queue with filters by mode, operator, country, confidence, and source scope.
|
||||
- Add route detail inspection: GTFS geometry, OSM geometry, candidate matches, stops, evidence, and route-pattern provenance.
|
||||
- Add canonical stop/station detail overlay.
|
||||
- Add persistent rule editor.
|
||||
- Add routing controls for profile, transfer buffer, avoid/prefer modes, arrive-by, via, and local-only.
|
||||
- Show partial/progressive route results with clear stage labels.
|
||||
|
||||
### Data outputs
|
||||
|
||||
- GeoJSON exports for small regions.
|
||||
- GeoParquet exports for analysis.
|
||||
- PMTiles/vector-tile export for map display.
|
||||
- Coverage CSV/API for downstream services.
|
||||
|
||||
## MVP 2: Europe-scale coverage map
|
||||
|
||||
- Use Geofabrik country/Europe extracts and reproducible OSM PBF jobs.
|
||||
- Store OSM transport features, addresses, and routing graph in PostGIS.
|
||||
- Generate ranked/generalized transport route layers by zoom level.
|
||||
- Serve tiles with Martin or export PMTiles.
|
||||
- Add coverage statuses:
|
||||
|
||||
```text
|
||||
existing_in_osm
|
||||
static_timetable_covered
|
||||
live_data_covered
|
||||
fare_data_covered
|
||||
booking_covered
|
||||
missing_static
|
||||
stale_feed
|
||||
restricted_license
|
||||
low_confidence_match
|
||||
detour_or_temporary_variant
|
||||
```
|
||||
|
||||
- Add coverage metrics:
|
||||
|
||||
```text
|
||||
operator coverage
|
||||
route coverage
|
||||
route-km coverage
|
||||
stop coverage
|
||||
live-data coverage
|
||||
feed freshness
|
||||
license confidence
|
||||
booking coverage
|
||||
route-layer provenance coverage
|
||||
```
|
||||
|
||||
## MVP 3: more source formats
|
||||
|
||||
Add importers:
|
||||
|
||||
```text
|
||||
NeTEx
|
||||
TransXChange
|
||||
SIRI discovery/live endpoints
|
||||
GTFS-Realtime
|
||||
GBFS for shared mobility, optional
|
||||
operator CSV/API adapters
|
||||
```
|
||||
|
||||
Target data model:
|
||||
|
||||
```text
|
||||
canonical operators
|
||||
canonical stops/stations/terminals
|
||||
canonical routes
|
||||
route variants
|
||||
trip patterns
|
||||
calendar/service validity
|
||||
transfers
|
||||
access/egress legs
|
||||
coverage observations
|
||||
source evidence
|
||||
manual rules
|
||||
```
|
||||
|
||||
## MVP 4: production journey-planning dataset
|
||||
|
||||
- Build a canonical stop/station graph with transfer rules and transfer-time profiles.
|
||||
- Generate timetable-routing input for RAPTOR/CSA.
|
||||
- Add first/last-mile routing from OSM walk/drive graph.
|
||||
- Add emissions factors per mode/operator/country.
|
||||
- Add fare/ticket placeholders and booking/deep-link metadata.
|
||||
- Add confidence and provenance to every derived route/journey.
|
||||
|
||||
## MVP 5: booking-readiness layer
|
||||
|
||||
- Track booking availability separately from timetable coverage.
|
||||
- Add deep-link metadata per operator/route.
|
||||
- Add partner API adapters later.
|
||||
- Distinguish clearly:
|
||||
|
||||
```text
|
||||
travel-plausible itinerary
|
||||
bookable itinerary
|
||||
single-interface multi-booking
|
||||
protected through-ticket
|
||||
```
|
||||
|
||||
## Recommended next implementation sprint
|
||||
|
||||
1. Finish route-layer rebuild resilience: incremental updates, shadow tables, and detour/provenance classification.
|
||||
2. Replace or heavily optimize journey routing: precomputed transfers, hub-aware long-distance routing, local-only profile, and bounded search.
|
||||
3. Add durable PostgreSQL-backed journey caches for address access, stop pairs, and repeated stage searches.
|
||||
4. Add Alembic migrations and remove runtime DDL from normal request/worker startup.
|
||||
5. Add route/journey diagnostics so slow or failed requests explain what was searched and pruned.
|
||||
6. Add vector-tile output for route layers and large map rendering.
|
||||
282
README.md
282
README.md
@@ -1,2 +1,282 @@
|
||||
# meubility-workbench
|
||||
# Mobility Workbench
|
||||
|
||||
Working prototype for a mobility-data management interface and pipeline.
|
||||
|
||||
It is intentionally small but executable. The current implementation lets you:
|
||||
|
||||
- register data sources;
|
||||
- download/copy source files into a local cache;
|
||||
- import GTFS static timetable feeds;
|
||||
- import raw OSM PBF extracts by deriving transport GeoJSON;
|
||||
- import OSM-derived transport GeoJSON;
|
||||
- persist raw datasets and normalized route/stop records;
|
||||
- run automatic GTFS-route ↔ OSM-route matching;
|
||||
- persist manual accept/reject rules from the UI;
|
||||
- expose GeoJSON layers for a zoomable map;
|
||||
- use a management web UI with separate GTFS Harmonization and Mapping Data modules, plus source runs, stats, matches, and map inspection.
|
||||
|
||||
The default database is SQLite so the prototype runs immediately. The schema is kept simple enough to migrate to PostGIS when the pipeline needs European scale, vector tiles, and spatial indexes.
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
cd mobility-workbench
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python -m app.cli load-sample
|
||||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
||||
Open:
|
||||
|
||||
```text
|
||||
http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
The sample project loads a small Berlin-like GTFS feed plus an OSM-like GeoJSON network. It imports routes/stops, runs the matcher, and shows matched and missing coverage on the map.
|
||||
|
||||
## PostgreSQL/PostGIS
|
||||
|
||||
SQLite remains the default. For Germany-scale imports, point `DATABASE_URL` at PostgreSQL:
|
||||
|
||||
```bash
|
||||
export DATABASE_URL=postgresql://USER:PASSWORD@localhost:5432/meubility
|
||||
python -m app.cli init-db
|
||||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
||||
PostgreSQL mode automatically creates the `postgis` and `pg_trgm` extensions, stores GTFS `stop_times` and OSM features in main tables, and uses GiST/trigram indexes for map bbox queries, route-layer stop linking, and search filters. To keep using legacy sidecars with PostgreSQL, set:
|
||||
|
||||
```bash
|
||||
export POSTGRES_USE_SIDECARS=true
|
||||
```
|
||||
|
||||
To migrate the existing SQLite project into a fresh PostgreSQL database:
|
||||
|
||||
```bash
|
||||
python scripts/migrate_sqlite_to_postgres.py \
|
||||
--sqlite-path data/workbench.sqlite \
|
||||
--postgres-url postgresql://USER:PASSWORD@localhost:5432/meubility \
|
||||
--reset
|
||||
```
|
||||
|
||||
The migration copies normal tables first, imports legacy GTFS/OSM sidecars into PostgreSQL main tables, rewrites dataset storage metadata to `main`, refreshes PostGIS geometry columns, and rebuilds runtime indexes.
|
||||
|
||||
## Docker start
|
||||
|
||||
```bash
|
||||
docker compose up --build
|
||||
```
|
||||
|
||||
Then open:
|
||||
|
||||
```text
|
||||
http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
## CLI commands
|
||||
|
||||
```bash
|
||||
python -m app.cli init-db
|
||||
python -m app.cli reset-db
|
||||
python -m app.cli load-sample
|
||||
python -m app.cli stats
|
||||
python -m app.cli add-source --name "My GTFS" --kind gtfs --url ./data/feed.zip --country DE
|
||||
python -m app.cli add-source --name "VBB Online GTFS" --kind gtfs --url https://unternehmen.vbb.de/fileadmin/user_upload/VBB/Dokumente/API-Datensaetze/gtfs-mastscharf/GTFS.zip --country DE --license "CC BY 4.0"
|
||||
python -m app.cli add-source --name "DB Long-distance Rail GTFS.DE" --kind gtfs --url https://download.gtfs.de/germany/fv_free/latest.zip --country DE --license "Creative Commons 4.0"
|
||||
python -m app.cli add-source --name "Germany Regional Rail GTFS.DE" --kind gtfs --url https://download.gtfs.de/germany/rv_free/latest.zip --country DE --license "Creative Commons 4.0"
|
||||
python -m app.cli add-source --name "Berlin OSM" --kind osm_pbf --url https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf --country DE --license ODbL
|
||||
python -m app.cli run-source 1
|
||||
python -m app.cli run-match
|
||||
python -m app.cli prune-cache --dry-run
|
||||
python -m app.cli prune-cache
|
||||
```
|
||||
|
||||
## HTTP API
|
||||
|
||||
Core endpoints:
|
||||
|
||||
```text
|
||||
GET /api/sources
|
||||
POST /api/sources
|
||||
POST /api/sources/{source_id}/run
|
||||
POST /api/sample/reset
|
||||
POST /api/match/run
|
||||
GET /api/stats
|
||||
GET /api/matches
|
||||
POST /api/matches/{match_id}/accept
|
||||
POST /api/matches/{match_id}/reject
|
||||
GET /api/rules
|
||||
POST /api/rules
|
||||
```
|
||||
|
||||
Map layers:
|
||||
|
||||
```text
|
||||
GET /api/map/osm_routes.geojson
|
||||
GET /api/map/osm_stops.geojson
|
||||
GET /api/map/gtfs_routes.geojson
|
||||
GET /api/map/gtfs_stops.geojson
|
||||
GET /api/map/matched_gtfs_routes.geojson
|
||||
GET /api/map/matched_gtfs_routes.geojson?status=missing
|
||||
```
|
||||
|
||||
Map endpoints accept viewport and layer filters:
|
||||
|
||||
```text
|
||||
bbox=min_lon,min_lat,max_lon,max_lat
|
||||
zoom=13
|
||||
kind=route,infra,stop,station,terminal
|
||||
mode=bus,tram,train,subway,light_rail,ferry
|
||||
geometry=point,line,polygon,nonpoint
|
||||
source_id=4
|
||||
dataset_id=5
|
||||
limit=5000
|
||||
```
|
||||
|
||||
## Source types implemented
|
||||
|
||||
### `gtfs`
|
||||
|
||||
Expected input: GTFS static zip.
|
||||
|
||||
Imported files:
|
||||
|
||||
```text
|
||||
agency.txt
|
||||
stops.txt
|
||||
routes.txt
|
||||
trips.txt
|
||||
stop_times.txt
|
||||
shapes.txt, if available
|
||||
```
|
||||
|
||||
The importer stores agencies, stops, routes, trips, a capped number of stop-times (see `GTFS_STOP_TIMES_IMPORT_LIMIT`), and representative route geometries. Route geometry comes from `shapes.txt` where available; otherwise it falls back to stop sequences from a representative trip.
|
||||
|
||||
Multiple GTFS sources can be active at once. Map endpoints and layer controls keep sources separate with `source_id` filters, so VBB, DB long-distance rail, DB/regional rail, and local sample feeds can be rendered independently.
|
||||
|
||||
The journey UI routes against the active harmonized transit snapshot instead of exposing a raw GTFS source selector. Feed-level filters remain available for map layers, QA, and source diagnostics.
|
||||
|
||||
### `osm_pbf`
|
||||
|
||||
Expected input: an OSM `.osm.pbf` extract, for example a Geofabrik regional extract.
|
||||
|
||||
The importer records the downloaded/copied file once as an immutable raw dataset with kind `osm_pbf_raw`. For `.osm.pbf` inputs it then runs `scripts/osmium_transport_filter.sh` and stores one transport-only extract as `osm_pbf_transport`. The Python extractor reads that filtered extract, writes `transport.geojson`, and imports it through the `osm_geojson` importer.
|
||||
|
||||
The raw and filtered datasets are inactive storage stages; the derived `osm_geojson` dataset is the active visual layer. Re-running an unchanged source reuses the existing raw, filtered, and derived datasets instead of duplicating the extract.
|
||||
|
||||
The extractor emits:
|
||||
|
||||
```text
|
||||
route relations as LineString/MultiLineString features built from member ways
|
||||
rail/tram/subway/ferry/aerialway infrastructure ways
|
||||
stations, stops, platforms, bus stations, and ferry terminals
|
||||
```
|
||||
|
||||
Route display uses OSM route relation member ways, not stop-to-stop straight-line interpolation.
|
||||
|
||||
### `osm_geojson`
|
||||
|
||||
Expected input: GeoJSON `FeatureCollection` containing OSM-derived route/station/stop/terminal features.
|
||||
|
||||
Minimum useful properties for route features:
|
||||
|
||||
```json
|
||||
{
|
||||
"osm_type": "relation",
|
||||
"osm_id": "12345",
|
||||
"type": "route",
|
||||
"route": "train",
|
||||
"ref": "RE1",
|
||||
"name": "RE1 Example Line",
|
||||
"operator": "Example Operator",
|
||||
"network": "Example Network"
|
||||
}
|
||||
```
|
||||
|
||||
Supported route modes include:
|
||||
|
||||
```text
|
||||
train, light_rail, subway, tram, bus, trolleybus, coach,
|
||||
ferry, monorail, funicular, aerialway
|
||||
```
|
||||
|
||||
## Matching logic
|
||||
|
||||
The current automatic matcher scores each GTFS route against OSM route features using:
|
||||
|
||||
```text
|
||||
mode compatibility
|
||||
route ref similarity
|
||||
route name similarity
|
||||
operator/network similarity
|
||||
bbox overlap or proximity, used as a major disambiguator for common refs
|
||||
GTFS/OSM geometry proximity, where both geometries are available
|
||||
same normalized route key
|
||||
```
|
||||
|
||||
Each match also stores a scope classification:
|
||||
|
||||
```text
|
||||
in_osm_scope
|
||||
near_osm_scope
|
||||
outside_osm_scope
|
||||
unknown_scope
|
||||
```
|
||||
|
||||
Overall coverage and in-scope coverage are intentionally separate. A GTFS route outside the loaded OSM extract should not be interpreted as a failed route match.
|
||||
|
||||
Status thresholds:
|
||||
|
||||
```text
|
||||
>= 85 matched
|
||||
65–84 probable
|
||||
40–64 weak
|
||||
< 40 missing
|
||||
```
|
||||
|
||||
Manual accept/reject actions are stored as `match_rules`. The current prototype records the rule; the next implementation step is applying those rules automatically before/after every matching run.
|
||||
|
||||
The route layer treats OSM route geometry as the visual authority when a suitable match exists. Multiple GTFS timetable shapes or trips, including opposite directions, can link to the same OSM-backed `RoutePattern`; each GTFS shape link keeps its own match and direction evidence. When no OSM route matches, the builder creates a `gtfs_proposed` visual pattern from GTFS geometry for review.
|
||||
|
||||
## Data flow
|
||||
|
||||
```text
|
||||
source registration
|
||||
→ local source cache
|
||||
→ dataset record with hash
|
||||
→ raw OSM commit, if source is osm_pbf
|
||||
→ filtered transport extract, if source is osm_pbf and prefiltering is enabled
|
||||
→ derived transport GeoJSON extraction, if source is osm_pbf
|
||||
→ normalized GTFS / OSM tables
|
||||
→ route matching
|
||||
→ canonical stops and OSM-authoritative route layer
|
||||
→ manual review rules
|
||||
→ GeoJSON map layers
|
||||
→ downstream routing/coverage/tile generation
|
||||
```
|
||||
|
||||
## Current limitations
|
||||
|
||||
- PostgreSQL/PostGIS is supported for large local imports; vector tiles are still the next step for country/Europe-scale browsing.
|
||||
- OSM PBF snapshot extraction is implemented; applying replication `.osc.gz` diffs onto prior raw snapshots is still a next step.
|
||||
- GTFS-RT, SIRI, NeTEx, TransXChange, OSDM, fares, and booking APIs are not yet implemented.
|
||||
- The matcher is deliberately transparent rather than sophisticated.
|
||||
- The frontend requests viewport-bounded GeoJSON by layer; vector tiles are still the next step for country/Europe scale.
|
||||
|
||||
## OSM extraction helper
|
||||
|
||||
A starter Osmium shell filter script is included:
|
||||
|
||||
```bash
|
||||
scripts/osmium_transport_filter.sh europe-latest.osm.pbf transport.osm.pbf
|
||||
```
|
||||
|
||||
The script calls Osmium through `scripts/host_tool.sh`, which also works from a Flatpak/containerized terminal when `flatpak-spawn --host` is available. The app has a Python Osmium-based `osm_pbf` importer for repeatable prototype runs. For the next stage, add OSM replication diff application, move large-region imports to PostGIS, and serve generalized vector tiles where network editing requires broad viewport rendering.
|
||||
|
||||
## Tests
|
||||
|
||||
```bash
|
||||
pytest -q
|
||||
```
|
||||
|
||||
1
app/__init__.py
Normal file
1
app/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Mobility Workbench prototype."""
|
||||
1272
app/address_search.py
Normal file
1272
app/address_search.py
Normal file
File diff suppressed because it is too large
Load Diff
394
app/cli.py
Normal file
394
app/cli.py
Normal file
@@ -0,0 +1,394 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from sqlalchemy import func, select, text
|
||||
|
||||
from app.config import settings
|
||||
from app.data_management import dataset_sidecar_paths, prune_inactive_datasets
|
||||
from app.db import engine, init_db, reset_db, session_scope
|
||||
from app.db_lock import database_write_lock
|
||||
from app.feed_discovery import build_gtfs_discovery_manifests, default_generated_dir
|
||||
from app.models import (
|
||||
Dataset,
|
||||
GtfsRoute,
|
||||
GtfsShape,
|
||||
GtfsStop,
|
||||
RouteMatch,
|
||||
RoutePattern,
|
||||
Source,
|
||||
SourceCatalogEntry,
|
||||
)
|
||||
from app.pipeline.matcher import run_route_matching
|
||||
from app.pipeline.osm_labeling import relabel_osm_features
|
||||
from app.pipeline.osm_pbf import run_osm_pbf_source_staged
|
||||
from app.pipeline.run import run_source
|
||||
from app.pipeline.gtfs import backfill_gtfs_shapes
|
||||
from app.pipeline.route_layer import rebuild_route_layer
|
||||
from app.pipeline.sample_data import load_sample_project
|
||||
from app.osm_storage import osm_feature_count
|
||||
from app.jobs import run_worker_loop
|
||||
from app.jobs import create_route_layer_rebuild_job, create_route_matching_job, create_source_import_job
|
||||
from app.source_catalog import (
|
||||
default_ingestable_sources_path,
|
||||
default_source_catalog_path,
|
||||
import_ingestable_sources,
|
||||
import_source_catalog,
|
||||
source_catalog_summary,
|
||||
)
|
||||
|
||||
# Typer application object; every `@cli.command(...)` below registers a subcommand on it.
cli = typer.Typer(help="Mobility Workbench pipeline CLI")
|
||||
|
||||
|
||||
@cli.command("init-db")
def init_db_command() -> None:
    # Initialize the database while holding the CLI write lock, then report it.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    lock = _write_lock("init-db")
    with lock:
        init_db()
    typer.echo("Database initialized")
|
||||
|
||||
|
||||
@cli.command("reset-db")
def reset_db_command() -> None:
    # Reset the database while holding the CLI write lock, then report it.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    lock = _write_lock("reset-db")
    with lock:
        reset_db()
    typer.echo("Database reset")
|
||||
|
||||
|
||||
@cli.command("load-sample")
def load_sample_command() -> None:
    # Load the bundled sample project into an initialized database and print
    # the loader's result as indented JSON.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("load-sample"):
        init_db()
        with session_scope() as session:
            summary = load_sample_project(session)
    rendered = json.dumps(summary, indent=2)
    typer.echo(rendered)
|
||||
|
||||
|
||||
@cli.command("add-source")
def add_source_command(
    name: str = typer.Option(..., help="Source name"),
    kind: str = typer.Option(..., help="gtfs, osm_geojson, osm_pbf, or osm_diff"),
    url: str = typer.Option(..., help="HTTP URL or local path"),
    country: Optional[str] = typer.Option(None),
    license: Optional[str] = typer.Option(None),
    priority: Optional[str] = typer.Option(None),
    mode_scope: Optional[str] = typer.Option(None),
    source_basis: Optional[str] = typer.Option(None),
    notes: Optional[str] = typer.Option(None),
) -> None:
    # Register a new Source row and print its id and name as JSON.
    #
    # Raises typer.BadParameter when `kind` is not one of the supported kinds.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    #
    # Validate the cheap, purely local precondition first so that an invalid
    # invocation neither waits for the database write lock nor touches the
    # schema.
    if kind not in {"gtfs", "osm_geojson", "osm_pbf", "osm_diff"}:
        raise typer.BadParameter("kind must be gtfs, osm_geojson, osm_pbf, or osm_diff")

    with _write_lock("add-source"):
        init_db()
        with session_scope() as session:
            source = Source(
                name=name,
                kind=kind,
                url=url,
                country=country,
                license=license,
                priority=priority,
                mode_scope=mode_scope,
                source_basis=source_basis,
                notes=notes,
            )
            session.add(source)
            # Flush so the database assigns the primary key before it is echoed.
            session.flush()
            typer.echo(json.dumps({"id": source.id, "name": source.name}, indent=2))
|
||||
|
||||
|
||||
@cli.command("run-source")
def run_source_command(source_id: int) -> None:
    # Import one source. `osm_pbf` sources go through the staged importer
    # (called without the CLI write lock here); every other kind runs under
    # the CLI write lock via `run_source`.
    # Raises typer.BadParameter when the source id does not exist.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    init_db()

    # First pass: only look up the source kind so the right import path is chosen.
    with session_scope() as session:
        record = session.get(Source, source_id)
        if record is None:
            raise typer.BadParameter(f"source not found: {source_id}")
        source_kind = record.kind

    if source_kind == "osm_pbf":
        dataset = run_osm_pbf_source_staged(source_id)
        staged_payload = {
            "source_id": source_id,
            "dataset_id": dataset.id,
            "status": dataset.status,
            "import_mode": "staged_short_lock",
        }
        typer.echo(json.dumps(staged_payload, indent=2))
        return

    with _write_lock("run-source"):
        with session_scope() as session:
            # Re-fetch inside the locked session; the row may have gone away meanwhile.
            source = session.get(Source, source_id)
            if source is None:
                raise typer.BadParameter(f"source not found: {source_id}")
            dataset = run_source(session, source)
            payload = {"source_id": source.id, "dataset_id": dataset.id, "status": dataset.status}
            typer.echo(json.dumps(payload, indent=2))
|
||||
|
||||
|
||||
@cli.command("run-match")
def run_match_command() -> None:
    # Run route matching under the CLI write lock and print its result as JSON.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("run-match"):
        init_db()
        with session_scope() as session:
            outcome = run_route_matching(session)
    rendered = json.dumps(outcome, indent=2)
    typer.echo(rendered)
|
||||
|
||||
|
||||
@cli.command("build-route-layer")
def build_route_layer_command() -> None:
    # Rebuild the route layer under the CLI write lock and print the result as JSON.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("build-route-layer"):
        init_db()
        with session_scope() as session:
            outcome = rebuild_route_layer(session)
    rendered = json.dumps(outcome, indent=2)
    typer.echo(rendered)
|
||||
|
||||
|
||||
@cli.command("relabel-osm-features")
def relabel_osm_features_command(
    dataset_id: Optional[int] = typer.Option(None, help="Only relabel one OSM dataset"),
    force: bool = typer.Option(False, help="Run even when the recorded dependency signature is current"),
    chunk_size: int = typer.Option(5000, help="Rows per relabel batch"),
    rebuild_indexes: bool = typer.Option(True, help="Drop/rebuild affected route-scope indexes around large relabel writes"),
    build_route_layer: bool = typer.Option(True, help="Rebuild the route layer after relabeling"),
) -> None:
    # Relabel OSM features and, when requested, rebuild the route layer in the
    # same session. The rebuild only runs if relabeling reported a change or
    # `force` was given.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    relabel_options = {
        "dataset_id": dataset_id,
        "force": force,
        "chunk_size": chunk_size,
        "rebuild_indexes": rebuild_indexes,
    }
    with _write_lock("relabel-osm-features"):
        init_db()
        with session_scope() as session:
            result = relabel_osm_features(session, **relabel_options)
            needs_rebuild = result["changed"] or force
            if build_route_layer and needs_rebuild:
                result["route_layer_result"] = rebuild_route_layer(session)
    typer.echo(json.dumps(result, indent=2))
|
||||
|
||||
|
||||
@cli.command("backfill-gtfs-shapes")
def backfill_gtfs_shapes_command(
    dataset_id: Optional[int] = typer.Option(None, help="Only backfill one GTFS dataset"),
) -> None:
    # Backfill GTFS shapes (optionally for one dataset) and print the result as JSON.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("backfill-gtfs-shapes"):
        init_db()
        with session_scope() as session:
            outcome = backfill_gtfs_shapes(session, dataset_id=dataset_id)
    rendered = json.dumps(outcome, indent=2)
    typer.echo(rendered)
|
||||
|
||||
|
||||
@cli.command("stats")
def stats_command() -> None:
    # Print headline row counts as JSON. GTFS and OSM counts are restricted to
    # active datasets; match counts are grouped by status.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    init_db()
    with session_scope() as session:
        active_rows = session.execute(select(Dataset.id).where(Dataset.is_active.is_(True))).all()
        active_dataset_ids = [row[0] for row in active_rows]

        def count_in_active(model):
            # With no active datasets the query is skipped and 0 reported directly.
            if not active_dataset_ids:
                return 0
            return session.scalar(
                select(func.count()).select_from(model).where(model.dataset_id.in_(active_dataset_ids))
            )

        status_rows = session.execute(
            select(RouteMatch.status, func.count()).group_by(RouteMatch.status)
        ).all()

        stats = {
            "sources": session.scalar(select(func.count()).select_from(Source)),
            "source_catalog_entries": session.scalar(select(func.count()).select_from(SourceCatalogEntry)) or 0,
            "active_datasets": len(active_dataset_ids),
            "gtfs_routes": count_in_active(GtfsRoute),
            "gtfs_stops": count_in_active(GtfsStop),
            "gtfs_shapes": count_in_active(GtfsShape),
            "route_patterns": session.scalar(select(func.count()).select_from(RoutePattern)) or 0,
            "osm_routes": sum(
                osm_feature_count(session, active_id, kind="route") for active_id in active_dataset_ids
            ),
            "matches": {state: total for state, total in status_rows},
        }
    typer.echo(json.dumps(stats, indent=2))
|
||||
|
||||
|
||||
@cli.command("import-source-catalog")
def import_source_catalog_command(
    csv_path: Path = typer.Option(default_source_catalog_path(), "--csv", help="Source catalog CSV path"),
    no_update: bool = typer.Option(False, help="Skip rows that already exist"),
) -> None:
    # Import the source catalog CSV and print the import result together with
    # the catalog summary taken in the same session.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    update_existing = not no_update
    with _write_lock("import-source-catalog"):
        init_db()
        with session_scope() as session:
            report = import_source_catalog(session, csv_path, update_existing=update_existing)
            report["summary"] = source_catalog_summary(session)
    typer.echo(json.dumps(report, indent=2))
|
||||
|
||||
|
||||
@cli.command("import-ingestable-sources")
def import_ingestable_sources_command(
    csv_path: Path = typer.Option(default_ingestable_sources_path(), "--csv", help="Ingestable source seed CSV path"),
    no_update: bool = typer.Option(False, help="Skip sources that already exist"),
) -> None:
    # Import ingestable sources from a seed CSV and print the import result
    # together with the catalog summary taken in the same session.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    update_existing = not no_update
    with _write_lock("import-ingestable-sources"):
        init_db()
        with session_scope() as session:
            report = import_ingestable_sources(session, csv_path, update_existing=update_existing)
            report["summary"] = source_catalog_summary(session)
    typer.echo(json.dumps(report, indent=2))
|
||||
|
||||
|
||||
@cli.command("discover-gtfs-sources")
def discover_gtfs_sources_command(
    output_dir: Path = typer.Option(default_generated_dir(), "--output-dir", help="Directory for generated discovery CSVs"),
    countries: str = typer.Option(
        ",".join(["DE", "AT", "CH", "NL", "DK", "FR", "BE", "LU", "NO", "SE", "FI", "IE", "GB"]),
        "--countries",
        help="Comma-separated country codes, or ALL for every country exposed by the upstream catalogs",
    ),
    no_mobility_database: bool = typer.Option(False, help="Skip Mobility Database feeds_v2.csv"),
    no_acceptance_test_list: bool = typer.Option(False, help="Skip MobilityData validator acceptance-test feed list"),
    no_ptna: bool = typer.Option(False, help="Skip PTNA GTFS analysis pages"),
    max_ptna_details: int = typer.Option(80, help="Maximum PTNA detail pages to fetch for license/crosswalk metadata"),
    test_limit: int = typer.Option(24, help="Rows to write to the focused test-run ingestable CSV"),
    check_urls: bool = typer.Option(False, help="Run HEAD/range checks for ingestable feed URLs"),
) -> None:
    # Build the GTFS discovery manifests and print the builder's result as JSON.
    # This command takes no database lock and opens no session itself.
    # NOTE: kept as a comment; a docstring would become the Typer help text.

    # Split the comma-separated option, dropping surrounding whitespace and empty parts.
    country_codes = []
    for part in countries.split(","):
        code = part.strip()
        if code:
            country_codes.append(code)

    result = build_gtfs_discovery_manifests(
        output_dir=output_dir,
        countries=country_codes,
        include_mobility_database=not no_mobility_database,
        include_acceptance_test_list=not no_acceptance_test_list,
        include_ptna=not no_ptna,
        max_ptna_details=max_ptna_details,
        test_limit=test_limit,
        check_urls=check_urls,
    )
    typer.echo(json.dumps(result, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
@cli.command("queue-source-imports-from-csv")
def queue_source_imports_from_csv_command(
    csv_path: Path = typer.Option(default_ingestable_sources_path(), "--csv", help="Ingestable source CSV path"),
    no_update: bool = typer.Option(False, help="Skip sources that already exist instead of updating them"),
    run_match_at_end: bool = typer.Option(True, help="Queue one route-matching job after all source imports"),
    build_route_layer_at_end: bool = typer.Option(True, help="Queue one route-layer rebuild after route matching"),
    priority: int = typer.Option(0, help="Priority for queued source import jobs"),
) -> None:
    # Import sources from the CSV, queue one import job per GTFS source whose
    # URL appears in that CSV, then optionally queue a single route-matching
    # job and a single route-layer rebuild job. Prints a JSON summary.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("queue-source-imports-from-csv"):
        init_db()
        with session_scope() as session:
            # Relative paths are resolved against the current working directory.
            if not csv_path.is_absolute():
                csv_path = Path.cwd() / csv_path

            imported = import_ingestable_sources(session, csv_path, update_existing=not no_update)
            source_urls = _source_urls_from_ingestable_csv(csv_path)
            source_query = (
                select(Source)
                .where(Source.kind == "gtfs", Source.url.in_(source_urls))
                .order_by(Source.id)
            )
            sources = session.scalars(source_query).all()

            # Per-source jobs never trigger matching or the layer rebuild themselves;
            # those run at most once via the dedicated jobs queued below.
            jobs = []
            for source in sources:
                job = create_source_import_job(
                    session,
                    source,
                    run_match=False,
                    build_route_layer=False,
                    priority=priority,
                )
                jobs.append(job)

            route_match_job = None
            if run_match_at_end:
                route_match_job = create_route_matching_job(session, priority=priority)

            route_layer_job = None
            if build_route_layer_at_end:
                route_layer_job = create_route_layer_rebuild_job(session, priority=priority)

            summary = {
                "csv": str(csv_path),
                "imported": imported,
                "sources": [{"id": source.id, "name": source.name} for source in sources],
                "source_import_jobs": [job.id for job in jobs],
                "route_match_job": route_match_job.id if route_match_job is not None else None,
                "route_layer_job": route_layer_job.id if route_layer_job is not None else None,
            }
            typer.echo(json.dumps(summary, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
@cli.command("prune-cache")
def prune_cache_command(dry_run: bool = typer.Option(False, help="Report files without deleting them")) -> None:
    # Delete (or, with --dry-run, only report) files under the cache roots that
    # no dataset references through its local path or its sidecar paths.
    # Unlike the automatic pruning roots, this command also scans "staging".
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("prune-cache"):
        init_db()
        with session_scope() as session:
            referenced = set()
            for local_path in session.scalars(select(Dataset.local_path)).all():
                if local_path:
                    referenced.add(Path(local_path).resolve())
            for dataset in session.scalars(select(Dataset)).all():
                for sidecar in dataset_sidecar_paths(dataset):
                    referenced.add(sidecar.resolve())

        roots = [settings.data_dir / subdir for subdir in ("sources", "derived", "sidecars", "staging")]
        candidates = []
        for root in roots:
            if not root.exists():
                continue
            for entry in root.rglob("*"):
                if entry.is_file() and entry.resolve() not in referenced:
                    candidates.append(entry)

        # Sizes are summed before anything is removed so the report covers all candidates.
        total_bytes = sum(entry.stat().st_size for entry in candidates)
        if not dry_run:
            for entry in candidates:
                entry.unlink()
            for root in roots:
                _remove_empty_dirs(root)

    report = {
        "dry_run": dry_run,
        "files": len(candidates),
        "bytes": total_bytes,
        "deleted": 0 if dry_run else len(candidates),
    }
    typer.echo(json.dumps(report, indent=2))
|
||||
|
||||
|
||||
@cli.command("prune-inactive-datasets")
def prune_inactive_datasets_command(
    dry_run: bool = typer.Option(False, help="Report inactive normalized datasets without deleting them"),
) -> None:
    # Prune (or, with --dry-run, only report) inactive datasets and print the result as JSON.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("prune-inactive-datasets"):
        init_db()
        with session_scope() as session:
            outcome = prune_inactive_datasets(session, dry_run=dry_run)
    rendered = json.dumps(outcome, indent=2)
    typer.echo(rendered)
|
||||
|
||||
|
||||
@cli.command("vacuum-db")
def vacuum_db_command() -> None:
    # Run VACUUM on the configured database under the CLI write lock.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    with _write_lock("vacuum-db"):
        init_db()
        # The connection is switched to AUTOCOMMIT so VACUUM does not run
        # inside a transaction block.
        with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as connection:
            connection.execute(text("VACUUM"))
            # PRAGMA is SQLite-specific syntax; issuing it against PostgreSQL
            # (a supported backend) would fail after the VACUUM already ran.
            if settings.is_sqlite_database:
                connection.execute(text("PRAGMA wal_checkpoint(TRUNCATE)"))
    typer.echo("Database vacuumed")
|
||||
|
||||
|
||||
@cli.command("worker")
def worker_command(
    once: bool = typer.Option(False, help="Process at most one queued job and exit"),
    max_jobs: Optional[int] = typer.Option(None, help="Process at most this many jobs and exit"),
    poll_interval: float = typer.Option(2.0, help="Seconds to wait between queue polls"),
    worker_id: Optional[str] = typer.Option(None, help="Stable worker identifier"),
) -> None:
    # Run the queue worker loop with the given options and print its result
    # as JSON once the loop returns.
    # NOTE: kept as a comment; a docstring would become the Typer help text.
    loop_options = {
        "worker_id": worker_id,
        "poll_interval": poll_interval,
        "max_jobs": max_jobs,
        "once": once,
    }
    outcome = run_worker_loop(**loop_options)
    typer.echo(json.dumps(outcome, indent=2))
|
||||
|
||||
|
||||
def _remove_empty_dirs(root: Path) -> None:
    """Remove empty directories below *root*, deepest first.

    *root* itself is never removed. Directories that cannot be removed
    (for example because they still contain files) are left in place.
    """
    if not root.exists():
        return

    directories = [entry for entry in root.rglob("*") if entry.is_dir()]
    # Deepest paths first, so a parent is tried only after its children are gone.
    directories.sort(key=lambda entry: len(entry.parts), reverse=True)
    for directory in directories:
        try:
            directory.rmdir()
        except OSError:
            # Best effort: rmdir fails on non-empty directories; skip them.
            continue
|
||||
|
||||
|
||||
def _write_lock(operation: str):
    """Return the database write lock for a CLI operation.

    The lock is named ``cli:<operation>`` and uses the CLI-specific timeout
    from settings.
    """
    lock_name = f"cli:{operation}"
    cli_timeout = settings.database_write_lock_cli_timeout_seconds
    return database_write_lock(lock_name, timeout=cli_timeout)
|
||||
|
||||
|
||||
def _source_urls_from_ingestable_csv(path: Path) -> list[str]:
    """Return the distinct GTFS feed URLs listed in an ingestable-sources CSV.

    Only rows whose ``kind`` column equals ``gtfs`` (case-insensitive, after
    stripping whitespace) are considered. URLs are stripped, empty URLs are
    skipped, and the first-seen order is preserved.

    Args:
        path: CSV file with at least ``kind`` and ``url`` columns. It is read
            as UTF-8; a leading byte-order mark is tolerated.

    Returns:
        The unique URLs in the order they first appear in the file.
    """
    # A dict keeps insertion order and gives O(1) membership checks, avoiding
    # the quadratic `url not in list` scan on large seed files.
    unique_urls: dict[str, None] = {}
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        for row in csv.DictReader(handle):
            # Missing cells come back as None from DictReader, hence `or ""`.
            if (row.get("kind") or "").strip().lower() != "gtfs":
                continue
            url = (row.get("url") or "").strip()
            if url:
                unique_urls.setdefault(url, None)
    return list(unique_urls)
|
||||
|
||||
|
||||
# Run the Typer app when this module is executed as a script.
if __name__ == "__main__":
    cli()
|
||||
74
app/config.py
Normal file
74
app/config.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Runtime settings.

    SQLite is the default because this prototype should run immediately.
    The schema is deliberately plain enough to migrate to PostGIS later.
    """

    database_url: str = "sqlite:///./data/workbench.sqlite"
    data_dir: Path = Path("./data")
    # 0 means import all stop_times. Use a positive value only for constrained
    # demos where full timetable routing is not needed.
    gtfs_stop_times_import_limit: int = 0
    # "sidecar_stop_times" keeps the large timetable call table in a per-dataset
    # SQLite file and stores compact GTFS tables in the main app database.
    # Set to "main" for the old all-in-one SQLite layout.
    gtfs_timetable_storage: str = "sidecar_stop_times"
    gtfs_keep_activation_stage: bool = False
    # "sidecar_features" keeps extracted OSM transport features in a per-dataset
    # SQLite file. The main DB materializes only OSM rows that need stable
    # foreign keys for matches or route-layer output.
    osm_feature_storage: str = "sidecar_features"
    osm_sidecar_create_visual_only_stops: bool = False
    # Large OSM PBF extracts should be reduced to transport objects before the
    # Python extractor scans them. XML fixtures stay unfiltered by default.
    osm_pbf_prefilter_enabled: bool = True
    osm_pbf_prefilter_formats: str = "osm_pbf"
    osm_pbf_prefilter_script: Path = Path("scripts/osmium_transport_filter.sh")
    osm_diff_max_sequence_gap: int = 14
    osm_diff_apply_batch_size: int = 7
    osm_diff_state_timeout_seconds: float = 30.0
    sqlite_timeout_seconds: float = 120.0
    sqlite_busy_timeout_ms: int = 120000
    database_write_lock_timeout_seconds: float = 1.0
    database_write_lock_cli_timeout_seconds: float = 3600.0
    queue_worker_autostart: bool = True
    queue_worker_count: int = 1
    queue_worker_poll_interval_seconds: float = 2.0
    queue_job_lease_seconds: int = 7200
    route_matching_batch_size: int = 100
    route_layer_osm_route_batch_size: int = 1000
    route_layer_osm_stop_batch_size: int = 5000
    # SQLite defaults to sidecar storage. PostgreSQL/PostGIS defaults to main
    # table storage so indexes, joins, and spatial operators can work over the
    # full imported datasets.
    postgres_use_sidecars: bool = False
    # Keep supervised workers alive across API server restarts. Stale workers are
    # detected by PID files at the next startup; stale job leases are requeued.
    queue_worker_stop_on_shutdown: bool = False

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    @property
    def normalized_database_url(self) -> str:
        """Return ``database_url`` with PostgreSQL URLs pinned to the psycopg driver.

        Both ``postgresql://`` and the common ``postgres://`` alias are
        rewritten to ``postgresql+psycopg://``. Any other URL, including one
        that already names a driver, is returned unchanged.
        """
        for scheme in ("postgresql://", "postgres://"):
            if self.database_url.startswith(scheme):
                return "postgresql+psycopg://" + self.database_url.removeprefix(scheme)
        return self.database_url

    @property
    def is_sqlite_database(self) -> bool:
        """Whether the normalized database URL starts with ``sqlite``."""
        return self.normalized_database_url.startswith("sqlite")

    @property
    def is_postgresql_database(self) -> bool:
        """Whether the normalized database URL starts with ``postgresql``."""
        return self.normalized_database_url.startswith("postgresql")
|
||||
|
||||
|
||||
# Module-level settings singleton, built once at import time (the model is
# configured above to read a `.env` file).
settings = Settings()
# Import-time side effect: make sure the configured data directory exists.
settings.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
327
app/data_management.py
Normal file
327
app/data_management.py
Normal file
@@ -0,0 +1,327 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import delete, func, or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.gtfs_storage import dataset_sidecar_paths as gtfs_dataset_sidecar_paths, missing_sidecar_paths as gtfs_missing_sidecar_paths, stop_time_count
|
||||
from app.models import (
|
||||
CanonicalStopLink,
|
||||
Dataset,
|
||||
GtfsAgency,
|
||||
GtfsCalendar,
|
||||
GtfsCalendarDate,
|
||||
GtfsRoute,
|
||||
GtfsRoutePatternLink,
|
||||
GtfsShape,
|
||||
GtfsStop,
|
||||
GtfsStopTime,
|
||||
GtfsTripRoutePatternLink,
|
||||
GtfsTrip,
|
||||
OsmDiffState,
|
||||
OsmFeature,
|
||||
RouteMatch,
|
||||
RoutePattern,
|
||||
RoutePatternStop,
|
||||
Source,
|
||||
SourceUpdateCheck,
|
||||
)
|
||||
from app.osm_storage import (
|
||||
dataset_sidecar_paths as osm_dataset_sidecar_paths,
|
||||
missing_sidecar_paths as osm_missing_sidecar_paths,
|
||||
osm_feature_count,
|
||||
)
|
||||
|
||||
|
||||
def dataset_row_counts(session: Session, dataset_id: int, kind: str) -> dict[str, int]:
    """Return per-table row counts for one dataset.

    For ``gtfs`` datasets the result covers the GTFS tables, the stop-time
    count, a missing-sidecar indicator and route-match counts (total and per
    status). For ``osm_geojson`` datasets it covers OSM feature counts by
    kind plus a missing-sidecar indicator. Any other kind yields ``{}``.
    """
    if kind == "gtfs":
        dataset_routes = select(GtfsRoute.id).where(GtfsRoute.dataset_id == dataset_id)
        status_rows = session.execute(
            select(RouteMatch.status, func.count())
            .where(RouteMatch.gtfs_route_id.in_(dataset_routes))
            .group_by(RouteMatch.status)
        ).all()
        match_counts = {state: total for state, total in status_rows}
        return {
            "agencies": _count(session, GtfsAgency, dataset_id),
            "stops": _count(session, GtfsStop, dataset_id),
            "routes": _count(session, GtfsRoute, dataset_id),
            "trips": _count(session, GtfsTrip, dataset_id),
            "calendars": _count(session, GtfsCalendar, dataset_id),
            "calendar_dates": _count(session, GtfsCalendarDate, dataset_id),
            "shapes": _count(session, GtfsShape, dataset_id),
            "stop_times": stop_time_count(session, dataset_id),
            "missing_sidecar": _gtfs_sidecar_missing(session, dataset_id),
            "matches": sum(match_counts.values()),
            "match_counts": match_counts,
        }

    if kind != "osm_geojson":
        return {}

    return {
        "features": _safe_osm_feature_count(session, dataset_id),
        "routes": _safe_osm_feature_count(session, dataset_id, kind="route"),
        "stops": _safe_osm_feature_count(session, dataset_id, kind=["stop", "station", "terminal"]),
        "infra": _safe_osm_feature_count(session, dataset_id, kind="infra"),
        "missing_sidecar": _osm_sidecar_missing(session, dataset_id),
    }
|
||||
|
||||
|
||||
def source_row_counts(session: Session, source: Source) -> dict[str, object]:
    """Aggregate ``dataset_row_counts`` over every dataset of *source*.

    Numeric counters are summed across datasets, datasets with a missing
    sidecar are tallied (overall and per kind), and per-status match counts
    are merged into ``match_counts``.
    """
    datasets = source.datasets
    counts = {
        "datasets": len(datasets),
        "active_datasets": sum(1 for dataset in datasets if dataset.is_active),
        "routes": 0,
        "stops": 0,
        "features": 0,
        "trips": 0,
        "shapes": 0,
        "stop_times": 0,
        "missing_sidecars": 0,
        "match_counts": {},
        "missing_gtfs_sidecars": 0,
        "missing_osm_sidecars": 0,
    }
    summed_keys = ("routes", "stops", "features", "trips", "shapes", "stop_times")
    # Maps a dataset kind to the per-kind missing-sidecar counter it feeds.
    missing_key_by_kind = {"gtfs": "missing_gtfs_sidecars", "osm_geojson": "missing_osm_sidecars"}
    merged_matches: dict[str, int] = {}

    for dataset in datasets:
        stats = dataset_row_counts(session, dataset.id, dataset.kind)
        for key in summed_keys:
            counts[key] += int(stats.get(key, 0))

        if stats.get("missing_sidecar"):
            counts["missing_sidecars"] += 1
            kind_key = missing_key_by_kind.get(dataset.kind)
            if kind_key is not None:
                counts[kind_key] += 1

        for state, total in stats.get("match_counts", {}).items():
            merged_matches[state] = merged_matches.get(state, 0) + int(total)

    counts["match_counts"] = merged_matches
    return counts
|
||||
|
||||
|
||||
def delete_dataset(session: Session, dataset_id: int) -> dict[str, object]:
    """Delete one dataset together with its rows, diff state and files.

    Returns a dict with ``deleted`` set to False and a ``reason`` when the
    dataset does not exist; otherwise ``deleted`` is True and ``counts``
    holds the row counts captured before anything was removed.
    """
    target = session.get(Dataset, dataset_id)
    if target is None:
        return {"deleted": False, "reason": "dataset not found", "dataset_id": dataset_id}

    # Capture the counts first; they describe what is about to be removed.
    counts_before = dataset_row_counts(session, target.id, target.kind)

    _detach_update_checks_for_dataset(session, target.id)
    diff_state_cleanup = delete(OsmDiffState).where(OsmDiffState.raw_dataset_id == target.id)
    session.execute(diff_state_cleanup)
    _delete_dataset_rows(session, target)
    _delete_dataset_files(target)
    session.delete(target)
    session.flush()

    return {"deleted": True, "dataset_id": dataset_id, "counts": counts_before}
|
||||
|
||||
|
||||
def delete_source(session: Session, source_id: int) -> dict[str, object]:
    """Delete a source and all of its datasets.

    Returns a dict with ``deleted`` set to False and a ``reason`` when the
    source does not exist; otherwise ``deleted`` is True and ``datasets``
    lists, per removed dataset, its id, kind and pre-deletion row counts.
    """
    target = session.get(Source, source_id)
    if target is None:
        return {"deleted": False, "reason": "source not found", "source_id": source_id}

    removed = []
    # Iterate over a copy: datasets are deleted from the session inside the loop.
    for dataset in list(target.datasets):
        counts_before = dataset_row_counts(session, dataset.id, dataset.kind)
        removed.append({"dataset_id": dataset.id, "kind": dataset.kind, "counts": counts_before})
        _detach_update_checks_for_dataset(session, dataset.id)
        session.execute(delete(OsmDiffState).where(OsmDiffState.raw_dataset_id == dataset.id))
        _delete_dataset_rows(session, dataset)
        _delete_dataset_files(dataset)
        session.delete(dataset)

    # Diff state can also be keyed by the source itself, not only by a raw dataset.
    session.execute(delete(OsmDiffState).where(OsmDiffState.source_id == target.id))
    session.delete(target)
    session.flush()

    return {"deleted": True, "source_id": source_id, "datasets": removed}
|
||||
|
||||
|
||||
def unreferenced_cache_file_summary(session: Session) -> dict[str, int]:
    """Report how many unreferenced cache files exist and their total size in bytes."""
    orphans = _unreferenced_cache_files(session)
    total_bytes = sum(orphan.stat().st_size for orphan in orphans)
    return {"files": len(orphans), "bytes": total_bytes}
|
||||
|
||||
|
||||
def prune_unreferenced_cache_files(session: Session) -> dict[str, int]:
    """Delete unreferenced cache files and then remove emptied directories.

    Returns the number of files removed and their combined size in bytes.
    """
    orphans = _unreferenced_cache_files(session)
    # Sizes must be read before the files are unlinked.
    freed_bytes = sum(orphan.stat().st_size for orphan in orphans)

    for orphan in orphans:
        orphan.unlink()
    for cache_root in _cache_roots():
        _remove_empty_dirs(cache_root)

    return {"files": len(orphans), "bytes": freed_bytes}
|
||||
|
||||
|
||||
def _unreferenced_cache_files(session: Session) -> list[Path]:
    """List files under the cache roots that no dataset references.

    A file counts as referenced when its resolved path equals a dataset's
    resolved ``local_path`` or one of the dataset's sidecar paths.
    """
    referenced = set()
    for local_path in session.scalars(select(Dataset.local_path)).all():
        if local_path:
            referenced.add(Path(local_path).resolve())
    for dataset in session.scalars(select(Dataset)).all():
        for sidecar in dataset_sidecar_paths(dataset):
            referenced.add(sidecar.resolve())

    orphans = []
    for cache_root in _cache_roots():
        if not cache_root.exists():
            continue
        for entry in cache_root.rglob("*"):
            if entry.is_file() and entry.resolve() not in referenced:
                orphans.append(entry)
    return orphans
|
||||
|
||||
|
||||
def _cache_roots() -> list[Path]:
    """Return the data sub-directories that automatic cache pruning may scan."""
    # Staging files are not referenced by datasets until activation. Automatic
    # pruning must not remove a staging DB from a running import.
    base = settings.data_dir
    return [base / "sources", base / "derived", base / "sidecars"]
|
||||
|
||||
|
||||
def prune_inactive_datasets(session: Session, dry_run: bool = True) -> dict[str, object]:
    """Delete inactive ``gtfs`` and ``osm_geojson`` datasets with their dependent rows.

    With ``dry_run`` (the default) nothing is removed and the counts are
    reported under ``would_delete``. On a real run the same counts are
    reported under ``deleted``. ``dataset_ids`` always lists the inactive
    datasets that were found.
    """
    inactive_rows = session.execute(
        select(Dataset.id, Dataset.kind).where(
            Dataset.is_active.is_(False),
            Dataset.kind.in_(["gtfs", "osm_geojson"]),
        )
    ).all()
    dataset_ids = [int(row[0]) for row in inactive_rows]
    gtfs_ids = [int(row_id) for row_id, row_kind in inactive_rows if row_kind == "gtfs"]
    osm_ids = [int(row_id) for row_id, row_kind in inactive_rows if row_kind == "osm_geojson"]

    # A route match is affected when either side belongs to a pruned dataset.
    match_filters = []
    if gtfs_ids:
        inactive_routes = select(GtfsRoute.id).where(GtfsRoute.dataset_id.in_(gtfs_ids))
        match_filters.append(RouteMatch.gtfs_route_id.in_(inactive_routes))
    if osm_ids:
        inactive_features = select(OsmFeature.id).where(OsmFeature.dataset_id.in_(osm_ids))
        match_filters.append(RouteMatch.osm_feature_id.in_(inactive_features))

    counts = {
        "datasets": len(dataset_ids),
        "gtfs_stop_times": sum(stop_time_count(session, gtfs_id) for gtfs_id in gtfs_ids),
        "gtfs_shapes": _count_dataset_rows(session, GtfsShape, gtfs_ids),
        "gtfs_trips": _count_dataset_rows(session, GtfsTrip, gtfs_ids),
        "gtfs_calendar_dates": _count_dataset_rows(session, GtfsCalendarDate, gtfs_ids),
        "gtfs_calendars": _count_dataset_rows(session, GtfsCalendar, gtfs_ids),
        "gtfs_routes": _count_dataset_rows(session, GtfsRoute, gtfs_ids),
        "gtfs_stops": _count_dataset_rows(session, GtfsStop, gtfs_ids),
        "gtfs_agencies": _count_dataset_rows(session, GtfsAgency, gtfs_ids),
        "osm_features": sum(_safe_osm_feature_count(session, osm_id) for osm_id in osm_ids),
        "missing_osm_sidecars": sum(1 for osm_id in osm_ids if _osm_sidecar_missing(session, osm_id)),
        "gtfs_route_pattern_links": (
            session.scalar(
                select(func.count()).select_from(GtfsRoutePatternLink).where(GtfsRoutePatternLink.dataset_id.in_(gtfs_ids))
            )
            if gtfs_ids
            else 0
        ),
        "gtfs_trip_route_pattern_links": (
            session.scalar(
                select(func.count()).select_from(GtfsTripRoutePatternLink).where(GtfsTripRoutePatternLink.dataset_id.in_(gtfs_ids))
            )
            if gtfs_ids
            else 0
        ),
        "canonical_stop_links": (
            session.scalar(
                select(func.count()).select_from(CanonicalStopLink).where(CanonicalStopLink.dataset_id.in_(dataset_ids))
            )
            if dataset_ids
            else 0
        ),
        "route_matches": (
            session.scalar(select(func.count()).select_from(RouteMatch).where(or_(*match_filters)))
            if match_filters
            else 0
        ),
    }

    if dry_run or not dataset_ids:
        reported_deleted = {} if dry_run else counts
        return {"dry_run": dry_run, "dataset_ids": dataset_ids, "deleted": reported_deleted, "would_delete": counts}

    for inactive_id in dataset_ids:
        _detach_update_checks_for_dataset(session, inactive_id)

    # Matches go first; the rows they point at are deleted further down.
    if match_filters:
        session.execute(delete(RouteMatch).where(or_(*match_filters)))

    if gtfs_ids:
        route_ids = select(GtfsRoute.id).where(GtfsRoute.dataset_id.in_(gtfs_ids))
        pattern_ids = select(RoutePattern.id).where(RoutePattern.gtfs_route_id.in_(route_ids))
        session.execute(delete(RoutePatternStop).where(RoutePatternStop.route_pattern_id.in_(pattern_ids)))
        session.execute(delete(GtfsTripRoutePatternLink).where(GtfsTripRoutePatternLink.dataset_id.in_(gtfs_ids)))
        session.execute(delete(GtfsRoutePatternLink).where(GtfsRoutePatternLink.dataset_id.in_(gtfs_ids)))
        session.execute(delete(RoutePattern).where(RoutePattern.gtfs_route_id.in_(route_ids)))
        session.execute(
            delete(CanonicalStopLink).where(
                CanonicalStopLink.dataset_id.in_(gtfs_ids),
                CanonicalStopLink.object_type == "gtfs_stop",
            )
        )
        gtfs_models = [GtfsStopTime, GtfsShape, GtfsTrip, GtfsCalendarDate, GtfsCalendar, GtfsRoute, GtfsStop, GtfsAgency]
        for model in gtfs_models:
            session.execute(delete(model).where(model.dataset_id.in_(gtfs_ids)))

    if osm_ids:
        osm_feature_ids = select(OsmFeature.id).where(OsmFeature.dataset_id.in_(osm_ids))
        pattern_ids = select(RoutePattern.id).where(RoutePattern.osm_feature_id.in_(osm_feature_ids))
        session.execute(delete(RoutePatternStop).where(RoutePatternStop.route_pattern_id.in_(pattern_ids)))
        session.execute(delete(GtfsTripRoutePatternLink).where(GtfsTripRoutePatternLink.route_pattern_id.in_(pattern_ids)))
        session.execute(delete(GtfsRoutePatternLink).where(GtfsRoutePatternLink.route_pattern_id.in_(pattern_ids)))
        session.execute(delete(RoutePattern).where(RoutePattern.osm_feature_id.in_(osm_feature_ids)))
        session.execute(
            delete(CanonicalStopLink).where(
                CanonicalStopLink.dataset_id.in_(osm_ids),
                CanonicalStopLink.object_type == "osm_feature",
            )
        )
        session.execute(delete(OsmFeature).where(OsmFeature.dataset_id.in_(osm_ids)))

    # Files are removed while the Dataset rows can still be loaded.
    for dataset in session.scalars(select(Dataset).where(Dataset.id.in_(dataset_ids))).all():
        _delete_dataset_files(dataset)
    session.execute(delete(Dataset).where(Dataset.id.in_(dataset_ids)))
    session.flush()

    return {"dry_run": dry_run, "dataset_ids": dataset_ids, "deleted": counts, "would_delete": {}}
|
||||
|
||||
|
||||
def _delete_dataset_rows(session: Session, dataset: Dataset) -> None:
    """Delete the rows that hang off ``dataset``, leaving the ``Dataset`` row itself.

    For ``gtfs`` datasets this removes route matches, route patterns with their
    stops and links, GTFS canonical stop links and all GTFS tables. For
    ``osm_geojson`` datasets it removes the equivalent rows keyed by OSM feature.
    Datasets of any other kind are left untouched.
    """
    kind = dataset.kind
    if kind not in ("gtfs", "osm_geojson"):
        return

    dataset_id = dataset.id
    if kind == "gtfs":
        gtfs_routes = select(GtfsRoute.id).where(GtfsRoute.dataset_id == dataset_id)
        patterns = select(RoutePattern.id).where(RoutePattern.gtfs_route_id.in_(gtfs_routes))
        statements = [
            delete(RouteMatch).where(RouteMatch.gtfs_route_id.in_(gtfs_routes)),
            delete(RoutePatternStop).where(RoutePatternStop.route_pattern_id.in_(patterns)),
            delete(GtfsTripRoutePatternLink).where(GtfsTripRoutePatternLink.dataset_id == dataset_id),
            delete(GtfsRoutePatternLink).where(GtfsRoutePatternLink.dataset_id == dataset_id),
            delete(RoutePattern).where(RoutePattern.gtfs_route_id.in_(gtfs_routes)),
            delete(CanonicalStopLink).where(
                CanonicalStopLink.dataset_id == dataset_id,
                CanonicalStopLink.object_type == "gtfs_stop",
            ),
        ]
        # The GTFS tables themselves go last, in this fixed order.
        gtfs_models = (
            GtfsStopTime,
            GtfsShape,
            GtfsTrip,
            GtfsCalendarDate,
            GtfsCalendar,
            GtfsRoute,
            GtfsStop,
            GtfsAgency,
        )
        statements.extend(delete(model).where(model.dataset_id == dataset_id) for model in gtfs_models)
    else:
        features = select(OsmFeature.id).where(OsmFeature.dataset_id == dataset_id)
        patterns = select(RoutePattern.id).where(RoutePattern.osm_feature_id.in_(features))
        statements = [
            delete(RouteMatch).where(RouteMatch.osm_feature_id.in_(features)),
            delete(RoutePatternStop).where(RoutePatternStop.route_pattern_id.in_(patterns)),
            delete(GtfsTripRoutePatternLink).where(GtfsTripRoutePatternLink.route_pattern_id.in_(patterns)),
            delete(GtfsRoutePatternLink).where(GtfsRoutePatternLink.route_pattern_id.in_(patterns)),
            delete(RoutePattern).where(RoutePattern.osm_feature_id.in_(features)),
            delete(CanonicalStopLink).where(
                CanonicalStopLink.dataset_id == dataset_id,
                CanonicalStopLink.object_type == "osm_feature",
            ),
            delete(OsmFeature).where(OsmFeature.dataset_id == dataset_id),
        ]

    # Dependent rows are deleted before the rows they reference.
    for statement in statements:
        session.execute(statement)
|
||||
|
||||
|
||||
def _delete_dataset_files(dataset: Dataset) -> None:
    """Unlink every sidecar file of ``dataset``; files that are already gone are ignored."""
    for sidecar in dataset_sidecar_paths(dataset):
        sidecar.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def dataset_sidecar_paths(dataset: Dataset) -> list[Path]:
    """Return the GTFS sidecar paths of ``dataset`` followed by its OSM sidecar paths."""
    paths: list[Path] = []
    paths.extend(gtfs_dataset_sidecar_paths(dataset))
    paths.extend(osm_dataset_sidecar_paths(dataset))
    return paths
|
||||
|
||||
|
||||
def _gtfs_sidecar_missing(session: Session, dataset_id: int) -> bool:
    """Return True when ``gtfs_missing_sidecar_paths`` reports anything for the dataset."""
    missing = gtfs_missing_sidecar_paths(session.get(Dataset, dataset_id))
    return bool(missing)
|
||||
|
||||
|
||||
def _safe_osm_feature_count(session: Session, dataset_id: int, *, kind=None) -> int:
    """Return ``osm_feature_count`` for the dataset, or 0 when it raises ``FileNotFoundError``."""
    try:
        total = osm_feature_count(session, dataset_id, kind=kind)
    except FileNotFoundError:
        total = 0
    return total
|
||||
|
||||
|
||||
def _osm_sidecar_missing(session: Session, dataset_id: int) -> bool:
    """Return True when ``osm_missing_sidecar_paths`` reports anything for the dataset."""
    missing = osm_missing_sidecar_paths(session.get(Dataset, dataset_id))
    return bool(missing)
|
||||
|
||||
|
||||
def _detach_update_checks_for_dataset(session: Session, dataset_id: int) -> None:
    """Clear ``active_dataset_id`` on every update check that points at ``dataset_id``."""
    referencing = select(SourceUpdateCheck).where(SourceUpdateCheck.active_dataset_id == dataset_id)
    for update_check in session.scalars(referencing).all():
        update_check.active_dataset_id = None
|
||||
|
||||
|
||||
def _count(session: Session, model, dataset_id: int) -> int:
    """Count the rows of ``model`` whose ``dataset_id`` equals ``dataset_id``."""
    stmt = select(func.count()).select_from(model).where(model.dataset_id == dataset_id)
    # A missing scalar result is reported as 0.
    return session.scalar(stmt) or 0
|
||||
|
||||
|
||||
def _count_where(session: Session, model, dataset_id: int, *where) -> int:
    """Count the rows of ``model`` in the dataset that also satisfy every extra ``where`` criterion."""
    stmt = select(func.count()).select_from(model).where(model.dataset_id == dataset_id, *where)
    # A missing scalar result is reported as 0.
    return session.scalar(stmt) or 0
|
||||
|
||||
|
||||
def _count_dataset_rows(session: Session, model, dataset_ids: list[int]) -> int:
|
||||
if not dataset_ids:
|
||||
return 0
|
||||
return session.scalar(select(func.count()).select_from(model).where(model.dataset_id.in_(dataset_ids))) or 0
|
||||
|
||||
|
||||
def _remove_empty_dirs(root: Path) -> None:
|
||||
if not root.exists():
|
||||
return
|
||||
for path in sorted((p for p in root.rglob("*") if p.is_dir()), key=lambda p: len(p.parts), reverse=True):
|
||||
try:
|
||||
path.rmdir()
|
||||
except OSError:
|
||||
pass
|
||||
252
app/dataset_search.py
Normal file
252
app/dataset_search.py
Normal file
@@ -0,0 +1,252 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import func, or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.gtfs_storage import execute_sidecar_query, uses_sidecar_stop_times
|
||||
from app.models import Dataset, GtfsRoute, GtfsShape, GtfsStopTime, GtfsTrip, OsmFeature, RoutePattern, Source
|
||||
from app.osm_storage import osm_feature_public_id, query_osm_features
|
||||
from app.pipeline.utils import norm_ref
|
||||
|
||||
|
||||
def search_datasets(session: Session, query: str, *, active_only: bool = False, limit: int = 80) -> dict:
    """Search GTFS routes, OSM routes and route patterns for ``query``.

    The query is stripped; an empty query returns empty result lists and empty
    totals without touching the database. ``limit`` is clamped to 1..250 and
    applied to each result group separately. ``active_only`` is forwarded to
    the GTFS and OSM lookups only.
    """
    needle = (query or "").strip()
    if not needle:
        return {"query": needle, "gtfs_routes": [], "osm_routes": [], "route_patterns": [], "totals": {}}

    row_cap = max(1, min(limit, 250))
    hits = {
        "gtfs_routes": _gtfs_route_hits(session, needle, active_only=active_only, limit=row_cap),
        "osm_routes": _osm_route_hits(session, needle, active_only=active_only, limit=row_cap),
        "route_patterns": _route_pattern_hits(session, needle, limit=row_cap),
    }
    totals = {group: len(found) for group, found in hits.items()}
    return {"query": needle, **hits, "totals": totals}
|
||||
|
||||
|
||||
def _gtfs_route_hits(session: Session, query: str, *, active_only: bool, limit: int) -> list[dict]:
    """Return GTFS routes matching ``query``, with their dataset, source and timetable counts.

    A route matches when ``query`` is a case-insensitive substring of its short
    name, route id or long name, or when its ``route_key`` equals the
    normalized reference of ``query``. Results are ordered with active
    datasets first, then by source name, short name and route id, and capped
    at ``limit`` rows. With ``active_only`` only routes of active datasets are
    returned.
    """
    like = f"%{query}%"
    ref = norm_ref(query)

    criteria = [
        GtfsRoute.short_name.ilike(like),
        GtfsRoute.route_id.ilike(like),
        GtfsRoute.long_name.ilike(like),
    ]
    # Only compare route_key when the query normalizes to a usable reference.
    # An empty reference would match every route with an empty key, and a None
    # reference would be rendered as ``route_key IS NULL``. The OSM lookup
    # applies the same ``if ref`` guard.
    if ref:
        criteria.append(GtfsRoute.route_key == ref)

    stmt = (
        select(GtfsRoute, Dataset, Source)
        .join(Dataset, Dataset.id == GtfsRoute.dataset_id)
        .join(Source, Source.id == Dataset.source_id)
        .where(or_(*criteria))
    )
    if active_only:
        stmt = stmt.where(Dataset.is_active.is_(True))
    stmt = stmt.order_by(
        Dataset.is_active.desc(),
        Source.name,
        GtfsRoute.short_name,
        GtfsRoute.route_id,
    ).limit(limit)

    rows = session.execute(stmt).all()
    route_ids = [route.id for route, _, _ in rows]
    trip_counts = _trip_counts(session, route_ids)
    stop_time_counts = _stop_time_counts(session, route_ids)
    shape_counts = _shape_counts(session, route_ids)

    return [
        {
            "type": "gtfs_route",
            "source": _source_payload(source),
            "dataset": _dataset_payload(dataset),
            "route": {
                "id": route.id,
                "route_id": route.route_id,
                "ref": route.short_name,
                "name": route.long_name,
                "mode": route.mode,
                "operator": route.operator_name,
            },
            "geometry": _geometry_payload(route),
            "timetable": {
                # Routes without a count row are reported as 0.
                "trips": trip_counts.get(route.id, 0),
                "stop_times": stop_time_counts.get(route.id, 0),
                "shapes": shape_counts.get(route.id, 0),
            },
        }
        for route, dataset, source in rows
    ]
|
||||
|
||||
|
||||
def _osm_route_hits(session: Session, query: str, *, active_only: bool, limit: int) -> list[dict]:
    """Return OSM route features matching ``query`` across the OSM GeoJSON datasets.

    Features are looked up by free-text search and, when the query normalizes
    to a reference, also by route key. Duplicates are merged on
    ``(dataset_id, osm_type, osm_id)``. The merged set is sorted (active
    datasets first, then source name, ref, name, id) and cut to ``limit``.
    Features whose dataset or source cannot be resolved are left out.
    """
    ref = norm_ref(query)

    dataset_stmt = select(Dataset).where(Dataset.kind == "osm_geojson")
    if active_only:
        dataset_stmt = dataset_stmt.where(Dataset.is_active.is_(True))
    datasets = session.scalars(dataset_stmt.order_by(Dataset.is_active.desc(), Dataset.id)).all()
    if not datasets:
        return []

    dataset_ids = [dataset.id for dataset in datasets]
    source_ids = [dataset.source_id for dataset in datasets]
    source_rows = session.scalars(select(Source).where(Source.id.in_(source_ids))).all()
    sources = {source.id: source for source in source_rows}
    dataset_by_id = {dataset.id: dataset for dataset in datasets}

    # Free-text search always runs; the route-key lookup only with a usable ref.
    lookups = [{"search": query}]
    if ref:
        lookups.append({"route_key": ref})

    unique: dict[tuple[int, str, str], OsmFeature] = {}
    for extra in lookups:
        for feature in query_osm_features(session, dataset_ids, kinds=["route"], limit=limit, **extra):
            unique[(feature.dataset_id, feature.osm_type, feature.osm_id)] = feature

    def sort_key(feature):
        owner = dataset_by_id.get(feature.dataset_id)
        owner_source = sources.get(owner.source_id) if owner else None
        return (
            0 if owner and owner.is_active else 1,
            owner_source.name if owner_source else "",
            feature.ref or "",
            feature.name or "",
            feature.id or 0,
        )

    ranked = sorted(unique.values(), key=sort_key)[:limit]

    hits: list[dict] = []
    for feature in ranked:
        dataset = dataset_by_id.get(feature.dataset_id)
        if dataset is None:
            continue
        source = sources.get(dataset.source_id)
        if source is None:
            continue
        hits.append(
            {
                "type": "osm_route",
                "source": _source_payload(source),
                "dataset": _dataset_payload(dataset),
                "osm": {
                    "id": osm_feature_public_id(feature),
                    "osm_type": feature.osm_type,
                    "osm_id": feature.osm_id,
                    "ref": feature.ref,
                    "name": feature.name,
                    "mode": feature.mode,
                    "route_scope": feature.route_scope,
                    "operator": feature.operator,
                    "network": feature.network,
                },
                "geometry": _geometry_payload(feature),
            }
        )
    return hits
|
||||
|
||||
|
||||
def _route_pattern_hits(session: Session, query: str, *, limit: int) -> list[dict]:
    """Return route patterns matching ``query``.

    Candidates are the first ``limit`` patterns (ordered by source kind, route
    ref, id) whose route ref, route name or pattern key contains ``query``
    case-insensitively. When the query normalizes to a reference, a candidate
    is kept only if its own normalized ref/name equals that reference or its
    route name contains the query; this filter runs after the SQL limit.
    """
    like = f"%{query}%"
    ref = norm_ref(query)
    stmt = (
        select(RoutePattern)
        .where(
            or_(
                RoutePattern.route_ref.ilike(like),
                RoutePattern.route_name.ilike(like),
                RoutePattern.pattern_key.ilike(like),
            )
        )
        .order_by(RoutePattern.source_kind, RoutePattern.route_ref, RoutePattern.id)
        .limit(limit)
    )
    candidates = session.scalars(stmt).all()
    lowered_query = query.lower()

    def keep(candidate) -> bool:
        if not ref:
            return True
        if norm_ref(candidate.route_ref or candidate.route_name or "") == ref:
            return True
        return lowered_query in (candidate.route_name or "").lower()

    hits: list[dict] = []
    for candidate in candidates:
        if not keep(candidate):
            continue
        hits.append(
            {
                "type": "route_pattern",
                "id": candidate.id,
                "ref": candidate.route_ref,
                "name": candidate.route_name,
                "mode": candidate.mode,
                "route_scope": candidate.route_scope,
                "source_kind": candidate.source_kind,
                "status": candidate.status,
                "confidence": candidate.confidence,
                "gtfs_route_id": candidate.gtfs_route_id,
                "osm_feature_id": candidate.osm_feature_id,
                "geometry": _geometry_payload(candidate),
            }
        )
    return hits
|
||||
|
||||
|
||||
def _trip_counts(session: Session, route_row_ids: list[int]) -> dict[int, int]:
|
||||
if not route_row_ids:
|
||||
return {}
|
||||
rows = session.execute(
|
||||
select(GtfsRoute.id, func.count(GtfsTrip.id))
|
||||
.join(GtfsTrip, (GtfsTrip.dataset_id == GtfsRoute.dataset_id) & (GtfsTrip.route_id == GtfsRoute.route_id))
|
||||
.where(GtfsRoute.id.in_(route_row_ids))
|
||||
.group_by(GtfsRoute.id)
|
||||
).all()
|
||||
return {int(route_id): int(count) for route_id, count in rows}
|
||||
|
||||
|
||||
def _stop_time_counts(session: Session, route_row_ids: list[int]) -> dict[int, int]:
|
||||
if not route_row_ids:
|
||||
return {}
|
||||
routes = session.scalars(select(GtfsRoute).where(GtfsRoute.id.in_(route_row_ids))).all()
|
||||
sidecar_routes = [route for route in routes if uses_sidecar_stop_times(session, route.dataset_id)]
|
||||
sidecar_route_ids = {route.id for route in sidecar_routes}
|
||||
main_route_ids = [route.id for route in routes if route.id not in sidecar_route_ids]
|
||||
counts: dict[int, int] = {}
|
||||
if main_route_ids:
|
||||
rows = session.execute(
|
||||
select(GtfsRoute.id, func.count(GtfsStopTime.id))
|
||||
.join(GtfsTrip, (GtfsTrip.dataset_id == GtfsRoute.dataset_id) & (GtfsTrip.route_id == GtfsRoute.route_id))
|
||||
.join(GtfsStopTime, (GtfsStopTime.dataset_id == GtfsTrip.dataset_id) & (GtfsStopTime.trip_id == GtfsTrip.trip_id))
|
||||
.where(GtfsRoute.id.in_(main_route_ids))
|
||||
.group_by(GtfsRoute.id)
|
||||
).all()
|
||||
counts.update({int(route_id): int(count) for route_id, count in rows})
|
||||
for route in sidecar_routes:
|
||||
rows = execute_sidecar_query(
|
||||
session,
|
||||
route.dataset_id,
|
||||
"""
|
||||
SELECT COUNT(*) AS count
|
||||
FROM gtfs_stop_times AS stop_times
|
||||
JOIN gtfs_trips AS trips
|
||||
ON trips.trip_id = stop_times.trip_id
|
||||
WHERE trips.route_id = ?
|
||||
""",
|
||||
[route.route_id],
|
||||
)
|
||||
counts[int(route.id)] = int(rows[0]["count"] or 0) if rows else 0
|
||||
return counts
|
||||
|
||||
|
||||
def _shape_counts(session: Session, route_row_ids: list[int]) -> dict[int, int]:
|
||||
if not route_row_ids:
|
||||
return {}
|
||||
rows = session.execute(
|
||||
select(GtfsRoute.id, func.count(func.distinct(GtfsShape.shape_id)))
|
||||
.join(GtfsTrip, (GtfsTrip.dataset_id == GtfsRoute.dataset_id) & (GtfsTrip.route_id == GtfsRoute.route_id))
|
||||
.join(GtfsShape, (GtfsShape.dataset_id == GtfsTrip.dataset_id) & (GtfsShape.shape_id == GtfsTrip.shape_id))
|
||||
.where(GtfsRoute.id.in_(route_row_ids))
|
||||
.group_by(GtfsRoute.id)
|
||||
).all()
|
||||
return {int(route_id): int(count) for route_id, count in rows}
|
||||
|
||||
|
||||
def _source_payload(source: Source) -> dict:
|
||||
return {"id": source.id, "name": source.name, "kind": source.kind, "country": source.country}
|
||||
|
||||
|
||||
def _dataset_payload(dataset: Dataset) -> dict:
|
||||
return {
|
||||
"id": dataset.id,
|
||||
"kind": dataset.kind,
|
||||
"is_active": dataset.is_active,
|
||||
"status": dataset.status,
|
||||
"created_at": dataset.created_at.isoformat() if dataset.created_at else None,
|
||||
"sha256": dataset.sha256,
|
||||
}
|
||||
|
||||
|
||||
def _geometry_payload(row) -> dict:
|
||||
bbox = None
|
||||
if all(getattr(row, attr, None) is not None for attr in ("min_lon", "min_lat", "max_lon", "max_lat")):
|
||||
bbox = {
|
||||
"min_lon": row.min_lon,
|
||||
"min_lat": row.min_lat,
|
||||
"max_lon": row.max_lon,
|
||||
"max_lat": row.max_lat,
|
||||
}
|
||||
return {"present": bool(getattr(row, "geometry_geojson", None)), "bbox": bbox}
|
||||
339
app/db.py
Normal file
339
app/db.py
Normal file
@@ -0,0 +1,339 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
import re
|
||||
from typing import Iterator
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy import event
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base whose metadata ``init_db`` and ``reset_db`` create and drop."""
|
||||
|
||||
|
||||
def _connect_args() -> dict:
    """Return the DBAPI connect arguments: SQLite-specific options, or ``{}`` for other databases."""
    if not settings.is_sqlite_database:
        return {}
    return {
        "check_same_thread": False,
        "timeout": settings.sqlite_timeout_seconds,
    }
|
||||
|
||||
|
||||
def _ensure_sqlite_parent() -> None:
    """Create the parent directory of the SQLite database file if it is missing.

    Does nothing for non-SQLite databases and for in-memory databases.
    """
    if not settings.is_sqlite_database:
        return

    from sqlalchemy.engine.url import make_url

    # Parse the URL rather than stripping a literal "sqlite:///" prefix.
    # Stripping treats a URL without that exact prefix ("sqlite://",
    # "sqlite+pysqlite:///...") or with query parameters as a filesystem path
    # and creates stray directories.
    # sqlite:///./data/workbench.sqlite -> ./data/workbench.sqlite
    database = make_url(settings.normalized_database_url).database
    if database and database != ":memory:":
        Path(database).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# The SQLite directory has to exist before the engine opens the database file.
_ensure_sqlite_parent()

engine = create_engine(
    settings.normalized_database_url,
    connect_args=_connect_args(),
    pool_pre_ping=True,
    future=True,
)
SessionLocal = sessionmaker(
    bind=engine,
    autoflush=False,
    autocommit=False,
    expire_on_commit=False,
    future=True,
)
|
||||
|
||||
_CREATE_INDEX_NAME_RE = re.compile(
|
||||
r"CREATE\s+(?:UNIQUE\s+)?INDEX\s+(?:CONCURRENTLY\s+)?(?:IF\s+NOT\s+EXISTS\s+)?([A-Za-z_][A-Za-z0-9_]*)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
if settings.is_sqlite_database:

    @event.listens_for(engine, "connect")
    def _set_sqlite_pragmas(dbapi_connection, _connection_record) -> None:
        """Apply the SQLite pragmas on each DBAPI connection the engine opens."""
        cursor = dbapi_connection.cursor()
        try:
            run = cursor.execute
            run("PRAGMA journal_mode=WAL")
            run(f"PRAGMA busy_timeout={int(settings.sqlite_busy_timeout_ms)}")
            run("PRAGMA synchronous=NORMAL")
            run("PRAGMA foreign_keys=ON")
        finally:
            # Close the cursor even when one of the pragmas fails.
            cursor.close()
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create missing tables, then add the runtime columns and indexes."""
    # Importing the models registers their tables on Base.metadata.
    from app import models  # noqa: F401

    _ensure_database_extensions()
    Base.metadata.create_all(bind=engine)
    for ensure_step in (_ensure_runtime_columns, _ensure_runtime_indexes):
        ensure_step()
|
||||
|
||||
|
||||
def reset_db() -> None:
    """Drop every table known to the metadata and rebuild the schema from scratch."""
    # Importing the models registers their tables on Base.metadata.
    from app import models  # noqa: F401

    _ensure_database_extensions()
    metadata = Base.metadata
    metadata.drop_all(bind=engine)
    metadata.create_all(bind=engine)
    for ensure_step in (_ensure_runtime_columns, _ensure_runtime_indexes):
        ensure_step()
|
||||
|
||||
|
||||
def _ensure_database_extensions() -> None:
    """Install the PostgreSQL extensions the application uses; no-op on other databases.

    ``postgis`` and ``pg_trgm`` are always requested. ``pgrouting`` is only
    created when the server lists it as an available extension.
    """
    if settings.is_postgresql_database:
        with engine.begin() as conn:
            for extension in ("postgis", "pg_trgm"):
                conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {extension}"))
            pgrouting_available = conn.execute(
                text("SELECT EXISTS (SELECT 1 FROM pg_available_extensions WHERE name = 'pgrouting')")
            ).scalar()
            if pgrouting_available:
                conn.execute(text("CREATE EXTENSION IF NOT EXISTS pgrouting"))
|
||||
|
||||
|
||||
def _ensure_runtime_columns() -> None:
    """Add columns that an existing database may still be missing.

    PostgreSQL is delegated to ``_ensure_postgresql_runtime_columns``. On
    SQLite each table's column list is read with ``PRAGMA table_info`` and
    every missing column is added with ``ALTER TABLE``. Other databases are
    left alone.
    """
    if settings.is_postgresql_database:
        _ensure_postgresql_runtime_columns()
        return
    if not settings.is_sqlite_database:
        return

    # (table, ((column, SQL type), ...)) in the order they are checked and added.
    additions = (
        (
            "gtfs_stop_times",
            (
                ("arrival_seconds", "INTEGER"),
                ("departure_seconds", "INTEGER"),
            ),
        ),
        (
            "sources",
            (
                ("catalog_entry_id", "INTEGER"),
                ("priority", "VARCHAR(16)"),
                ("mode_scope", "TEXT"),
                ("source_basis", "TEXT"),
                ("notes", "TEXT"),
            ),
        ),
        (
            "jobs",
            (
                ("priority", "INTEGER NOT NULL DEFAULT 0"),
                ("requested_action", "VARCHAR(32)"),
                ("lease_owner", "VARCHAR(255)"),
                ("lease_expires_at", "DATETIME"),
                ("paused_at", "DATETIME"),
                ("dismissed_at", "DATETIME"),
            ),
        ),
        ("gtfs_routes", (("route_scope", "VARCHAR(64)"),)),
        ("route_patterns", (("route_scope", "VARCHAR(64)"),)),
        ("osm_features", (("route_scope", "VARCHAR(64)"),)),
        ("osm_addresses", (("geometry_geojson", "TEXT"),)),
    )

    with engine.begin() as conn:
        for table, specs in additions:
            # PRAGMA table_info rows carry the column name at index 1.
            present = {row[1] for row in conn.execute(text(f"PRAGMA table_info({table})")).all()}
            for column, sql_type in specs:
                if column not in present:
                    conn.execute(text(f"ALTER TABLE {table} ADD COLUMN {column} {sql_type}"))
|
||||
|
||||
|
||||
def _ensure_postgresql_runtime_columns() -> None:
    """Add the missing PostgreSQL columns and widen ``osm_addresses.country`` to TEXT.

    Column presence is read once before any column is added.
    """
    # (table, column, SQL type) in the order the columns are added.
    wanted = (
        ("osm_features", "geom", "geometry(Geometry, 4326)"),
        ("gtfs_routes", "geom", "geometry(Geometry, 4326)"),
        ("gtfs_shapes", "geom", "geometry(Geometry, 4326)"),
        ("route_patterns", "geom", "geometry(Geometry, 4326)"),
        ("osm_addresses", "geometry_geojson", "TEXT"),
        ("osm_addresses", "geom", "geometry(Point, 4326)"),
        ("osm_addresses", "area_geom", "geometry(Geometry, 4326)"),
        ("gtfs_stops", "geom", "geometry(Point, 4326)"),
        ("canonical_stops", "geom", "geometry(Point, 4326)"),
        ("routing_nodes", "geom", "geometry(Point, 4326)"),
        ("routing_edges", "geom", "geometry(LineString, 4326)"),
    )
    with engine.begin() as conn:
        existing = _postgresql_columns(conn)
        for table, column, sql_type in wanted:
            if (table, column) in existing:
                continue
            conn.execute(text(f"ALTER TABLE {table} ADD COLUMN {column} {sql_type}"))

        country = existing.get(("osm_addresses", "country"))
        if country is not None and country["data_type"] != "text":
            conn.execute(text("ALTER TABLE osm_addresses ALTER COLUMN country TYPE TEXT"))
|
||||
|
||||
|
||||
def _ensure_runtime_indexes() -> None:
    """Create the secondary indexes the application relies on.

    On PostgreSQL the portable statements plus the PostgreSQL-only ones are
    passed to ``_execute_missing_postgresql_indexes``. On every other database
    each portable statement is executed directly. On SQLite the WAL journal
    mode and the busy timeout are set first.
    """
    index_ddl = [
        # OSM features, GTFS routes and stops: map lookups
        "CREATE INDEX IF NOT EXISTS ix_osm_features_map_bbox ON osm_features (dataset_id, kind, mode, min_lon, max_lon, min_lat, max_lat)",
        "CREATE INDEX IF NOT EXISTS ix_osm_features_scope_bbox ON osm_features (dataset_id, kind, mode, route_scope, min_lon, max_lon, min_lat, max_lat)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_routes_map_bbox ON gtfs_routes (dataset_id, mode, min_lon, max_lon, min_lat, max_lat)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_routes_scope_bbox ON gtfs_routes (dataset_id, mode, route_scope, min_lon, max_lon, min_lat, max_lat)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stops_map_point ON gtfs_stops (dataset_id, lon, lat)",
        # GTFS timetable tables
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stop_times_stop ON gtfs_stop_times (dataset_id, stop_id, departure_seconds, trip_id, stop_sequence)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stop_times_stop_depart_trip ON gtfs_stop_times (dataset_id, stop_id, departure_seconds, trip_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stop_times_stop_arrival ON gtfs_stop_times (dataset_id, stop_id, arrival_seconds, trip_id, stop_sequence)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stop_times_stop_arrive_trip ON gtfs_stop_times (dataset_id, stop_id, arrival_seconds, trip_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stop_times_trip_seq ON gtfs_stop_times (dataset_id, trip_id, stop_sequence)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stop_times_trip_stop_seq ON gtfs_stop_times (dataset_id, trip_id, stop_id, stop_sequence)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_trips_dataset_trip ON gtfs_trips (dataset_id, trip_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_trips_dataset_route ON gtfs_trips (dataset_id, route_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_trips_dataset_service ON gtfs_trips (dataset_id, service_id, trip_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_trips_dataset_route_service ON gtfs_trips (dataset_id, route_id, service_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_routes_dataset_route ON gtfs_routes (dataset_id, route_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_shapes_dataset_shape ON gtfs_shapes (dataset_id, shape_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_calendars_dataset_service_dates ON gtfs_calendars (dataset_id, service_id, start_date, end_date)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_calendar_dates_dataset_date ON gtfs_calendar_dates (dataset_id, date, service_id, exception_type)",
        # Canonical stops and route patterns
        "CREATE INDEX IF NOT EXISTS ix_canonical_stop_links_object ON canonical_stop_links (object_type, dataset_id, object_id)",
        "CREATE INDEX IF NOT EXISTS ix_canonical_stop_links_external ON canonical_stop_links (object_type, dataset_id, external_id)",
        "CREATE INDEX IF NOT EXISTS ix_route_patterns_ref_mode ON route_patterns (route_ref, mode, source_kind)",
        "CREATE INDEX IF NOT EXISTS ix_route_patterns_bbox ON route_patterns (mode, min_lon, max_lon, min_lat, max_lat)",
        "CREATE INDEX IF NOT EXISTS ix_route_patterns_scope_bbox ON route_patterns (mode, route_scope, source_kind, min_lon, max_lon, min_lat, max_lat)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_route_pattern_links_trip_shape ON gtfs_route_pattern_links (dataset_id, route_id, shape_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_trip_route_pattern_links_trip ON gtfs_trip_route_pattern_links (dataset_id, trip_id)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_trip_route_pattern_links_pattern ON gtfs_trip_route_pattern_links (route_pattern_id, dataset_id, trip_id)",
        # Sources, catalog, update checks and OSM diffs
        "CREATE INDEX IF NOT EXISTS ix_sources_catalog_entry ON sources (catalog_entry_id)",
        "CREATE INDEX IF NOT EXISTS ix_sources_priority_country_kind ON sources (priority, country, kind)",
        "CREATE INDEX IF NOT EXISTS ix_source_catalog_country_priority ON source_catalog_entries (country_code, priority, status)",
        "CREATE INDEX IF NOT EXISTS ix_source_catalog_name ON source_catalog_entries (source_name)",
        "CREATE INDEX IF NOT EXISTS ix_source_update_checks_source_checked ON source_update_checks (source_id, checked_at)",
        "CREATE INDEX IF NOT EXISTS ix_source_update_checks_available ON source_update_checks (source_id, update_available, checked_at)",
        "CREATE INDEX IF NOT EXISTS ix_osm_diff_states_source_sequence ON osm_diff_states (source_id, sequence_number)",
        "CREATE INDEX IF NOT EXISTS ix_osm_diff_states_source_status ON osm_diff_states (source_id, status, updated_at)",
        # Job queue and pipeline runs
        "CREATE INDEX IF NOT EXISTS ix_jobs_status_created ON jobs (status, created_at)",
        "CREATE INDEX IF NOT EXISTS ix_jobs_kind_status ON jobs (kind, status)",
        "CREATE INDEX IF NOT EXISTS ix_jobs_queue_claim ON jobs (status, priority, created_at, id)",
        "CREATE INDEX IF NOT EXISTS ix_jobs_lease ON jobs (status, lease_expires_at)",
        "CREATE INDEX IF NOT EXISTS ix_jobs_dismissed_status ON jobs (dismissed_at, status, created_at)",
        "CREATE INDEX IF NOT EXISTS ix_job_events_job_created ON job_events (job_id, created_at, id)",
        "CREATE INDEX IF NOT EXISTS ix_pipeline_runs_stage_dataset_hash ON pipeline_runs (stage, dataset_id, dependency_hash, status, started_at)",
        "CREATE INDEX IF NOT EXISTS ix_pipeline_runs_stage_source_hash ON pipeline_runs (stage, source_id, dependency_hash, status, started_at)",
        "CREATE INDEX IF NOT EXISTS ix_pipeline_runs_job ON pipeline_runs (job_id, stage, status)",
        "CREATE INDEX IF NOT EXISTS ix_match_rules_type_active ON match_rules (rule_type, active)",
        # Journey search and itineraries
        "CREATE INDEX IF NOT EXISTS ix_journey_search_cache_type_expires ON journey_search_cache (cache_type, expires_at)",
        "CREATE INDEX IF NOT EXISTS ix_travel_requests_created ON travel_requests (created_at)",
        "CREATE INDEX IF NOT EXISTS ix_itineraries_request_saved ON itineraries (request_id, saved, created_at)",
        "CREATE INDEX IF NOT EXISTS ix_itinerary_legs_itinerary_sequence ON itinerary_legs (itinerary_id, sequence)",
        # Routing graph
        "CREATE INDEX IF NOT EXISTS ix_routing_nodes_dataset_osm ON routing_nodes (dataset_id, osm_node_id)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_dataset_source ON routing_edges (dataset_id, source_osm_node_id)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_dataset_target ON routing_edges (dataset_id, target_osm_node_id)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_dataset_drive ON routing_edges (dataset_id, source_osm_node_id) WHERE drive_cost_s IS NOT NULL",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_dataset_walk ON routing_edges (dataset_id, source_osm_node_id) WHERE walk_cost_s IS NOT NULL",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_dataset_reverse_drive ON routing_edges (dataset_id, target_osm_node_id) WHERE reverse_drive_cost_s IS NOT NULL",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_dataset_reverse_walk ON routing_edges (dataset_id, target_osm_node_id) WHERE reverse_walk_cost_s IS NOT NULL",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_bbox ON routing_edges (dataset_id, min_lon, max_lon, min_lat, max_lat)",
        # OSM addresses
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_dataset_city_street ON osm_addresses (dataset_id, city, street, housenumber)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_dataset_postcode ON osm_addresses (dataset_id, postcode)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_bbox ON osm_addresses (dataset_id, min_lon, max_lon, min_lat, max_lat)",
    ]

    with engine.begin() as conn:
        if settings.is_sqlite_database:
            conn.execute(text("PRAGMA journal_mode=WAL"))
            conn.execute(text(f"PRAGMA busy_timeout={int(settings.sqlite_busy_timeout_ms)}"))

        if not settings.is_postgresql_database:
            for ddl in index_ddl:
                conn.execute(text(ddl))
            return

        _execute_missing_postgresql_indexes(conn, index_ddl + _postgresql_index_statements())
|
||||
|
||||
|
||||
def _postgresql_columns(conn: Connection) -> dict[tuple[str, str], dict[str, str]]:
    """Return the columns visible in the current schemas.

    Keys are ``(table_name, column_name)``; each value holds the column's
    ``data_type`` and ``udt_name`` as strings.
    """
    query = text(
        """
        SELECT table_name, column_name, data_type, udt_name
        FROM information_schema.columns
        WHERE table_schema = ANY (current_schemas(false))
        """
    )
    catalog: dict[tuple[str, str], dict[str, str]] = {}
    for row in conn.execute(query).mappings():
        key = (str(row["table_name"]), str(row["column_name"]))
        catalog[key] = {
            "data_type": str(row["data_type"]),
            "udt_name": str(row["udt_name"]),
        }
    return catalog
|
||||
|
||||
|
||||
def _execute_missing_postgresql_indexes(conn: Connection, statements: list[str]) -> None:
    """Run each ``CREATE INDEX`` statement whose index does not exist yet.

    Statements whose index name cannot be extracted are always executed. Names
    created during this call are remembered, so a repeated name is skipped.
    """
    known = _postgresql_index_names(conn)
    for ddl in statements:
        name = _index_name_from_create_statement(ddl)
        if not name:
            conn.execute(text(ddl))
        elif name not in known:
            conn.execute(text(ddl))
            known.add(name)
|
||||
|
||||
|
||||
def _postgresql_index_names(conn: Connection) -> set[str]:
    """Return the names of all indexes in the current schemas."""
    query = text(
        """
        SELECT indexname
        FROM pg_indexes
        WHERE schemaname = ANY (current_schemas(false))
        """
    )
    names: set[str] = set()
    for row in conn.execute(query):
        names.add(str(row[0]))
    return names
|
||||
|
||||
|
||||
def _index_name_from_create_statement(statement: str) -> str | None:
    """Return the index name found in a ``CREATE INDEX`` statement, or ``None`` when there is none."""
    found = _CREATE_INDEX_NAME_RE.search(statement)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
|
||||
def _postgresql_index_statements() -> list[str]:
    """Return the additional ``CREATE INDEX IF NOT EXISTS`` statements for PostgreSQL.

    The statements are kept in three groups that are concatenated in order:
    GiST geometry/bounding-box indexes, plain and expression lookup indexes,
    and text-search indexes (GIN ``gin_trgm_ops`` plus the street-key lookup).
    """
    spatial = [
        "CREATE INDEX IF NOT EXISTS ix_osm_features_geom_gist ON osm_features USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_osm_features_stop_geom_gist ON osm_features USING GIST (geom) WHERE kind IN ('stop', 'station', 'terminal')",
        "CREATE INDEX IF NOT EXISTS ix_osm_features_route_geom_gist ON osm_features USING GIST (geom) WHERE kind = 'route'",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stops_geom_gist ON gtfs_stops USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_canonical_stops_geom_gist ON canonical_stops USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_routes_geom_gist ON gtfs_routes USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_shapes_geom_gist ON gtfs_shapes USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_route_patterns_geom_gist ON route_patterns USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_geom_gist ON osm_addresses USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_area_geom_gist ON osm_addresses USING GIST (area_geom)",
        "CREATE INDEX IF NOT EXISTS ix_routing_nodes_geom_gist ON routing_nodes USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_bbox_box_gist ON routing_edges USING GIST (box(point(max_lon, max_lat), point(min_lon, min_lat)))",
    ]
    lookup = [
        "CREATE INDEX IF NOT EXISTS ix_gtfs_trips_dataset_route_shape_expr ON gtfs_trips (dataset_id, route_id, (COALESCE(shape_id, '__route__')))",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stop_times_dataset_stop ON gtfs_stop_times (dataset_id, stop_id)",
        "CREATE INDEX IF NOT EXISTS ix_canonical_stop_links_gtfs_external ON canonical_stop_links (dataset_id, external_id, canonical_stop_id) WHERE object_type = 'gtfs_stop'",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stops_dataset_parent ON gtfs_stops (dataset_id, parent_station)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stops_dataset_stop_prefix ON gtfs_stops (dataset_id, (split_part(stop_id, '::', 1)))",
    ]
    text_search = [
        "CREATE INDEX IF NOT EXISTS ix_osm_features_name_trgm ON osm_features USING GIN (LOWER(COALESCE(name, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_osm_features_ref_trgm ON osm_features USING GIN (LOWER(COALESCE(ref, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_osm_features_tags_trgm ON osm_features USING GIN (LOWER(COALESCE(tags_json, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_search_trgm ON osm_addresses USING GIN (LOWER(COALESCE(search_text, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_display_trgm ON osm_addresses USING GIN (LOWER(COALESCE(display_name, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_street_key_house ON osm_addresses (dataset_id, REPLACE(LOWER(COALESCE(NULLIF(street, ''), NULLIF(place, ''), '')), 'ß', 'ss'), housenumber)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_street_key_trgm ON osm_addresses USING GIN (REPLACE(LOWER(COALESCE(NULLIF(street, ''), NULLIF(place, ''), '')), 'ß', 'ss') gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stops_name_trgm ON gtfs_stops USING GIN (name gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_gtfs_stops_stop_id_trgm ON gtfs_stops USING GIN (stop_id gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_route_patterns_ref_trgm ON route_patterns USING GIN (LOWER(COALESCE(route_ref, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_route_patterns_name_trgm ON route_patterns USING GIN (LOWER(COALESCE(route_name, '')) gin_trgm_ops)",
    ]
    return spatial + lookup + text_search
|
||||
|
||||
|
||||
def get_db() -> Iterator[Session]:
    """Yield a new ``SessionLocal`` session and close it when the consumer is done.

    No commit or rollback is issued here; the session is only closed.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
|
||||
|
||||
@contextmanager
def session_scope() -> Iterator[Session]:
    """Provide a transactional session as a context manager.

    The session is committed when the ``with`` body finishes normally. Any
    exception from the body or from the commit triggers a rollback and is
    re-raised. The session is closed in every case.
    """
    session = SessionLocal()
    try:
        yield session
        # Commit stays inside the ``try`` so a failing commit is rolled back too.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
|
||||
211
app/db_lock.py
Normal file
211
app/db_lock.py
Normal file
@@ -0,0 +1,211 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import threading
|
||||
import time
|
||||
from typing import Iterator
|
||||
|
||||
from app.config import settings
|
||||
|
||||
try:
|
||||
import fcntl
|
||||
except ImportError: # pragma: no cover - this app currently targets Linux/macOS dev hosts
|
||||
fcntl = None # type: ignore[assignment]
|
||||
|
||||
|
||||
class DatabaseWriteBusy(RuntimeError):
    """Raised when the database write lock could not be obtained.

    Attributes:
        operation: The operation that failed to obtain the lock.
        active: Metadata describing the current lock holder (may be empty).
    """

    def __init__(self, operation: str, active: dict[str, object] | None = None) -> None:
        self.operation = operation
        self.active = active or {}
        message = "Database is busy with another write operation"
        holder = self.active.get("operation")
        if holder:
            # Name the competing operation when the holder's metadata provides it.
            message = f"{message}: {holder}"
        super().__init__(message)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DatabaseWriteState:
    """Immutable snapshot of this process's write-lock state."""

    locked: bool
    operation: str | None = None
    pid: int | None = None
    # Wall-clock timestamp (``time.time()``) set when the lock was taken.
    started_at: float | None = None

    @property
    def elapsed_seconds(self) -> float | None:
        """Seconds since ``started_at``, never negative; ``None`` if unset."""
        began = self.started_at
        if began is None:
            return None
        elapsed = time.time() - began
        # Clamp so a backwards clock adjustment never yields a negative duration.
        return elapsed if elapsed > 0.0 else 0.0
|
||||
|
||||
|
||||
# Guards reads and writes of the ``_state`` snapshot below.
_state_lock = threading.Lock()
# Last published write-lock state for this process.
_state = DatabaseWriteState(locked=False)
# Serializes writers that live in this process; the lock file covers other processes.
_process_write_lock = threading.Lock()
|
||||
|
||||
|
||||
def is_sqlite_database() -> bool:
    """Return ``settings.is_sqlite_database`` for the configured database."""
    uses_sqlite = settings.is_sqlite_database
    return uses_sqlite
|
||||
|
||||
|
||||
@contextmanager
def database_write_lock(operation: str, timeout: float | None = None) -> Iterator[None]:
    """Serialize SQLite writes inside and across app processes.

    SQLite allows only one writer. This lock prevents mutating endpoints from
    competing until SQLite times out with a low-level "database is locked" error.

    For non-SQLite databases the context manager is a no-op.

    Args:
        operation: Label recorded in the lock file metadata and process state.
        timeout: Seconds to wait for the lock. ``None`` falls back to
            ``settings.database_write_lock_timeout_seconds``; if that is also
            ``None`` the call waits without a deadline.

    Raises:
        DatabaseWriteBusy: If the in-process lock or the lock file could not
            be obtained before the deadline.
    """
    if not is_sqlite_database():
        yield
        return

    effective_timeout = settings.database_write_lock_timeout_seconds if timeout is None else timeout
    deadline = None if effective_timeout is None else time.monotonic() + max(0.0, effective_timeout)
    if not _acquire_process_lock(deadline):
        raise DatabaseWriteBusy(operation, database_write_status().__dict__)

    handle = None
    file_locked = False
    try:
        lock_path = _lock_path()
        lock_path.parent.mkdir(parents=True, exist_ok=True)
        handle = _open_locked_handle(lock_path, deadline)
        if handle is None:
            raise DatabaseWriteBusy(operation, _read_lock_metadata(lock_path))
        file_locked = True
        _write_lock_metadata(handle, operation)
        _set_state(DatabaseWriteState(locked=True, operation=operation, pid=os.getpid(), started_at=time.time()))
        yield
    finally:
        # The outer ``finally`` guarantees the in-process lock is released even
        # if one of the cleanup steps below raises.
        try:
            _set_state(DatabaseWriteState(locked=False))
            if handle is not None:
                try:
                    if file_locked and fcntl is not None:
                        # Remove the file while the flock is still held. Unlinking
                        # only after unlocking lets a waiter lock the old inode
                        # just before it disappears while a third process creates
                        # and locks a fresh file, giving two writers at once.
                        # NOTE(review): this narrows the window; closing it fully
                        # also needs the acquiring side to verify that its locked
                        # descriptor still refers to the lock path.
                        _remove_lock_file()
                        try:
                            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
                        except OSError:
                            pass
                finally:
                    handle.close()
            if file_locked and fcntl is None:
                # Without flock support keep the original order (close, then
                # unlink): some platforms refuse to unlink a file that is open.
                _remove_lock_file()
        finally:
            _process_write_lock.release()


def _remove_lock_file() -> None:
    """Delete the lock file on a best-effort basis, ignoring filesystem errors."""
    try:
        _lock_path().unlink()
    except OSError:  # includes FileNotFoundError
        pass
|
||||
|
||||
|
||||
def database_write_status() -> DatabaseWriteState:
    """Return the most recently published write-lock state of this process."""
    with _state_lock:
        snapshot = _state
    return snapshot
|
||||
|
||||
|
||||
def _acquire_process_lock(deadline: float | None) -> bool:
    """Acquire the in-process write lock, waiting until ``deadline`` at most.

    Args:
        deadline: ``time.monotonic()`` value after which to give up, or
            ``None`` to wait indefinitely.

    Returns:
        ``True`` if the lock was acquired, ``False`` if the deadline passed.
        At least one acquisition attempt is made even if the deadline has
        already expired.
    """
    if deadline is None:
        return _process_write_lock.acquire()
    # Let the lock wait natively instead of polling every 50 ms; a zero timeout
    # still makes a single non-waiting attempt.
    remaining = max(0.0, deadline - time.monotonic())
    return _process_write_lock.acquire(timeout=min(remaining, threading.TIMEOUT_MAX))
|
||||
|
||||
|
||||
def _acquire_file_lock(handle, deadline: float | None) -> bool:
    """Poll for an exclusive ``flock`` on ``handle`` until ``deadline``.

    Returns ``True`` once the lock is held (or immediately when ``fcntl`` is
    unavailable), and ``False`` if the deadline passes first. ``deadline`` is a
    ``time.monotonic()`` value; ``None`` means retry without limit.
    """
    if fcntl is None:
        return True
    flags = fcntl.LOCK_EX | fcntl.LOCK_NB
    acquired = False
    while not acquired:
        try:
            fcntl.flock(handle.fileno(), flags)
        except BlockingIOError:
            if deadline is not None and time.monotonic() >= deadline:
                break
            time.sleep(0.05)
        else:
            acquired = True
    return acquired
|
||||
|
||||
|
||||
def _open_locked_handle(lock_path: Path, deadline: float | None):
    """Open the lock file and return a handle that holds the exclusive lock.

    The file is reopened on every attempt. If the lock is held by someone else
    and the recorded metadata looks stale, the file is removed and the attempt
    repeated.

    Args:
        lock_path: Location of the lock file.
        deadline: ``time.monotonic()`` value after which to give up, or
            ``None`` to keep trying.

    Returns:
        The open, locked file handle, or ``None`` if the deadline passed or the
        stale lock file could not be removed.
    """

    def expired() -> bool:
        return deadline is not None and time.monotonic() >= deadline

    while True:
        try:
            lock_path.parent.mkdir(parents=True, exist_ok=True)
            handle = lock_path.open("a+", encoding="utf-8")
        except FileNotFoundError:
            if expired():
                return None
            time.sleep(0.05)
            continue
        try:
            locked = _try_file_lock(handle)
        except BaseException:
            # Do not leak the descriptor if locking fails unexpectedly.
            handle.close()
            raise
        if locked:
            # The file may have been unlinked or replaced between open() and
            # flock(); a lock on such an orphaned inode excludes nobody, so it
            # is only accepted if the descriptor still matches the path.
            if fcntl is None or _handle_matches_path(handle, lock_path):
                return handle
            handle.close()
            if expired():
                return None
            continue
        metadata = _read_lock_metadata(lock_path)
        handle.close()
        if not _lock_metadata_is_stale(metadata):
            if expired():
                return None
            time.sleep(0.05)
            continue
        # NOTE(review): this branch is reached only after a failed flock, which
        # suggests a live holder. The metadata may be left over from an earlier
        # crashed process while a new holder has not written its own yet —
        # confirm that unlinking here is still wanted.
        try:
            lock_path.unlink()
        except FileNotFoundError:
            pass
        except OSError:
            return None
        if expired():
            return None


def _handle_matches_path(handle, lock_path: Path) -> bool:
    """Return True if ``handle`` refers to the file currently at ``lock_path``."""
    try:
        return os.path.samestat(os.fstat(handle.fileno()), os.stat(lock_path))
    except OSError:
        return False
|
||||
|
||||
|
||||
def _try_file_lock(handle) -> bool:
    """Make one non-blocking attempt to take an exclusive ``flock`` on ``handle``.

    Returns ``True`` when the lock was obtained, or when ``fcntl`` is
    unavailable and no file locking is performed; ``False`` when the lock is
    currently held elsewhere.
    """
    if fcntl is None:
        return True
    try:
        fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        obtained = False
    else:
        obtained = True
    return obtained
|
||||
|
||||
|
||||
def _lock_metadata_is_stale(metadata: dict[str, object]) -> bool:
    """Decide whether lock metadata points at a process that no longer exists.

    Metadata without a usable positive ``pid``, or naming this very process,
    is never considered stale.
    """
    recorded = metadata.get("pid")
    try:
        owner_pid = int(recorded)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return False
    foreign_owner = owner_pid > 0 and owner_pid != os.getpid()
    return foreign_owner and not _pid_exists(owner_pid)
|
||||
|
||||
|
||||
def _pid_exists(pid: int) -> bool:
    """Probe for a process by sending it signal 0.

    ``ProcessLookupError`` means no such process. ``PermissionError`` is
    reported as existing, since the process could not be signalled but was
    found.
    """
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        alive = False
    except PermissionError:
        alive = True
    else:
        alive = True
    return alive
|
||||
|
||||
|
||||
def _set_state(state: DatabaseWriteState) -> None:
    """Publish ``state`` as the current write-lock state under ``_state_lock``."""
    global _state
    new_state = state
    with _state_lock:
        _state = new_state
|
||||
|
||||
|
||||
def _lock_path() -> Path:
    """Return the path of the write-lock file inside ``settings.data_dir``."""
    data_dir = settings.data_dir
    return data_dir / "workbench.write.lock"
|
||||
|
||||
|
||||
def _write_lock_metadata(handle, operation: str) -> None:
    """Replace the lock file's content with compact JSON describing the holder.

    The payload records ``operation``, this process's pid and the current
    wall-clock time, and is flushed and fsynced before returning.
    """
    payload = json.dumps(
        {"operation": operation, "pid": os.getpid(), "started_at": time.time()},
        separators=(",", ":"),
    )
    # Drop whatever a previous holder left behind before writing.
    handle.seek(0)
    handle.truncate()
    handle.write(payload)
    handle.flush()
    os.fsync(handle.fileno())
|
||||
|
||||
|
||||
def _read_lock_metadata(path: Path) -> dict[str, object]:
    """Read the JSON metadata stored in the lock file.

    Args:
        path: Location of the lock file.

    Returns:
        The decoded JSON object, or an empty dict when the file is missing,
        unreadable, empty, not valid UTF-8/JSON, or does not contain a JSON
        object. The file may be read while another process is rewriting it, so
        every malformed state is treated as "no metadata".
    """
    try:
        raw = path.read_text(encoding="utf-8").strip()
        payload = json.loads(raw) if raw else {}
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use ``.get`` on the result, so anything but an object is discarded.
    return payload if isinstance(payload, dict) else {}
|
||||
923
app/feed_discovery.py
Normal file
923
app/feed_discovery.py
Normal file
@@ -0,0 +1,923 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from html import unescape
|
||||
from html.parser import HTMLParser
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
from urllib.parse import parse_qs, urljoin, urlparse
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# Remote catalogs queried during feed discovery.
MOBILITY_DATABASE_FEEDS_URL = "https://files.mobilitydatabase.org/feeds_v2.csv"
MOBILITY_DATABASE_ACCEPTANCE_TEST_URL = "https://raw.githubusercontent.com/MobilityData/gtfs-validator/master/scripts/mobility-database-harvester/acceptance_test_feed_list.csv"
PTNA_GTFS_INDEX_URL = "https://ptna.openstreetmap.de/gtfs/index.html"
PTNA_COUNTRY_URL_TEMPLATE = "https://ptna.openstreetmap.de/gtfs/{country}/index.php"

# Country codes scanned by default, and the order in which the test run is filled.
DEFAULT_DISCOVERY_COUNTRIES = ["DE", "AT", "CH", "NL", "DK", "FR", "BE", "LU", "NO", "SE", "FI", "IE", "GB"]
CURATED_TEST_COUNTRIES = ["DE", "CH", "AT", "NL", "DK", "FI", "NO", "SE", "IE", "GB", "FR", "BE", "LU"]

# Column layout of the ingestable/test-run CSV manifests.
DIRECT_INGEST_HEADERS = ["name", "kind", "url", "country", "license", "mode_scope", "source_basis", "priority", "notes"]

# Column layout of the full candidate CSV; all but ``candidate_id`` name FeedCandidate fields.
CANONICAL_HEADERS = [
    "candidate_id", "discovery_source", "country", "subdivision", "provider", "feed_name",
    "stable_id", "ptna_feed_id", "data_type", "status", "is_official",
    "selected_url", "direct_download_url", "latest_url", "original_release_url",
    "license_url", "license_text", "osm_license_text",
    "details_url", "routes_url",
    "valid_from", "valid_to", "release_date", "feed_version",
    "bbox", "features", "priority",
    "availability_status", "http_status", "content_type", "content_length", "final_url",
    "source_basis", "notes",
]
|
||||
|
||||
|
||||
@dataclass
class FeedCandidate:
    """One discovered GTFS feed, with the metadata gathered from its sources.

    All descriptive fields are plain strings; empty string means "unknown".
    """

    discovery_source: str
    country: str = ""
    subdivision: str = ""
    provider: str = ""
    feed_name: str = ""
    stable_id: str = ""
    ptna_feed_id: str = ""
    data_type: str = "gtfs"
    status: str = ""
    is_official: str = ""
    selected_url: str = ""
    direct_download_url: str = ""
    latest_url: str = ""
    original_release_url: str = ""
    license_url: str = ""
    license_text: str = ""
    osm_license_text: str = ""
    details_url: str = ""
    routes_url: str = ""
    valid_from: str = ""
    valid_to: str = ""
    release_date: str = ""
    feed_version: str = ""
    bbox: str = ""
    features: str = ""
    priority: str = ""
    availability_status: str = "unchecked"
    http_status: str = ""
    content_type: str = ""
    content_length: str = ""
    final_url: str = ""
    source_basis: str = ""
    notes: str = ""
    evidence_sources: list[str] = field(default_factory=list)

    def key(self) -> str:
        """Return an identity key: stable id, then URL, then PTNA id, else a row hash."""
        if self.stable_id:
            return f"stable:{self.stable_id}"
        if self.selected_url:
            return f"url:{_normalize_url_key(self.selected_url)}"
        if self.ptna_feed_id:
            return f"ptna:{self.ptna_feed_id}"
        # Nothing identifying is set: fall back to a digest of the whole row.
        serialized = json.dumps(self.row(), sort_keys=True)
        digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
        return "hash:" + digest

    def candidate_id(self) -> str:
        """Return a 16-hex-character id derived from the identifying fields."""
        identifying = (
            self.discovery_source,
            self.country,
            self.stable_id,
            self.ptna_feed_id,
            self.selected_url,
            self.provider,
            self.feed_name,
        )
        return hashlib.sha256("|".join(identifying).encode("utf-8")).hexdigest()[:16]

    def row(self) -> dict[str, str]:
        """Return the candidate as a dict with one entry per canonical header."""
        payload: dict[str, str] = {}
        for header in CANONICAL_HEADERS:
            # ``candidate_id`` is a method, not a field; it is filled in below.
            if header == "candidate_id":
                continue
            payload[header] = _string(getattr(self, header, ""))
        payload["candidate_id"] = self.candidate_id()
        return payload

    def ingestable_row(self) -> dict[str, str]:
        """Return the candidate in the direct-ingest manifest column layout."""
        name = _feed_source_name(self.country, self.provider or self.feed_name)

        if self.license_text:
            license_value = self.license_text
        elif self.license_url:
            license_value = f"see {self.license_url}"
        else:
            license_value = ""

        basis_parts = [self.source_basis or self.discovery_source]
        if self.details_url:
            basis_parts.append(f"details: {self.details_url}")
        release_differs = self.original_release_url != self.selected_url
        if self.original_release_url and release_differs:
            basis_parts.append(f"release: {self.original_release_url}")

        notes = self.notes or ""
        if self.latest_url and self.latest_url != self.selected_url:
            notes = _join_notes(notes, f"Mobility Database mirror: {self.latest_url}")
        if self.osm_license_text:
            notes = _join_notes(notes, f"OSM permission note: {_truncate(self.osm_license_text, 240)}")

        source_basis = "; ".join(part for part in basis_parts if part)
        return {
            "name": _truncate(name, 240),
            "kind": "gtfs",
            "url": self.selected_url,
            "country": self.country,
            "license": _truncate(license_value, 240),
            "mode_scope": _mode_scope_from_features(self.features),
            "source_basis": _truncate(source_basis, 500),
            "priority": self.priority or _candidate_priority(self),
            "notes": _truncate(notes, 1200),
        }
|
||||
|
||||
|
||||
def default_generated_dir() -> Path:
    """Return ``docs/generated`` under the directory two levels above this file."""
    project_root = Path(__file__).resolve().parents[1]
    return project_root.joinpath("docs", "generated")
|
||||
|
||||
|
||||
def build_gtfs_discovery_manifests(
    *,
    output_dir: Path | str | None = None,
    countries: Iterable[str] | None = None,
    include_mobility_database: bool = True,
    include_acceptance_test_list: bool = True,
    include_ptna: bool = True,
    max_ptna_details: int = 80,
    test_limit: int = 24,
    check_urls: bool = False,
    timeout: float = 30.0,
) -> dict[str, object]:
    """Collect GTFS feed candidates and write the discovery manifests.

    Candidates come from the curated seed plus each enabled remote source. They
    are merged and filtered to ingestable GTFS feeds, with an optional URL
    availability check. Three CSV files and a JSON report are written to
    ``output_dir`` (default: ``default_generated_dir()``).

    Returns:
        The report dict that was also written to ``gtfs_discovery_report.json``.
    """
    selected_countries = _normalize_countries(countries)
    target_dir = default_generated_dir() if output_dir is None else Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # The curated seed goes first so it is the first entry seen when merging.
    gathered: list[FeedCandidate] = list(load_curated_ingestable_seed(countries=selected_countries))
    if include_mobility_database:
        gathered += fetch_mobility_database_candidates(countries=selected_countries, timeout=timeout)
    if include_acceptance_test_list:
        gathered += fetch_mobility_acceptance_candidates(countries=selected_countries, timeout=timeout)
    if include_ptna:
        gathered += fetch_ptna_candidates(countries=selected_countries, max_details=max_ptna_details, timeout=timeout)

    merged = merge_candidates(gathered)
    ingestable = [item for item in merged if item.selected_url and item.data_type == "gtfs"]
    if check_urls:
        # Availability probes use a shorter timeout than catalog downloads.
        probe_timeout = min(timeout, 12.0)
        for item in ingestable:
            annotate_url_availability(item, timeout=probe_timeout)
    test_run = select_test_run_candidates(ingestable, limit=test_limit)

    candidates_path = target_dir / "gtfs_feed_candidates.csv"
    ingestable_path = target_dir / "gtfs_ingestable_sources.csv"
    test_path = target_dir / "gtfs_test_run_sources.csv"
    report_path = target_dir / "gtfs_discovery_report.json"

    _write_csv(candidates_path, CANONICAL_HEADERS, [item.row() for item in merged])
    _write_csv(ingestable_path, DIRECT_INGEST_HEADERS, [item.ingestable_row() for item in ingestable])
    _write_csv(test_path, DIRECT_INGEST_HEADERS, [item.ingestable_row() for item in test_run])

    by_source = _count_by(merged, lambda item: item.discovery_source)
    by_country = _count_by(ingestable, lambda item: item.country or "unknown")

    sources = {
        "mobility_database": MOBILITY_DATABASE_FEEDS_URL if include_mobility_database else None,
        "mobility_acceptance_test_list": MOBILITY_DATABASE_ACCEPTANCE_TEST_URL if include_acceptance_test_list else None,
        "ptna": PTNA_GTFS_INDEX_URL if include_ptna else None,
    }
    counts = {
        "candidates": len(merged),
        "ingestable": len(ingestable),
        "test_run": len(test_run),
        "by_source": by_source,
        "ingestable_by_country": by_country,
    }
    files = {
        "candidates": str(candidates_path),
        "ingestable": str(ingestable_path),
        "test_run": str(test_path),
    }
    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "countries": selected_countries or "all",
        "sources": sources,
        "counts": counts,
        "files": files,
    }
    report_path.write_text(json.dumps(report, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    return report
|
||||
|
||||
|
||||
def fetch_mobility_database_candidates(
    *,
    countries: list[str] | None = None,
    timeout: float = 30.0,
    url: str = MOBILITY_DATABASE_FEEDS_URL,
) -> list[FeedCandidate]:
    """Download the Mobility Database feed catalog and turn GTFS rows into candidates.

    Args:
        countries: Upper-case country codes to keep; falsy keeps every country.
        timeout: Timeout passed to the catalog download.
        url: Location of the catalog CSV.

    Returns:
        One candidate per GTFS row that passes the country filter, with
        geography normalized, known download overrides applied and priority set.
    """
    import io

    text = _fetch_text(url, timeout=timeout)
    # Parse from a stream rather than ``text.splitlines()``: splitlines also
    # breaks on characters such as U+2028, VT or FF and drops newlines inside
    # quoted fields, which corrupts rows containing free-text notes.
    rows = csv.DictReader(io.StringIO(text, newline=""))
    candidates: list[FeedCandidate] = []
    for row in rows:
        if _value(row, "data_type").lower() != "gtfs":
            continue
        country = _value(row, "location.country_code").upper()
        if countries and country not in countries:
            continue
        direct_url = _normalize_feed_url(_value(row, "urls.direct_download"))
        latest_url = _normalize_feed_url(_value(row, "urls.latest"))
        selected_url = _choose_feed_url(direct_url, latest_url)
        candidate = FeedCandidate(
            discovery_source="mobility_database",
            country=country,
            subdivision=_value(row, "location.subdivision_name"),
            provider=_value(row, "provider"),
            feed_name=_value(row, "name"),
            stable_id=_value(row, "id"),
            data_type="gtfs",
            status=_value(row, "status"),
            is_official=_value(row, "is_official"),
            selected_url=selected_url,
            direct_download_url=direct_url,
            latest_url=latest_url,
            license_url=_value(row, "urls.license"),
            bbox=_bbox_from_mobility_row(row),
            features=_value(row, "features"),
            source_basis="Mobility Database feed catalog",
            notes=_value(row, "note"),
        )
        normalize_candidate_geography(candidate)
        apply_known_download_overrides(candidate)
        # Priority is computed last so it sees the normalized/overridden fields.
        candidate.priority = _candidate_priority(candidate)
        candidates.append(candidate)
    return candidates
|
||||
|
||||
|
||||
def fetch_mobility_acceptance_candidates(
    *,
    countries: list[str] | None = None,
    timeout: float = 30.0,
    url: str = MOBILITY_DATABASE_ACCEPTANCE_TEST_URL,
) -> list[FeedCandidate]:
    """Download the validator acceptance-test feed list and build candidates.

    Args:
        countries: Upper-case country codes to keep; falsy keeps every country.
        timeout: Timeout passed to the list download.
        url: Location of the acceptance-test CSV.

    Returns:
        One fixed-priority ``P3`` candidate per row that passes the country
        filter and has a non-empty ``urls.latest`` value.
    """
    import io

    text = _fetch_text(url, timeout=timeout)
    # Parse from a stream rather than ``text.splitlines()``: splitlines also
    # breaks on characters such as U+2028, VT or FF and drops newlines inside
    # quoted fields, which corrupts affected rows.
    rows = csv.DictReader(io.StringIO(text, newline=""))
    candidates: list[FeedCandidate] = []
    for row in rows:
        country = _value(row, "country_code").upper()
        if countries and country not in countries:
            continue
        latest_url = _normalize_feed_url(_value(row, "urls.latest"))
        if not latest_url:
            continue
        candidate = FeedCandidate(
            discovery_source="mobility_validator_acceptance",
            country=country,
            subdivision=_value(row, "subdivision_name"),
            provider=_value(row, "provider"),
            # The provider value is used for the feed name as well.
            feed_name=_value(row, "provider"),
            stable_id=_value(row, "stable_id"),
            status="acceptance_test",
            selected_url=latest_url,
            latest_url=latest_url,
            source_basis="MobilityData validator acceptance-test feed list",
            notes="Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.",
            priority="P3",
        )
        normalize_candidate_geography(candidate)
        apply_known_download_overrides(candidate)
        candidates.append(candidate)
    return candidates
|
||||
|
||||
|
||||
def fetch_ptna_candidates(
    *,
    countries: list[str] | None = None,
    max_details: int = 80,
    timeout: float = 30.0,
) -> list[FeedCandidate]:
    """Scrape the PTNA per-country GTFS pages into feed candidates.

    Args:
        countries: Country codes to scan; falsy uses ``DEFAULT_DISCOVERY_COUNTRIES``.
        max_details: Upper bound on detail-page requests across all countries.
        timeout: Timeout for each HTTP request.

    Returns:
        Candidates from every country page that could be fetched. Countries
        whose page request fails are skipped.
    """
    country_codes = countries or DEFAULT_DISCOVERY_COUNTRIES
    # NOTE(review): unreachable while DEFAULT_DISCOVERY_COUNTRIES is non-empty.
    if not country_codes:
        country_codes = discover_ptna_country_codes(timeout=timeout)
    candidates: list[FeedCandidate] = []
    detail_fetches = 0
    for country in country_codes:
        country_url = PTNA_COUNTRY_URL_TEMPLATE.format(country=country)
        try:
            html = _fetch_text(country_url, timeout=timeout)
        except requests.RequestException:
            continue
        for candidate in parse_ptna_country_page(html, country=country, page_url=country_url):
            if candidate.details_url and detail_fetches < max_details:
                # Count the attempt, not just the success: otherwise an
                # unreachable site would trigger one request (and timeout)
                # per candidate, without any bound.
                detail_fetches += 1
                try:
                    detail_html = _fetch_text(candidate.details_url, timeout=timeout)
                    enrich_ptna_candidate_from_details(candidate, detail_html, candidate.details_url)
                except requests.RequestException:
                    candidate.notes = _join_notes(candidate.notes, "PTNA detail page could not be fetched during discovery.")
            candidate.priority = _candidate_priority(candidate)
            candidates.append(candidate)
    return candidates
|
||||
|
||||
|
||||
def discover_ptna_country_codes(*, timeout: float = 30.0) -> list[str]:
    """List the country codes linked from the PTNA GTFS index page.

    A link counts when its path ends in ``/gtfs/<XX>/index.php`` with a
    two-letter upper-case code. Codes are returned once each, in order of
    first appearance.
    """
    index_html = _fetch_text(PTNA_GTFS_INDEX_URL, timeout=timeout)
    country_page = re.compile(r"/gtfs/([A-Z]{2})/index\.php$")
    # Dict keys give ordered de-duplication.
    ordered: dict[str, None] = {}
    for href in _all_links(index_html, PTNA_GTFS_INDEX_URL):
        hit = country_page.search(urlparse(href).path)
        if hit:
            ordered.setdefault(hit.group(1), None)
    return list(ordered)
|
||||
|
||||
|
||||
def parse_ptna_country_page(html: str, *, country: str, page_url: str) -> list[FeedCandidate]:
    """Turn the feed table of a PTNA country page into candidates.

    Rows without a routes or details link, or without a derivable feed id, are
    ignored. Text columns are read by position (1 feed name, 2 provider,
    3/4 validity, 5 version, 6 release date); missing columns yield "".
    """

    def column(texts: list[str], index: int, default: str = "") -> str:
        return texts[index] if len(texts) > index else default

    found: list[FeedCandidate] = []
    for table_row in _parse_table_rows(html, page_url):
        hrefs = [href for cell in table_row.cells for href in cell.links]
        routes_url = _first_link_matching(hrefs, "routes.php?feed=")
        details_url = _first_link_matching(hrefs, "gtfs-details.php?feed=")
        if not (routes_url or details_url):
            continue
        feed_id = _feed_id_from_url(routes_url or details_url)
        if not feed_id:
            continue

        texts = [cell.text for cell in table_row.cells]
        # The release link is read from the seventh cell, when present.
        raw_release = table_row.cells[6].first_external_link if len(table_row.cells) > 6 else ""
        release_link = _normalize_feed_url(raw_release)
        # The release link is only used for download if it looks like a download URL.
        direct_url = release_link if _looks_like_download_url(release_link) else ""

        candidate = FeedCandidate(
            discovery_source="ptna",
            country=country,
            provider=column(texts, 2),
            feed_name=column(texts, 1, feed_id),
            ptna_feed_id=feed_id,
            selected_url=direct_url,
            direct_download_url=direct_url,
            original_release_url=release_link,
            details_url=details_url,
            routes_url=routes_url,
            valid_from=column(texts, 3),
            valid_to=column(texts, 4),
            feed_version=column(texts, 5),
            release_date=column(texts, 6),
            source_basis="PTNA GTFS analysis",
            notes="PTNA candidate; use original publisher URL where available.",
        )
        normalize_candidate_geography(candidate)
        apply_known_download_overrides(candidate)
        found.append(candidate)
    return found
|
||||
|
||||
|
||||
def enrich_ptna_candidate_from_details(candidate: FeedCandidate, html: str, page_url: str) -> None:
    """Update ``candidate`` in place with fields parsed from its PTNA detail page.

    Detail-page values win over existing ones; empty detail values leave the
    candidate untouched. If the candidate has no selected URL yet and the
    release URL looks downloadable, it becomes the selected/direct URL.
    """
    fields = parse_ptna_detail_fields(html, page_url)

    release = fields.get("release url href") or fields.get("release url") or candidate.original_release_url
    candidate.original_release_url = _normalize_feed_url(release)

    # (candidate attribute, lower-cased label on the detail page)
    label_by_attribute = (
        ("license_url", "publisher's license href"),
        ("license_text", "publisher's license"),
        ("osm_license_text", "license given for use in osm"),
        ("valid_from", "feed start date"),
        ("valid_to", "feed end date"),
        ("feed_version", "feed version"),
        ("release_date", "release date"),
    )
    for attribute, label in label_by_attribute:
        setattr(candidate, attribute, fields.get(label) or getattr(candidate, attribute))

    network_guid = fields.get('"network:guid"')
    if network_guid:
        candidate.notes = _join_notes(candidate.notes, f"PTNA network:guid={network_guid}")

    needs_url = not candidate.selected_url
    if needs_url and _looks_like_download_url(candidate.original_release_url):
        candidate.selected_url = _normalize_feed_url(candidate.original_release_url)
        candidate.direct_download_url = candidate.selected_url
    normalize_candidate_geography(candidate)
|
||||
|
||||
|
||||
def parse_ptna_detail_fields(html: str, page_url: str) -> dict[str, str]:
    """Read the label/value table of a PTNA detail page into a dict.

    Keys are the cleaned, lower-cased first-cell texts; values are the cleaned
    second-cell texts. When the second cell has an external link, it is stored
    additionally under ``"<label> href"``. Later rows overwrite earlier ones.
    """
    result: dict[str, str] = {}
    for table_row in _parse_table_rows(html, page_url):
        if len(table_row.cells) < 2:
            continue
        label_cell, value_cell = table_row.cells[0], table_row.cells[1]
        label = _clean_text(label_cell.text).lower()
        if not label:
            continue
        result[label] = _clean_text(value_cell.text)
        link = value_cell.first_external_link
        if link:
            result[f"{label} href"] = link
    return result
|
||||
|
||||
|
||||
def load_curated_ingestable_seed(
    *,
    countries: list[str] | None = None,
    path: Path | str | None = None,
) -> list[FeedCandidate]:
    """Load GTFS candidates from the curated seed CSV.

    Args:
        countries: Country codes to keep; rows with country ``EU`` are always
            kept. Falsy keeps every row.
        path: Seed file location; defaults to ``docs/ingestable_sources_seed.csv``
            two directory levels above this module.

    Returns:
        Candidates for all ``kind == gtfs`` rows that pass the filter, or an
        empty list if the seed file does not exist.
    """
    if path is not None:
        seed_path = Path(path)
    else:
        seed_path = Path(__file__).resolve().parents[1] / "docs" / "ingestable_sources_seed.csv"
    if not seed_path.exists():
        return []

    loaded: list[FeedCandidate] = []
    # utf-8-sig tolerates a byte-order mark at the start of the file.
    with seed_path.open("r", encoding="utf-8-sig", newline="") as handle:
        for record in csv.DictReader(handle):
            if _value(record, "kind").lower() != "gtfs":
                continue
            country = _value(record, "country").upper()
            filtered_out = bool(countries) and country not in countries and country != "EU"
            if filtered_out:
                continue
            name = _value(record, "name")
            feed_url = _normalize_feed_url(_value(record, "url"))
            candidate = FeedCandidate(
                discovery_source="curated_seed",
                country=country,
                provider=name.removesuffix(" GTFS"),
                feed_name=name,
                selected_url=feed_url,
                direct_download_url=feed_url,
                license_text=_value(record, "license"),
                features=_value(record, "mode_scope"),
                priority=_value(record, "priority"),
                source_basis=_value(record, "source_basis") or "curated seed",
                notes=_value(record, "notes"),
            )
            normalize_candidate_geography(candidate)
            apply_known_download_overrides(candidate)
            loaded.append(candidate)
    return loaded
|
||||
|
||||
|
||||
def merge_candidates(candidates: Iterable[FeedCandidate]) -> list[FeedCandidate]:
    """Collapse candidates that share any alias key and return them sorted.

    The first candidate seen for a group is kept and later ones are merged
    into it via ``_merge_candidate``. The result is ordered by priority,
    country, provider and feed name (the latter two case-insensitively).
    """
    kept: dict[str, FeedCandidate] = {}
    owner_of_alias: dict[str, str] = {}
    for candidate in candidates:
        aliases = _candidate_alias_keys(candidate)
        primary_key = aliases[0]
        # First alias that is already known decides which group this joins.
        existing_key = next((owner_of_alias[alias] for alias in aliases if alias in owner_of_alias), None)
        existing = kept.get(existing_key) if existing_key is not None else None
        if existing is None:
            kept[primary_key] = candidate
            owner = primary_key
        else:
            _merge_candidate(existing, candidate)
            owner = existing_key or primary_key
        owner_of_alias.update(dict.fromkeys(aliases, owner))

    def sort_key(item: FeedCandidate):
        return (_priority_sort_key(item.priority), item.country, item.provider.lower(), item.feed_name.lower())

    return sorted(kept.values(), key=sort_key)
|
||||
|
||||
|
||||
def select_test_run_candidates(candidates: Iterable[FeedCandidate], *, limit: int = 24) -> list[FeedCandidate]:
    """Pick up to ``limit`` candidates for a test ingest run.

    Eligible candidates (excluding the validator acceptance list) are sorted
    and then chosen in three passes: feeds matching a preferred token, feeds
    of each curated test country in order, and finally anything left. Each URL
    is taken once, and each country contributes at most 3 feeds (7 for ``DE``
    during the preferred pass).
    """
    pool = [
        item
        for item in candidates
        if item.discovery_source != "mobility_validator_acceptance" and _test_candidate_eligible(item)
    ]
    pool.sort(key=_test_candidate_sort_key)

    selected: list[FeedCandidate] = []
    seen_urls: set[str] = set()
    per_country: dict[str, int] = {}

    def full() -> bool:
        return len(selected) >= limit

    def add(candidate: FeedCandidate, *, force: bool = False) -> None:
        if full():
            return
        url_key = _normalize_url_key(candidate.selected_url)
        if not candidate.selected_url or url_key in seen_urls:
            return
        country = candidate.country or "unknown"
        already_taken = per_country.get(country, 0)
        country_limit = 7 if force and country == "DE" else 3
        if already_taken >= country_limit:
            return
        selected.append(candidate)
        seen_urls.add(url_key)
        per_country[country] = already_taken + 1

    preferred_tokens = (
        "opendata-oepnv.de",
        "download.gtfs.de/germany/",
        "vbb.de/vbbgtfs",
        "rnv-online.de",
        "vrn.de",
        "gtfs.geops.ch",
        "wienerlinien.at",
        "gtfs.openov.nl",
        "gtfs.ovapi.nl",
        "rejseplanen.info",
        "dev.hsl.fi/gtfs",
        "hsldev.com/gtfs",
        "rb_norway-aggregated-gtfs",
        "data.bus-data.dft.gov.uk",
        "transportforireland",
        "gtfs.irail.be/de-lijn",
    )

    # Pass 1: well-known feeds, matched against provider, names, basis and URL.
    for item in pool:
        haystack = " ".join([item.provider, item.feed_name, item.source_basis, item.selected_url]).lower()
        if any(token in haystack for token in preferred_tokens):
            add(item, force=True)

    # Pass 2: walk the curated countries in their listed order.
    for country in CURATED_TEST_COUNTRIES:
        for item in pool:
            if item.country == country:
                add(item)
            if full():
                break
        if full():
            break

    # Pass 3: top up with whatever remains.
    for item in pool:
        add(item)
        if full():
            break
    return selected
|
||||
|
||||
|
||||
def _test_candidate_eligible(candidate: FeedCandidate) -> bool:
    """Return whether *candidate* may be used as an automated test feed.

    A candidate is rejected when it has no selected URL, when its priority
    ranks worse than P2, or when its status, URL, provider, feed name, or
    notes mention deprecation, inactivity, an API-key placeholder, required
    registration, or authentication.
    """
    if not candidate.selected_url or _priority_sort_key(candidate.priority) > 2:
        return False
    haystack = " ".join(
        (candidate.status, candidate.selected_url, candidate.provider, candidate.feed_name, candidate.notes)
    ).lower()
    blockers = ("deprecated", "inactive", "{apikey}", "registration required", "authentication")
    return not any(marker in haystack for marker in blockers)
|
||||
|
||||
|
||||
def annotate_url_availability(candidate: FeedCandidate, *, timeout: float = 10.0) -> FeedCandidate:
    """Probe ``candidate.selected_url`` and record the outcome on *candidate*.

    A HEAD request is tried first. If the server answers 403, 405, or any
    5xx status, a streamed one-byte ranged GET is issued instead. The HTTP
    status, content type, content length, and final URL of the last response
    are stored on the candidate, and ``availability_status`` becomes ``"ok"``
    for statuses below 400 and ``"error"`` otherwise. Request failures are
    recorded as ``"error"`` plus a note; a missing URL yields
    ``"missing_url"``.

    Returns the same candidate object, mutated in place.
    """
    url = candidate.selected_url
    if not url:
        candidate.availability_status = "missing_url"
        return candidate

    base_headers = {"User-Agent": "meubility-workbench-feed-discovery/0.1"}
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout, headers=base_headers)
        head_rejected = response.status_code in {405, 403} or response.status_code >= 500
        if head_rejected:
            # Ask for a single byte so a large feed is never downloaded.
            ranged_headers = {**base_headers, "Range": "bytes=0-0"}
            response = requests.get(
                url,
                allow_redirects=True,
                timeout=timeout,
                headers=ranged_headers,
                stream=True,
            )
        status_code = response.status_code
        candidate.http_status = str(status_code)
        candidate.content_type = response.headers.get("content-type", "")
        candidate.content_length = response.headers.get("content-length", "")
        candidate.final_url = response.url
        candidate.availability_status = "error" if status_code >= 400 else "ok"
        response.close()
    except requests.RequestException as exc:
        candidate.availability_status = "error"
        candidate.notes = _join_notes(candidate.notes, f"Availability check failed: {exc}")
    return candidate
|
||||
|
||||
|
||||
def normalize_candidate_geography(candidate: FeedCandidate) -> None:
    """Set ``candidate.country`` for feeds recognised by known URL or name markers.

    The candidate's URLs, provider, feed name, and source basis are joined
    and lower-cased. The first rule whose marker occurs in that text wins;
    when nothing matches, the country is left untouched.
    """
    haystack = " ".join(
        (
            candidate.selected_url,
            candidate.direct_download_url,
            candidate.latest_url,
            candidate.original_release_url,
            candidate.provider,
            candidate.feed_name,
            candidate.source_basis,
        )
    ).lower()
    # Order matters: earlier rules take precedence over later ones.
    rules = (
        ("DE", ("download.gtfs.de/germany/", "gtfs for germany")),
        ("NO", ("storage.googleapis.com/marduk-production/outbound/gtfs/rb_norway",)),
        ("NL", ("gtfs.ovapi.nl", "openov.nl")),
        ("DE", ("www.nvbw.de/fileadmin/user_upload/service/open_data/",)),
    )
    for country_code, markers in rules:
        if any(marker in haystack for marker in markers):
            candidate.country = country_code
            break
|
||||
|
||||
|
||||
def apply_known_download_overrides(candidate: FeedCandidate) -> None:
    """Switch selected catalog entries to their ``latest_url`` mirror.

    Applies only to the stable ids listed below, and only when the candidate
    actually has a ``latest_url``. A note explaining the substitution is
    appended to the candidate.
    """
    stale_direct_ids = {"mdb-684", "mdb-777"}
    if candidate.stable_id not in stale_direct_ids or not candidate.latest_url:
        return
    candidate.selected_url = candidate.latest_url
    explanation = "Selected Mobility Database latest.zip mirror because the catalog direct URL is known to be stale."
    candidate.notes = _join_notes(candidate.notes, explanation)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _HtmlCell:
|
||||
text: str = ""
|
||||
links: list[str] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def first_external_link(self) -> str:
|
||||
for link in self.links:
|
||||
parsed = urlparse(link)
|
||||
if parsed.scheme in {"http", "https"} and "ptna.openstreetmap.de" not in parsed.netloc:
|
||||
return link
|
||||
return ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class _HtmlRow:
|
||||
cells: list[_HtmlCell] = field(default_factory=list)
|
||||
|
||||
|
||||
class _TableParser(HTMLParser):
    """Collect table rows, with per-cell text and links, from an HTML document.

    Finished rows that contain at least one cell accumulate in ``rows``.
    Relative ``href`` values are resolved against ``base_url``.

    ``</td>``, ``</th>`` and ``</tr>`` are optional in HTML. An open cell is
    therefore also finished when the next cell starts, and an open row when
    the next row starts or the table ends, so markup that omits those end
    tags no longer loses data. Nested tables are not tracked separately: a
    ``<tr>`` inside a cell finishes the enclosing row.
    """

    def __init__(self, base_url: str):
        super().__init__(convert_charrefs=True)
        self.base_url = base_url
        self.rows: list[_HtmlRow] = []
        self._row: _HtmlRow | None = None
        self._cell: _HtmlCell | None = None
        self._active_link: str = ""

    def _finish_cell(self) -> None:
        """Append the open cell (if any) to the open row and reset cell state."""
        if self._row is not None and self._cell is not None:
            self._cell.text = _clean_text(self._cell.text)
            self._row.cells.append(self._cell)
        self._cell = None
        self._active_link = ""

    def _finish_row(self) -> None:
        """Finish the open cell, then store the open row if it has any cells."""
        self._finish_cell()
        if self._row is not None and self._row.cells:
            self.rows.append(self._row)
        self._row = None

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        if tag == "tr":
            # A new row implicitly closes a previous row left open.
            self._finish_row()
            self._row = _HtmlRow()
        elif tag in {"td", "th"} and self._row is not None:
            # A new cell implicitly closes a previous cell left open.
            self._finish_cell()
            self._cell = _HtmlCell()
        elif tag == "a" and self._cell is not None:
            attrs_dict = {key: value or "" for key, value in attrs}
            href = attrs_dict.get("href", "")
            if href:
                self._active_link = urljoin(self.base_url, href)
                self._cell.links.append(self._active_link)

    def handle_endtag(self, tag: str) -> None:
        if tag in {"td", "th"}:
            self._finish_cell()
        elif tag == "a":
            self._active_link = ""
        elif tag in {"tr", "table"}:
            # </table> also flushes a final row whose </tr> was omitted.
            self._finish_row()

    def handle_data(self, data: str) -> None:
        # Text outside any cell is ignored.
        if self._cell is not None:
            self._cell.text += data
|
||||
|
||||
|
||||
class _LinkParser(HTMLParser):
|
||||
def __init__(self, base_url: str):
|
||||
super().__init__(convert_charrefs=True)
|
||||
self.base_url = base_url
|
||||
self.links: list[str] = []
|
||||
|
||||
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
||||
if tag != "a":
|
||||
return
|
||||
for key, value in attrs:
|
||||
if key == "href" and value:
|
||||
self.links.append(urljoin(self.base_url, value))
|
||||
|
||||
|
||||
def _parse_table_rows(html: str, base_url: str) -> list[_HtmlRow]:
    """Parse *html* and return the table rows found; links resolve against *base_url*."""
    table_parser = _TableParser(base_url)
    table_parser.feed(html)
    collected = table_parser.rows
    return collected
|
||||
|
||||
|
||||
def _all_links(html: str, base_url: str) -> list[str]:
    """Return every anchor target in *html* as an absolute URL, in document order."""
    link_parser = _LinkParser(base_url)
    link_parser.feed(html)
    collected = link_parser.links
    return collected
|
||||
|
||||
|
||||
def _fetch_text(url: str, *, timeout: float) -> str:
    """GET *url* with the discovery User-Agent and return the response text.

    ``raise_for_status()`` is called on the response before its text is returned.
    """
    request_headers = {"User-Agent": "meubility-workbench-feed-discovery/0.1"}
    reply = requests.get(url, timeout=timeout, headers=request_headers)
    reply.raise_for_status()
    return reply.text
|
||||
|
||||
|
||||
def _first_link_matching(links: Iterable[str], needle: str) -> str:
|
||||
for link in links:
|
||||
if needle in link:
|
||||
return link
|
||||
return ""
|
||||
|
||||
|
||||
def _feed_id_from_url(url: str) -> str:
|
||||
query = parse_qs(urlparse(url).query)
|
||||
return (query.get("feed") or [""])[0]
|
||||
|
||||
|
||||
def _looks_like_download_url(url: str) -> bool:
|
||||
if not url:
|
||||
return False
|
||||
parsed = urlparse(url)
|
||||
lower_path = parsed.path.lower()
|
||||
lower_url = url.lower()
|
||||
if lower_path.endswith(".zip"):
|
||||
return True
|
||||
if "exportformat=gtfs" in lower_url or "google_transit" in lower_url:
|
||||
return True
|
||||
if lower_path.rstrip("/").endswith(("current_gtfs", "gtfs")):
|
||||
return True
|
||||
if "gtfs.ovapi.nl" in parsed.netloc.lower() and "gtfs" in lower_path:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _normalize_feed_url(url: str) -> str:
    """Clean *url* and prefix ``https://`` when it looks like a bare host.

    Values that already carry a scheme are returned cleaned but otherwise
    unchanged. A scheme-less value gets ``https://`` only when the part before
    the first ``/`` contains a dot; anything else is returned as cleaned.
    """
    text = _clean_text(url)
    if not text:
        return ""
    if urlparse(text).scheme:
        return text
    host_part = text.split("/", 1)[0]
    return f"https://{text}" if "." in host_part else text
|
||||
|
||||
|
||||
def _choose_feed_url(direct_url: str, latest_url: str) -> str:
|
||||
if direct_url:
|
||||
return direct_url
|
||||
return latest_url
|
||||
|
||||
|
||||
def _candidate_priority(candidate: FeedCandidate) -> str:
|
||||
status = candidate.status.lower()
|
||||
official = candidate.is_official.lower() == "true"
|
||||
if candidate.discovery_source == "curated_seed":
|
||||
return candidate.priority or "P1"
|
||||
if status == "active" and official and candidate.direct_download_url:
|
||||
return "P0"
|
||||
if status == "active" and candidate.direct_download_url:
|
||||
return "P1"
|
||||
if status == "active" and candidate.latest_url:
|
||||
return "P2"
|
||||
if candidate.discovery_source == "ptna":
|
||||
return "P2" if candidate.selected_url else "P4"
|
||||
return "P3"
|
||||
|
||||
|
||||
def _test_candidate_sort_key(candidate: FeedCandidate) -> tuple[int, int, str, str]:
    """Build the sort key used to order candidates for the test set.

    The key is: numeric priority, then the position of the country in
    ``CURATED_TEST_COUNTRIES`` (99 when absent) plus one for non-curated
    sources, then the country, then the lower-cased provider.
    """
    try:
        country_rank = CURATED_TEST_COUNTRIES.index(candidate.country)
    except ValueError:
        country_rank = 99
    source_rank = 0 if candidate.discovery_source == "curated_seed" else 1
    return (
        _priority_sort_key(candidate.priority),
        source_rank + country_rank,
        candidate.country,
        candidate.provider.lower(),
    )
|
||||
|
||||
|
||||
def _priority_sort_key(priority: str) -> int:
|
||||
match = re.match(r"P(\d+)", priority or "")
|
||||
return int(match.group(1)) if match else 9
|
||||
|
||||
|
||||
def _candidate_alias_keys(candidate: FeedCandidate) -> list[str]:
    """List every key under which *candidate* may be matched, without duplicates.

    The candidate's own key comes first, followed by its stable id, its
    normalized selected/direct/latest URLs, and its PTNA feed id, each only
    when present. First occurrences keep their position.
    """
    aliases = [candidate.key()]
    if candidate.stable_id:
        aliases.append(f"stable:{candidate.stable_id}")
    known_urls = (candidate.selected_url, candidate.direct_download_url, candidate.latest_url)
    aliases.extend(f"url:{_normalize_url_key(address)}" for address in known_urls if address)
    if candidate.ptna_feed_id:
        aliases.append(f"ptna:{candidate.ptna_feed_id}")
    # dict.fromkeys drops repeats while preserving insertion order.
    return list(dict.fromkeys(aliases))
|
||||
|
||||
|
||||
def _merge_candidate(existing: FeedCandidate, incoming: FeedCandidate) -> None:
    """Fold *incoming* into *existing* in place.

    Steps, in order:
    1. A curated seed overrides the descriptive fields of *existing* with its
       own non-empty values.
    2. The discovery sources are combined.
    3. Every canonical field still empty on *existing* (other than
       ``candidate_id``) is filled from *incoming*.
    4. The better priority wins; source basis and notes are combined.
    """
    curated_fields = ("country", "provider", "feed_name", "license_text", "features", "source_basis", "notes")
    if incoming.discovery_source == "curated_seed":
        for name in curated_fields:
            replacement = getattr(incoming, name, "")
            if replacement:
                setattr(existing, name, replacement)

    existing.discovery_source = _join_unique(existing.discovery_source, incoming.discovery_source)

    for name in CANONICAL_HEADERS:
        if name == "candidate_id" or getattr(existing, name, ""):
            continue
        filler = getattr(incoming, name, "")
        if filler:
            setattr(existing, name, filler)

    existing.priority = _better_priority(existing.priority, incoming.priority)
    existing.source_basis = _join_unique(existing.source_basis, incoming.source_basis)
    existing.notes = _join_notes(existing.notes, incoming.notes)
|
||||
|
||||
|
||||
def _better_priority(left: str, right: str) -> str:
    """Return whichever label has the lower numeric rank; *left* wins ties."""
    left_rank = _priority_sort_key(left)
    right_rank = _priority_sort_key(right)
    if left_rank <= right_rank:
        return left
    return right
|
||||
|
||||
|
||||
def _join_unique(left: str, right: str) -> str:
|
||||
parts: list[str] = []
|
||||
for value in [left, right]:
|
||||
for part in value.split(";"):
|
||||
cleaned = part.strip()
|
||||
if cleaned and cleaned not in parts:
|
||||
parts.append(cleaned)
|
||||
return "; ".join(parts)
|
||||
|
||||
|
||||
def _join_notes(left: str, right: str) -> str:
    """Combine two note strings; delegates to ``_join_unique``."""
    combined = _join_unique(left, right)
    return combined
|
||||
|
||||
|
||||
def _compact_name(value: str) -> str:
    """Return *value* cleaned, with whitespace runs collapsed to single spaces."""
    cleaned = _clean_text(value)
    collapsed = re.sub(r"\s+", " ", cleaned)
    return collapsed.strip()
|
||||
|
||||
|
||||
def _feed_source_name(country: str, value: str) -> str:
    """Build a display name of the form ``"<COUNTRY> <name> GTFS"``.

    The upper-cased country is prefixed unless the name already starts with
    it, and ``" GTFS"`` is appended unless "gtfs" already occurs in the
    result. An empty name becomes ``"GTFS feed"``.
    """
    label = _compact_name(value) or "GTFS feed"
    country_code = country.upper()
    if country_code and not label.upper().startswith(f"{country_code} "):
        label = f"{country_code} {label}"
    return label if "gtfs" in label.lower() else f"{label} GTFS"
|
||||
|
||||
|
||||
def _clean_text(value: str) -> str:
|
||||
cleaned = unescape(value or "").replace("\xa0", " ")
|
||||
cleaned = re.sub(r"\s+", " ", cleaned)
|
||||
return cleaned.strip()
|
||||
|
||||
|
||||
def _mode_scope_from_features(features: str) -> str:
|
||||
lower = features.lower()
|
||||
modes = []
|
||||
if "rail" in lower or "train" in lower:
|
||||
modes.append("rail")
|
||||
if "tram" in lower or "light_rail" in lower:
|
||||
modes.append("tram")
|
||||
if "subway" in lower or "metro" in lower:
|
||||
modes.append("metro")
|
||||
if "bus" in lower or not modes:
|
||||
modes.append("bus")
|
||||
if "ferry" in lower:
|
||||
modes.append("ferry")
|
||||
return ",".join(dict.fromkeys(modes))
|
||||
|
||||
|
||||
def _bbox_from_mobility_row(row: dict[str, str]) -> str:
    """Return ``"min_lon,min_lat,max_lon,max_lat"`` from a catalog row.

    Returns ``""`` unless all four bounding-box columns are non-empty.
    """
    south = _value(row, "location.bounding_box.minimum_latitude")
    north = _value(row, "location.bounding_box.maximum_latitude")
    west = _value(row, "location.bounding_box.minimum_longitude")
    east = _value(row, "location.bounding_box.maximum_longitude")
    if south and north and west and east:
        return f"{west},{south},{east},{north}"
    return ""
|
||||
|
||||
|
||||
def _normalize_countries(countries: Iterable[str] | None) -> list[str] | None:
|
||||
if countries is None:
|
||||
return DEFAULT_DISCOVERY_COUNTRIES
|
||||
normalized = [country.strip().upper() for country in countries if country and country.strip()]
|
||||
if any(country == "ALL" for country in normalized):
|
||||
return None
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_url_key(url: str) -> str:
|
||||
parsed = urlparse(url.strip())
|
||||
scheme = parsed.scheme.lower()
|
||||
netloc = parsed.netloc.lower()
|
||||
path = parsed.path.rstrip("/")
|
||||
query = parsed.query
|
||||
return f"{scheme}://{netloc}{path}" + (f"?{query}" if query else "")
|
||||
|
||||
|
||||
def _write_csv(path: Path, headers: list[str], rows: list[dict[str, str]]) -> None:
|
||||
with path.open("w", encoding="utf-8", newline="") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=headers, extrasaction="ignore")
|
||||
writer.writeheader()
|
||||
writer.writerows(rows)
|
||||
|
||||
|
||||
def _count_by(items: Iterable[FeedCandidate], key_fn) -> dict[str, int]:
|
||||
counts: dict[str, int] = {}
|
||||
for item in items:
|
||||
key = key_fn(item)
|
||||
counts[key] = counts.get(key, 0) + 1
|
||||
return dict(sorted(counts.items()))
|
||||
|
||||
|
||||
def _value(row: dict[str, str], key: str) -> str:
    """Return the cleaned value of *key* in *row*; a missing key yields ``""``."""
    raw = row.get(key, "")
    return _clean_text(raw)
|
||||
|
||||
|
||||
def _string(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _truncate(value: str, length: int) -> str:
|
||||
return value[:length] if value else ""
|
||||
120
app/geofabrik.py
Normal file
120
app/geofabrik.py
Normal file
@@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import Source
|
||||
|
||||
|
||||
GEOFABRIK_INDEX_URL = "https://download.geofabrik.de/index-v1-nogeom.json"
|
||||
_CACHE: dict[str, Any] = {"expires_at": None, "rows": None}
|
||||
|
||||
|
||||
def geofabrik_catalog(q: str | None = None, limit: int = 80) -> list[dict[str, Any]]:
    """Return Geofabrik extracts, optionally filtered by a search term.

    The term is matched case-insensitively as a substring of the extract id,
    name, parent, or space-joined country codes. Results are sorted by
    (parent, name) and capped at *limit*, which is clamped to 1..500.
    """
    needle = (q or "").strip().casefold()

    def matches(row: dict[str, Any]) -> bool:
        return (
            needle in row["id"].casefold()
            or needle in row["name"].casefold()
            or needle in (row.get("parent") or "").casefold()
            or needle in " ".join(row.get("country_codes") or []).casefold()
        )

    rows = _geofabrik_rows()
    if needle:
        rows = [row for row in rows if matches(row)]
    rows.sort(key=lambda row: (row.get("parent") or "", row["name"]))
    cap = max(1, min(limit, 500))
    return rows[:cap]
|
||||
|
||||
|
||||
def geofabrik_entry(geofabrik_id: str) -> dict[str, Any] | None:
    """Look up one extract by id (case-insensitive, whitespace-trimmed).

    Returns ``None`` when no extract has that id.
    """
    wanted = geofabrik_id.strip().casefold()
    return next((row for row in _geofabrik_rows() if row["id"].casefold() == wanted), None)
|
||||
|
||||
|
||||
def create_geofabrik_source(session: Session, geofabrik_id: str, *, import_updates: bool = False) -> Source:
    """Register a Geofabrik extract as an ``osm_pbf`` source and return it.

    If an ``osm_pbf`` source with the same URL already exists it is returned
    unchanged, and no update source is created in that case. Otherwise a new
    source is added and flushed. When *import_updates* is true and the
    extract advertises an updates URL, a companion ``osm_diff`` source is
    added as well.

    Raises:
        ValueError: if the extract is unknown or has no PBF URL.
    """
    entry = geofabrik_entry(geofabrik_id)
    if entry is None:
        raise ValueError(f"Geofabrik extract not found: {geofabrik_id}")
    pbf_url = entry.get("pbf_url")
    if not pbf_url:
        raise ValueError(f"Geofabrik extract has no PBF URL: {geofabrik_id}")

    lookup = select(Source).where(Source.kind == "osm_pbf", Source.url == pbf_url)
    already_registered = session.scalar(lookup)
    if already_registered is not None:
        return already_registered

    # The joined country codes are cut to 8 characters; empty becomes None.
    country_value = ",".join(entry.get("country_codes") or [])[:8] or None
    source = Source(
        name=f"Geofabrik {entry['name']}",
        kind="osm_pbf",
        url=pbf_url,
        country=country_value,
        license="ODbL / Geofabrik extract terms",
        priority="P0 fallback",
        mode_scope="public transport OSM routes, stops, and infrastructure",
        source_basis="OpenStreetMap / Geofabrik extracts",
        notes=_geofabrik_notes(entry, import_updates=import_updates),
    )
    session.add(source)
    session.flush()

    updates_url = entry.get("updates_url")
    if import_updates and updates_url:
        diff_note = (
            f"Diff base for Geofabrik extract {entry['id']}; "
            "applying diffs to a local base extract is not implemented yet."
        )
        session.add(
            Source(
                name=f"Geofabrik {entry['name']} updates",
                kind="osm_diff",
                url=updates_url,
                country=source.country,
                license=source.license,
                priority=source.priority,
                mode_scope=source.mode_scope,
                source_basis="OpenStreetMap / Geofabrik replication diffs",
                notes=diff_note,
            )
        )
    return source
|
||||
|
||||
|
||||
def _geofabrik_rows() -> list[dict[str, Any]]:
    """Return the normalized Geofabrik index, cached in-process for 12 hours.

    Only entries with both an id and a PBF URL are kept. A new list is
    returned on every call; the row dicts themselves are shared with the
    cache.
    """
    now = datetime.now(timezone.utc)
    cached_rows = _CACHE.get("rows")
    expiry = _CACHE.get("expires_at")
    cache_is_fresh = cached_rows is not None and isinstance(expiry, datetime) and expiry > now
    if cache_is_fresh:
        return list(cached_rows)

    response = requests.get(GEOFABRIK_INDEX_URL, timeout=45)
    response.raise_for_status()
    features = response.json().get("features", [])
    normalized = (_normalize_feature(feature) for feature in features)
    rows = [row for row in normalized if row.get("id") and row.get("pbf_url")]

    _CACHE["rows"] = rows
    _CACHE["expires_at"] = now + timedelta(hours=12)
    return list(rows)
|
||||
|
||||
|
||||
def _normalize_feature(feature: dict[str, Any]) -> dict[str, Any]:
|
||||
props = feature.get("properties") or {}
|
||||
urls = props.get("urls") or {}
|
||||
country_codes = props.get("iso3166-1:alpha2") or []
|
||||
if isinstance(country_codes, str):
|
||||
country_codes = [country_codes]
|
||||
return {
|
||||
"id": str(props.get("id") or ""),
|
||||
"name": str(props.get("name") or props.get("id") or ""),
|
||||
"parent": props.get("parent"),
|
||||
"country_codes": country_codes,
|
||||
"pbf_url": urls.get("pbf"),
|
||||
"updates_url": urls.get("updates"),
|
||||
"taginfo_url": urls.get("taginfo"),
|
||||
"urls": urls,
|
||||
}
|
||||
|
||||
|
||||
def _geofabrik_notes(entry: dict[str, Any], *, import_updates: bool) -> str:
|
||||
parts = [
|
||||
f"geofabrik_id={entry['id']}",
|
||||
f"parent={entry.get('parent') or 'root'}",
|
||||
f"updates_url={entry.get('updates_url') or ''}",
|
||||
"diff_source_requested=true" if import_updates else "diff_source_requested=false",
|
||||
"Overlap dedupe is handled by OSM object identity in the route layer; source-specific map layers may still show both extracts.",
|
||||
]
|
||||
return "; ".join(parts)
|
||||
308
app/gtfs_storage.py
Normal file
308
app/gtfs_storage.py
Normal file
@@ -0,0 +1,308 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, GtfsStopTime
|
||||
|
||||
|
||||
GTFS_STORAGE_METADATA_KEY = "gtfs_storage"
|
||||
GTFS_STORAGE_MAIN = "main"
|
||||
GTFS_STORAGE_SIDECAR_STOP_TIMES = "sidecar_stop_times"
|
||||
GTFS_STOP_TIME_COLUMNS = [
|
||||
"trip_id",
|
||||
"stop_id",
|
||||
"stop_sequence",
|
||||
"arrival_time",
|
||||
"departure_time",
|
||||
"arrival_seconds",
|
||||
"departure_seconds",
|
||||
]
|
||||
SQLITE_IN_CHUNK_SIZE = 800
|
||||
|
||||
|
||||
def effective_gtfs_timetable_storage(value: str | None = None) -> str:
    """Resolve the storage mode to use for GTFS stop_times.

    *value* wins over the configured setting, which wins over the sidecar
    default. Main storage is chosen when the resolved value is one of the
    "main" aliases, or when the database is PostgreSQL and sidecars are not
    enabled for it; otherwise sidecar storage is returned.
    """
    main_aliases = {GTFS_STORAGE_MAIN, "main_db", "main_sqlite", "postgres", "postgresql"}
    raw_choice = value or settings.gtfs_timetable_storage or GTFS_STORAGE_SIDECAR_STOP_TIMES
    choice = str(raw_choice).strip().lower()
    wants_main = choice in main_aliases or (
        settings.is_postgresql_database and not settings.postgres_use_sidecars
    )
    return GTFS_STORAGE_MAIN if wants_main else GTFS_STORAGE_SIDECAR_STOP_TIMES
|
||||
|
||||
|
||||
class MissingGtfsSidecar(FileNotFoundError):
    """Raised when a dataset's GTFS sidecar is not configured or not on disk.

    ``dataset_id`` and ``path`` are kept on the exception; ``path`` is
    ``None`` when the dataset references no sidecar at all.
    """

    def __init__(self, dataset_id: int | None, path: Path | None) -> None:
        self.dataset_id = dataset_id
        self.path = path
        super().__init__(
            f"dataset #{dataset_id} does not reference a GTFS sidecar"
            if path is None
            else f"GTFS sidecar does not exist: {path}"
        )
|
||||
|
||||
|
||||
def dataset_metadata(dataset: Dataset) -> dict:
    """Return the dataset's ``metadata_json`` decoded as a dict.

    Empty, invalid, or non-object JSON all yield an empty dict.
    """
    raw = dataset.metadata_json or "{}"
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
|
||||
|
||||
|
||||
def stop_times_are_sidecar(dataset: Dataset | None) -> bool:
    """Tell whether *dataset* keeps its stop_times in a sidecar database.

    The per-table marker in the storage metadata is authoritative when a
    ``tables`` mapping exists; otherwise the overall storage ``mode`` decides.
    ``None`` datasets and datasets without storage metadata return ``False``.
    """
    if dataset is None:
        return False
    storage = dataset_metadata(dataset).get(GTFS_STORAGE_METADATA_KEY)
    if not isinstance(storage, dict):
        return False
    per_table = storage.get("tables")
    if not isinstance(per_table, dict):
        return storage.get("mode") == GTFS_STORAGE_SIDECAR_STOP_TIMES
    return per_table.get("gtfs_stop_times") == "sidecar"
|
||||
|
||||
|
||||
def sidecar_path(dataset: Dataset | None) -> Path | None:
    """Return the sidecar path recorded in the dataset's storage metadata.

    Returns ``None`` for a ``None`` dataset, for missing storage metadata,
    and for an empty ``sidecar_path`` entry.
    """
    if dataset is None:
        return None
    storage = dataset_metadata(dataset).get(GTFS_STORAGE_METADATA_KEY)
    recorded = storage.get("sidecar_path") if isinstance(storage, dict) else None
    return Path(str(recorded)) if recorded else None
|
||||
|
||||
|
||||
def dataset_sidecar_paths(dataset: Dataset) -> list[Path]:
    """Return the dataset's sidecar path as a zero- or one-element list."""
    located = sidecar_path(dataset)
    if located is None:
        return []
    return [located]
|
||||
|
||||
|
||||
def missing_sidecar_paths(dataset: Dataset | None) -> list[str]:
    """Describe what is missing for a sidecar-backed dataset.

    Returns an empty list when the dataset does not use a sidecar or the
    sidecar file exists. Otherwise returns a single entry: the missing path,
    or a message saying that no sidecar path is configured.
    """
    if not stop_times_are_sidecar(dataset):
        return []
    located = sidecar_path(dataset)
    if located is not None:
        return [] if located.exists() else [str(located)]
    label = "unknown" if dataset is None else str(dataset.id)
    return [f"dataset #{label} has no configured GTFS sidecar path"]
|
||||
|
||||
|
||||
def uses_sidecar_stop_times(session: Session, dataset_id: int) -> bool:
    """Load the dataset by id and report whether its stop_times live in a sidecar."""
    dataset = session.get(Dataset, dataset_id)
    return stop_times_are_sidecar(dataset)
|
||||
|
||||
|
||||
@contextmanager
def sidecar_connection(dataset: Dataset) -> Iterator[sqlite3.Connection]:
    """Open the dataset's sidecar database read-only and close it on exit.

    Rows are returned as ``sqlite3.Row``.

    The SQLite URI is built with ``Path.as_uri()`` so that characters with a
    special meaning in URIs (``?``, ``#``, ``%``) and Windows path separators
    are encoded correctly, instead of pasting the raw path into the URI.

    Raises:
        MissingGtfsSidecar: if the dataset has no sidecar path or the file
            does not exist.
    """
    path = sidecar_path(dataset)
    if path is None:
        raise MissingGtfsSidecar(dataset.id, None)
    if not path.exists():
        raise MissingGtfsSidecar(dataset.id, path)
    # as_uri() needs an absolute path, hence resolve().
    uri = f"{path.resolve().as_uri()}?mode=ro"
    connection = sqlite3.connect(uri, uri=True)
    connection.row_factory = sqlite3.Row
    try:
        yield connection
    finally:
        connection.close()
|
||||
|
||||
|
||||
def stop_time_count(session: Session, dataset_id: int) -> int:
    """Count the stop_times rows of a dataset, wherever they are stored.

    Sidecar-backed datasets are counted in the sidecar (0 if it is missing);
    all others are counted in the main database.
    """
    dataset = session.get(Dataset, dataset_id)
    if not stop_times_are_sidecar(dataset):
        main_count = select(func.count()).select_from(GtfsStopTime).where(GtfsStopTime.dataset_id == dataset_id)
        return session.scalar(main_count) or 0
    try:
        with sidecar_connection(dataset) as connection:
            first_row = connection.execute("SELECT COUNT(*) FROM gtfs_stop_times").fetchone()
            return int(first_row[0] or 0)
    except MissingGtfsSidecar:
        return 0
|
||||
|
||||
|
||||
def stop_time_counts_by_dataset(session: Session, dataset_ids: Sequence[int]) -> dict[int, int]:
    """Return ``{dataset_id: stop_time_count}`` for each id, with ids coerced to ``int``."""
    return {int(dataset_id): stop_time_count(session, int(dataset_id)) for dataset_id in dataset_ids}
|
||||
|
||||
|
||||
def scheduled_stop_ids(session: Session, dataset_id: int, stop_ids: Sequence[str]) -> tuple[str, ...]:
    """Return, sorted, those of *stop_ids* that occur in the dataset's stop_times.

    The ids are looked up in chunks of ``SQLITE_IN_CHUNK_SIZE``. A missing
    sidecar yields an empty tuple.
    """
    if not stop_ids:
        return ()
    dataset = session.get(Dataset, dataset_id)
    wanted = [str(stop_id) for stop_id in stop_ids]
    present: set[str] = set()

    if not stop_times_are_sidecar(dataset):
        for batch in _chunks(wanted, SQLITE_IN_CHUNK_SIZE):
            statement = (
                select(GtfsStopTime.stop_id)
                .where(GtfsStopTime.dataset_id == dataset_id, GtfsStopTime.stop_id.in_(batch))
                .group_by(GtfsStopTime.stop_id)
            )
            present.update(str(stop_id) for stop_id in session.scalars(statement).all())
        return tuple(sorted(present))

    try:
        with sidecar_connection(dataset) as connection:
            for batch in _chunks(wanted, SQLITE_IN_CHUNK_SIZE):
                # Only "?" placeholders are interpolated; values are bound.
                placeholders = ", ".join(["?"] * len(batch))
                sql = f"""
                    SELECT stop_id
                    FROM gtfs_stop_times
                    WHERE stop_id IN ({placeholders})
                    GROUP BY stop_id
                    """
                matched = connection.execute(sql, list(batch)).fetchall()
                present.update(str(record["stop_id"]) for record in matched)
    except MissingGtfsSidecar:
        return ()
    return tuple(sorted(present))
|
||||
|
||||
|
||||
def all_scheduled_stop_ids(session: Session, dataset_id: int) -> set[str]:
    """Return every stop id that appears in the dataset's stop_times.

    A missing sidecar yields an empty set.
    """
    dataset = session.get(Dataset, dataset_id)
    if not stop_times_are_sidecar(dataset):
        statement = (
            select(GtfsStopTime.stop_id)
            .where(GtfsStopTime.dataset_id == dataset_id)
            .group_by(GtfsStopTime.stop_id)
        )
        return {str(stop_id) for stop_id in session.scalars(statement).all()}
    try:
        with sidecar_connection(dataset) as connection:
            records = connection.execute("SELECT stop_id FROM gtfs_stop_times GROUP BY stop_id").fetchall()
            return {str(record["stop_id"]) for record in records}
    except MissingGtfsSidecar:
        return set()
|
||||
|
||||
|
||||
def scheduled_stop_ids_by_dataset(session: Session, dataset_ids: Sequence[int]) -> dict[int, set[str]]:
    """Return ``{dataset_id: scheduled stop ids}`` for each id, with ids coerced to ``int``."""
    result: dict[int, set[str]] = {}
    for dataset_id in dataset_ids:
        result[int(dataset_id)] = all_scheduled_stop_ids(session, int(dataset_id))
    return result
|
||||
|
||||
|
||||
def has_scheduled_stop(session: Session, dataset_id: int, stop_id: str) -> bool:
    """Report whether *stop_id* occurs in the dataset's stop_times."""
    matches = scheduled_stop_ids(session, dataset_id, [stop_id])
    return bool(matches)
|
||||
|
||||
|
||||
def stop_times_by_trip(
    session: Session,
    dataset_id: int,
    trip_ids: Sequence[str],
) -> dict[str, list[GtfsStopTime]]:
    """Load stop_times for *trip_ids*, grouped by trip and ordered by sequence.

    Trips are queried in chunks of ``SQLITE_IN_CHUNK_SIZE``. For
    sidecar-backed datasets the rows are converted with
    ``stop_time_from_row``; a missing sidecar yields an empty mapping.
    """
    if not trip_ids:
        return {}
    by_trip: dict[str, list[GtfsStopTime]] = {}
    dataset = session.get(Dataset, dataset_id)
    wanted = [str(trip_id) for trip_id in trip_ids]

    if not stop_times_are_sidecar(dataset):
        for batch in _chunks(wanted, SQLITE_IN_CHUNK_SIZE):
            statement = (
                select(GtfsStopTime)
                .where(GtfsStopTime.dataset_id == dataset_id, GtfsStopTime.trip_id.in_(batch))
                .order_by(GtfsStopTime.trip_id, GtfsStopTime.stop_sequence)
            )
            for stop_time in session.scalars(statement).all():
                by_trip.setdefault(stop_time.trip_id, []).append(stop_time)
        return by_trip

    columns = ", ".join(GTFS_STOP_TIME_COLUMNS)
    try:
        with sidecar_connection(dataset) as connection:
            for batch in _chunks(wanted, SQLITE_IN_CHUNK_SIZE):
                # Only column names and "?" placeholders are interpolated.
                placeholders = ", ".join(["?"] * len(batch))
                sql = f"""
                    SELECT {columns}
                    FROM gtfs_stop_times
                    WHERE trip_id IN ({placeholders})
                    ORDER BY trip_id, stop_sequence
                    """
                for record in connection.execute(sql, list(batch)).fetchall():
                    stop_time = stop_time_from_row(dataset_id, record)
                    by_trip.setdefault(stop_time.trip_id, []).append(stop_time)
    except MissingGtfsSidecar:
        return {}
    return by_trip
|
||||
|
||||
|
||||
def stop_times_for_trip_range(
    session: Session,
    dataset_id: int,
    trip_id: str,
    start_sequence: int,
    end_sequence: int,
) -> list[GtfsStopTime]:
    """Return one trip's stop_times with ``start <= stop_sequence <= end``, in order.

    A missing sidecar yields an empty list.
    """
    dataset = session.get(Dataset, dataset_id)
    if not stop_times_are_sidecar(dataset):
        statement = (
            select(GtfsStopTime)
            .where(
                GtfsStopTime.dataset_id == dataset_id,
                GtfsStopTime.trip_id == trip_id,
                GtfsStopTime.stop_sequence >= start_sequence,
                GtfsStopTime.stop_sequence <= end_sequence,
            )
            .order_by(GtfsStopTime.stop_sequence)
        )
        return list(session.scalars(statement).all())

    columns = ", ".join(GTFS_STOP_TIME_COLUMNS)
    sql = f"""
        SELECT {columns}
        FROM gtfs_stop_times
        WHERE trip_id = ?
          AND stop_sequence >= ?
          AND stop_sequence <= ?
        ORDER BY stop_sequence
        """
    bounds = (trip_id, int(start_sequence), int(end_sequence))
    try:
        with sidecar_connection(dataset) as connection:
            records = connection.execute(sql, bounds).fetchall()
            return [stop_time_from_row(dataset_id, record) for record in records]
    except MissingGtfsSidecar:
        return []
|
||||
|
||||
|
||||
def stop_time_from_row(dataset_id: int, row) -> GtfsStopTime:
    """Build a ``GtfsStopTime`` from a sidecar row.

    Ids are coerced to ``str`` and the sequence to ``int``; the time columns
    are passed through unchanged.
    """
    passthrough = {
        column: row[column]
        for column in ("arrival_time", "departure_time", "arrival_seconds", "departure_seconds")
    }
    return GtfsStopTime(
        dataset_id=dataset_id,
        trip_id=str(row["trip_id"]),
        stop_id=str(row["stop_id"]),
        stop_sequence=int(row["stop_sequence"]),
        **passthrough,
    )
|
||||
|
||||
|
||||
def execute_sidecar_query(session: Session, dataset_id: int, sql: str, params: Sequence[object]) -> list[sqlite3.Row]:
    """Run *sql* with *params* against the dataset's sidecar and return all rows.

    A missing sidecar yields an empty list.

    Raises:
        ValueError: if the dataset does not keep its stop_times in a sidecar.
    """
    dataset = session.get(Dataset, dataset_id)
    if not stop_times_are_sidecar(dataset):
        raise ValueError(f"dataset #{dataset_id} does not use sidecar stop_times")
    bound_values = list(params)
    try:
        with sidecar_connection(dataset) as connection:
            cursor = connection.execute(sql, bound_values)
            return list(cursor.fetchall())
    except MissingGtfsSidecar:
        return []
|
||||
|
||||
|
||||
def _chunks[T](items: Sequence[T], size: int) -> Iterator[Sequence[T]]:
|
||||
for index in range(0, len(items), size):
|
||||
yield items[index : index + size]
|
||||
394
app/harmonization.py
Normal file
394
app/harmonization.py
Normal file
@@ -0,0 +1,394 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import and_, func, select
|
||||
from sqlalchemy.orm import Session, aliased
|
||||
|
||||
from app.data_management import dataset_row_counts
|
||||
from app.models import (
|
||||
CanonicalStopLink,
|
||||
Dataset,
|
||||
GtfsCalendar,
|
||||
GtfsCalendarDate,
|
||||
GtfsRoute,
|
||||
GtfsStop,
|
||||
GtfsStopTime,
|
||||
GtfsTrip,
|
||||
RouteMatch,
|
||||
Source,
|
||||
)
|
||||
|
||||
|
||||
GTFS_QA_NOTE_PREFIX = "[GTFS QA]"
|
||||
|
||||
|
||||
def gtfs_harmonization_inventory(session: Session) -> dict[str, Any]:
    """Return the QA inventory of all GTFS sources plus summary counters.

    The summary reports the number of sources, of sources with an active
    dataset, of datasets overall, and of feeds per QA status.
    """
    feeds = [_feed_inventory_item(session, source) for source in _gtfs_sources(session)]

    def with_status(status: str) -> int:
        return sum(1 for feed in feeds if feed["qa_status"] == status)

    summary = {
        "sources": len(feeds),
        "active_sources": sum(1 for feed in feeds if feed["active_dataset"] is not None),
        "datasets": sum(len(feed["datasets"]) for feed in feeds),
        "ready": with_status("ready"),
        "needs_review": with_status("needs_review"),
        "blocked": with_status("blocked"),
    }
    return {"summary": summary, "feeds": feeds}
|
||||
|
||||
|
||||
def gtfs_harmonization_feed_detail(session: Session, source_id: int) -> dict[str, Any] | None:
    """Return the inventory item of one GTFS source plus its display sections.

    Args:
        session: ORM session used to load the source.
        source_id: Primary key of the source.

    Returns:
        The feed inventory item extended with a ``sections`` list, or
        ``None`` when the source does not exist or its kind is not ``"gtfs"``.
    """
    source = session.get(Source, source_id)
    is_gtfs_source = source is not None and source.kind == "gtfs"
    if not is_gtfs_source:
        return None
    feed = _feed_inventory_item(session, source)
    detail = dict(feed)
    detail["sections"] = _feed_sections(feed)
    return detail
|
||||
|
||||
|
||||
def _gtfs_sources(session: Session) -> list[Source]:
    """Load all sources of kind ``"gtfs"``, ordered by country, priority, name and id."""
    stmt = (
        select(Source)
        .where(Source.kind == "gtfs")
        .order_by(Source.country, Source.priority, Source.name, Source.id)
    )
    return session.scalars(stmt).all()
|
||||
|
||||
|
||||
def _feed_inventory_item(session: Session, source: Source) -> dict[str, Any]:
    """Assemble the QA inventory entry for one GTFS source.

    Args:
        session: ORM session used for row counts and QA queries.
        source: Source whose GTFS datasets are inspected.

    Returns:
        A dict with the source payload, the active dataset payload (or
        ``None``), payloads for every GTFS dataset (active ones first, then
        by creation time and id), the active dataset's row counts, the
        validation / service / overlap / license results, the combined
        issue list and the resulting ``qa_status``.
    """
    gtfs_datasets = [dataset for dataset in source.datasets if dataset.kind == "gtfs"]
    datasets = sorted(gtfs_datasets, key=lambda item: (not item.is_active, item.created_at, item.id))
    active_dataset = next((dataset for dataset in datasets if dataset.is_active), None)
    counts = dataset_row_counts(session, active_dataset.id, active_dataset.kind) if active_dataset is not None else {}
    validation = _validate_gtfs_dataset(session, source, active_dataset, counts)
    overlap = _overlap_summary(session, active_dataset)
    service = _service_horizon(session, active_dataset)
    issues = [*validation["issues"], *service["issues"], *overlap["issues"], *_license_issues(source)]
    qa_status = _qa_status(issues, active_dataset)
    # The active dataset's counts were already computed above; reuse them
    # instead of running the same count queries a second time.
    dataset_payloads = [
        _dataset_payload(
            dataset,
            counts if dataset is active_dataset else dataset_row_counts(session, dataset.id, dataset.kind),
        )
        for dataset in datasets
    ]
    return {
        "source": _source_payload(source),
        "active_dataset": None if active_dataset is None else _dataset_payload(active_dataset, counts),
        "datasets": dataset_payloads,
        "counts": counts,
        "validation": validation,
        "service": service,
        "overlap": overlap,
        "license": _license_payload(source),
        "issues": issues,
        "qa_status": qa_status,
    }
|
||||
|
||||
|
||||
def _source_payload(source: Source) -> dict[str, Any]:
    """Serialize a source row into a plain dict.

    Most keys are copied from the attribute of the same name.
    ``last_run_at`` goes through ``_iso`` and ``qa_review`` is parsed from
    ``source.notes``.
    """
    payload: dict[str, Any] = {}
    for attribute in (
        "id",
        "name",
        "country",
        "license",
        "priority",
        "mode_scope",
        "source_basis",
        "status",
        "enabled",
        "last_error",
    ):
        payload[attribute] = getattr(source, attribute)
    payload["last_run_at"] = _iso(source.last_run_at)
    for attribute in ("url", "catalog_entry_id", "notes"):
        payload[attribute] = getattr(source, attribute)
    payload["qa_review"] = _qa_review_payload(source.notes)
    return payload
|
||||
|
||||
|
||||
def _dataset_payload(dataset: Dataset, counts: dict[str, Any]) -> dict[str, Any]:
    """Serialize a dataset row together with its row counts.

    Args:
        dataset: Dataset row to serialize.
        counts: Row counts to embed under the ``counts`` key (not copied).
    """
    payload: dict[str, Any] = {
        attribute: getattr(dataset, attribute)
        for attribute in ("id", "kind", "is_active", "status", "sha256", "local_path")
    }
    payload["created_at"] = _iso(dataset.created_at)
    payload["counts"] = counts
    return payload
|
||||
|
||||
|
||||
def _validate_gtfs_dataset(session: Session, source: Source, dataset: Dataset | None, counts: dict[str, Any]) -> dict[str, Any]:
    """Check the imported content of a GTFS dataset and collect QA findings.

    Args:
        session: ORM session used for the per-dataset count queries.
        source: Owning source; not read by this function.
        dataset: Active dataset to validate, or ``None`` when there is none.
        counts: Row counts for ``dataset`` keyed by table name.

    Returns:
        A dict with ``status`` (from ``_qa_status``), ``items`` (metric rows)
        and ``issues`` (issue rows). Without a dataset the status is
        ``"blocked"`` and the only issue is ``missing_active_dataset``.
    """
    if dataset is None:
        no_dataset = _issue("missing_active_dataset", "bad", "No active GTFS dataset", "Import this source before harmonization.")
        return {"status": "blocked", "items": [], "issues": [no_dataset]}

    def imported(key: str) -> Any:
        return counts.get(key, 0)

    def tone_if_any(value: int, tone: str) -> str:
        return tone if value else "good"

    # (counts key, metric label, issue title) for content that must be present.
    required = (
        ("agencies", "Agencies", "No agencies imported"),
        ("stops", "Stops", "No stops imported"),
        ("routes", "Routes", "No routes imported"),
        ("trips", "Trips", "No trips imported"),
        ("stop_times", "Stop times", "No stop_times imported"),
    )
    items = [_metric(label, imported(key), "good" if imported(key) else "bad") for key, label, _title in required]
    # Shapes are optional, so their absence only warns.
    items.append(_metric("Shapes", imported("shapes"), "good" if imported("shapes") else "warn"))

    missing_coords = _count(session, GtfsStop, dataset.id, (GtfsStop.lat.is_(None) | GtfsStop.lon.is_(None)))
    out_of_range = (GtfsStop.lat < -90) | (GtfsStop.lat > 90) | (GtfsStop.lon < -180) | (GtfsStop.lon > 180)
    invalid_coords = _count(session, GtfsStop, dataset.id, out_of_range)
    routes_without_trips = _routes_without_trips(session, dataset.id)
    trips_without_stop_times = _trips_without_stop_times(session, dataset.id)
    stop_times_without_seconds = _stop_times_without_seconds(session, dataset.id)
    route_geometry_missing = _count(session, GtfsRoute, dataset.id, GtfsRoute.geometry_geojson.is_(None))
    canonical_links = _count(session, CanonicalStopLink, dataset.id, CanonicalStopLink.object_type == "gtfs_stop")
    raw_match_counts = counts.get("match_counts")
    match_counts = raw_match_counts if isinstance(raw_match_counts, dict) else {}

    links_tone = "warn" if imported("stops") and canonical_links == 0 else "good"
    matches_tone = "warn" if imported("routes") and not imported("matches") else "good"
    items += [
        _metric("Stops missing coordinates", missing_coords, tone_if_any(missing_coords, "bad")),
        _metric("Stops with invalid coordinates", invalid_coords, tone_if_any(invalid_coords, "bad")),
        _metric("Routes without trips", routes_without_trips, tone_if_any(routes_without_trips, "bad")),
        _metric("Trips without stop_times", trips_without_stop_times, tone_if_any(trips_without_stop_times, "bad")),
        _metric("Stop times without parsed seconds", stop_times_without_seconds, tone_if_any(stop_times_without_seconds, "warn")),
        _metric("Routes without geometry", route_geometry_missing, tone_if_any(route_geometry_missing, "warn")),
        _metric("Canonical stop links", canonical_links, links_tone),
        _metric("Route matches", imported("matches"), matches_tone),
    ]

    issues: list[dict[str, str]] = []
    if counts.get("missing_sidecar"):
        issues.append(_issue("missing_sidecar", "bad", "GTFS sidecar is missing", "Queue a recovery import for this dataset."))
    issues.extend(
        _issue(f"missing_{key}", "bad", title, "Required GTFS content is absent or failed to import.")
        for key, _label, title in required
        if not imported(key)
    )
    if missing_coords:
        issues.append(_issue("missing_stop_coordinates", "bad", f"{missing_coords:,} stops have no coordinates", "Stop coordinates are required for deduplication and routing access."))
    if invalid_coords:
        issues.append(_issue("invalid_stop_coordinates", "bad", f"{invalid_coords:,} stops have invalid coordinates", "Fix or exclude invalid stop coordinates before publication."))
    if routes_without_trips:
        issues.append(_issue("routes_without_trips", "warn", f"{routes_without_trips:,} routes have no trips", "These routes cannot contribute timetable service."))
    if trips_without_stop_times:
        issues.append(_issue("trips_without_stop_times", "bad", f"{trips_without_stop_times:,} trips have no stop_times", "These trips cannot be routed."))
    if route_geometry_missing:
        issues.append(_issue("route_geometry_missing", "warn", f"{route_geometry_missing:,} routes have no geometry", "Use GTFS shapes, route-layer matching, or stop-by-stop fallback."))
    if imported("routes") and not imported("shapes"):
        issues.append(_issue("missing_shapes", "warn", "No GTFS shapes imported", "OSM route matching or generated geometry will be needed."))
    if imported("routes") and not match_counts:
        issues.append(_issue("no_route_matching", "warn", "No route-match rows", "Run route matching before route-layer publication QA."))

    return {"status": _qa_status(issues, dataset), "items": items, "issues": issues}
|
||||
|
||||
|
||||
def _service_horizon(session: Session, dataset: Dataset | None) -> dict[str, Any]:
    """Summarize the span of service dates covered by a dataset.

    The start is the smallest and the end the largest value found across
    ``GtfsCalendar.start_date`` / ``end_date`` and ``GtfsCalendarDate.date``.
    The end is compared with today's date in UTC.

    Returns:
        A dict with ISO ``start_date`` / ``end_date`` (or ``None``),
        ``days_until_end``, two metric ``items`` and ``issues``. A missing
        end date or an expired service is ``"bad"``; fewer than 30 days left
        is ``"warn"``. Without a dataset every value is empty.
    """
    if dataset is None:
        return {"start_date": None, "end_date": None, "days_until_end": None, "items": [], "issues": []}

    calendar_stmt = select(func.min(GtfsCalendar.start_date), func.max(GtfsCalendar.end_date)).where(
        GtfsCalendar.dataset_id == dataset.id
    )
    cal_min, cal_max = session.execute(calendar_stmt).one()
    exceptions_stmt = select(func.min(GtfsCalendarDate.date), func.max(GtfsCalendarDate.date)).where(
        GtfsCalendarDate.dataset_id == dataset.id
    )
    date_min, date_max = session.execute(exceptions_stmt).one()

    start_date = _gtfs_date(_min_int(cal_min, date_min))
    end_date = _gtfs_date(_max_int(cal_max, date_max))
    today = datetime.now(timezone.utc).date()

    issues: list[dict[str, str]] = []
    if end_date is None:
        days_until_end = None
        end_tone = "bad"
        issues.append(_issue("service_horizon_missing", "bad", "No service calendar horizon", "calendar.txt or calendar_dates.txt is required for reliable routing."))
    else:
        days_until_end = (end_date - today).days
        if days_until_end < 0:
            end_tone = "bad"
            issues.append(_issue("service_horizon_expired", "bad", f"Service expired {abs(days_until_end):,} days ago", "Update or exclude this feed."))
        elif days_until_end < 30:
            end_tone = "warn"
            issues.append(_issue("service_horizon_short", "warn", f"Service ends in {days_until_end:,} days", "Update cadence is too close for publication confidence."))
        else:
            end_tone = "good"

    start_iso = None if start_date is None else start_date.isoformat()
    end_iso = None if end_date is None else end_date.isoformat()
    return {
        "start_date": start_iso,
        "end_date": end_iso,
        "days_until_end": days_until_end,
        "items": [
            _metric("Service starts", start_iso or "n/a", "info"),
            _metric("Service ends", end_iso or "n/a", end_tone),
        ],
        "issues": issues,
    }
|
||||
|
||||
|
||||
def _overlap_summary(session: Session, dataset: Dataset | None) -> dict[str, Any]:
    """Report how much a dataset overlaps with other active GTFS datasets.

    Returns:
        A dict with metric ``items`` and ``issues`` for shared route keys
        and shared canonical stops. Both lists are empty without a dataset.
    """
    if dataset is None:
        return {"items": [], "issues": []}

    route_key_overlaps = _shared_route_keys(session, dataset.id)
    canonical_stop_overlaps = _shared_canonical_stops(session, dataset.id)

    issues: list[dict[str, str]] = []
    if route_key_overlaps:
        issues.append(_issue("shared_route_keys", "warn", f"{route_key_overlaps:,} route keys also exist in another active feed", "Deduplicate or rank source authority for overlapping routes."))
    if canonical_stop_overlaps:
        issues.append(_issue("shared_canonical_stops", "warn", f"{canonical_stop_overlaps:,} canonical stops are shared with another active feed", "This is useful linking evidence, but conflicts need review."))

    items = [
        _metric(label, value, "warn" if value else "good")
        for label, value in (
            ("Shared route keys", route_key_overlaps),
            ("Shared canonical stops", canonical_stop_overlaps),
        )
    ]
    return {"items": items, "issues": issues}
|
||||
|
||||
|
||||
def _license_payload(source: Source) -> dict[str, Any]:
|
||||
text = (source.license or "").strip()
|
||||
unknown = not text or "unknown" in text.lower()
|
||||
return {
|
||||
"label": text or "unknown",
|
||||
"redistribution_status": "unknown" if unknown else "review_required",
|
||||
"tone": "warn" if unknown else "info",
|
||||
}
|
||||
|
||||
|
||||
def _license_issues(source: Source) -> list[dict[str, str]]:
    """Return a single warning issue when the license status is unknown, else nothing."""
    status = _license_payload(source)["redistribution_status"]
    if status != "unknown":
        return []
    return [_issue("license_unknown", "warn", "License/redistribution status is unknown", "Publication needs explicit import, derivation, redistribution, and attribution flags.")]
|
||||
|
||||
|
||||
def _qa_review_payload(notes: str | None) -> dict[str, Any]:
    """Extract the QA review fields stored in a source's notes.

    Only the first line that starts with ``GTFS_QA_NOTE_PREFIX`` is used.
    The rest of that line is split on ``;`` into ``key=value`` pairs, and
    parts without ``=`` are ignored.

    Returns:
        A dict with ``status`` (default ``"unreviewed"``), ``note``
        (default ``""``) and ``updated_at`` (default ``None``).
    """
    if not notes:
        return {"status": "unreviewed", "note": "", "updated_at": None}

    marked_line = next(
        (line for line in str(notes).splitlines() if line.startswith(GTFS_QA_NOTE_PREFIX)),
        None,
    )
    if marked_line is None:
        return {"status": "unreviewed", "note": "", "updated_at": None}

    fields: dict[str, str] = {}
    body = marked_line[len(GTFS_QA_NOTE_PREFIX) :].strip()
    for part in body.split(";"):
        key, separator, value = part.partition("=")
        if separator:
            fields[key.strip()] = value.strip()
    return {
        "status": fields.get("status") or "unreviewed",
        "note": fields.get("note") or "",
        "updated_at": fields.get("updated_at"),
    }
|
||||
|
||||
|
||||
def _routes_without_trips(session: Session, dataset_id: int) -> int:
    """Count routes of a dataset that no trip of the same dataset references by ``route_id``."""
    has_trip = (
        select(GtfsTrip.id)
        .where(GtfsTrip.dataset_id == dataset_id, GtfsTrip.route_id == GtfsRoute.route_id)
        .exists()
    )
    stmt = select(func.count()).select_from(GtfsRoute).where(GtfsRoute.dataset_id == dataset_id, ~has_trip)
    orphaned = session.scalar(stmt)
    return int(orphaned or 0)
|
||||
|
||||
|
||||
def _trips_without_stop_times(session: Session, dataset_id: int) -> int:
    """Count trips of a dataset that have no ``GtfsStopTime`` row with the same ``trip_id``.

    NOTE(review): this only looks at the ``GtfsStopTime`` table. Datasets
    that keep stop_times in a sidecar presumably have no rows there and
    would report every trip; confirm against the import path.
    """
    has_stop_time = (
        select(GtfsStopTime.id)
        .where(GtfsStopTime.dataset_id == dataset_id, GtfsStopTime.trip_id == GtfsTrip.trip_id)
        .exists()
    )
    stmt = select(func.count()).select_from(GtfsTrip).where(GtfsTrip.dataset_id == dataset_id, ~has_stop_time)
    empty_trips = session.scalar(stmt)
    return int(empty_trips or 0)
|
||||
|
||||
|
||||
def _stop_times_without_seconds(session: Session, dataset_id: int) -> int:
    """Count stop_times of a dataset where both ``arrival_seconds`` and ``departure_seconds`` are NULL."""
    stmt = (
        select(func.count())
        .select_from(GtfsStopTime)
        .where(
            GtfsStopTime.dataset_id == dataset_id,
            GtfsStopTime.arrival_seconds.is_(None),
            GtfsStopTime.departure_seconds.is_(None),
        )
    )
    untimed = session.scalar(stmt)
    return int(untimed or 0)
|
||||
|
||||
|
||||
def _shared_route_keys(session: Session, dataset_id: int) -> int:
    """Count distinct route keys of a dataset that also occur in another active GTFS dataset.

    NULL and empty route keys of the given dataset are ignored.
    """
    current = aliased(GtfsRoute)
    other = aliased(GtfsRoute)
    other_dataset = aliased(Dataset)
    # Same key, but carried by a route that belongs to a different dataset.
    same_key_elsewhere = and_(other.route_key == current.route_key, other.dataset_id != current.dataset_id)
    stmt = (
        select(func.count(func.distinct(current.route_key)))
        .select_from(current)
        .join(other, same_key_elsewhere)
        .join(other_dataset, other_dataset.id == other.dataset_id)
        .where(
            current.dataset_id == dataset_id,
            current.route_key.is_not(None),
            current.route_key != "",
            other_dataset.kind == "gtfs",
            other_dataset.is_active.is_(True),
        )
    )
    shared = session.scalar(stmt)
    return int(shared or 0)
|
||||
|
||||
|
||||
def _shared_canonical_stops(session: Session, dataset_id: int) -> int:
    """Count distinct canonical stops linked from this dataset and from another active GTFS dataset.

    Only links with ``object_type == "gtfs_stop"`` are considered on both sides.
    """
    current = aliased(CanonicalStopLink)
    other = aliased(CanonicalStopLink)
    other_dataset = aliased(Dataset)
    # Same canonical stop, but linked from a different dataset.
    same_stop_elsewhere = and_(
        other.canonical_stop_id == current.canonical_stop_id,
        other.dataset_id != current.dataset_id,
    )
    stmt = (
        select(func.count(func.distinct(current.canonical_stop_id)))
        .select_from(current)
        .join(other, same_stop_elsewhere)
        .join(other_dataset, other_dataset.id == other.dataset_id)
        .where(
            current.dataset_id == dataset_id,
            current.object_type == "gtfs_stop",
            other.object_type == "gtfs_stop",
            other_dataset.kind == "gtfs",
            other_dataset.is_active.is_(True),
        )
    )
    shared = session.scalar(stmt)
    return int(shared or 0)
|
||||
|
||||
|
||||
def _count(session: Session, model: Any, dataset_id: int, *criteria: Any) -> int:
    """Count rows of ``model`` for one dataset, optionally narrowed by extra criteria.

    Args:
        session: ORM session that executes the query.
        model: Mapped class exposing a ``dataset_id`` column.
        dataset_id: Dataset to restrict the count to.
        *criteria: Additional filter expressions, all AND-ed together.
    """
    conditions = [model.dataset_id == dataset_id, *criteria]
    total = session.scalar(select(func.count()).select_from(model).where(*conditions))
    return int(total or 0)
|
||||
|
||||
|
||||
def _metric(label: str, value: Any, tone: str = "info", description: str = "") -> dict[str, Any]:
|
||||
return {"label": label, "value": value, "tone": tone, "description": description}
|
||||
|
||||
|
||||
def _issue(issue_id: str, severity: str, title: str, detail: str) -> dict[str, str]:
|
||||
return {"id": issue_id, "severity": severity, "title": title, "detail": detail}
|
||||
|
||||
|
||||
def _qa_status(issues: list[dict[str, str]], dataset: Dataset | None) -> str:
|
||||
if dataset is None or any(issue.get("severity") == "bad" for issue in issues):
|
||||
return "blocked"
|
||||
if any(issue.get("severity") == "warn" for issue in issues):
|
||||
return "needs_review"
|
||||
return "ready"
|
||||
|
||||
|
||||
def _feed_sections(feed: dict[str, Any]) -> list[dict[str, Any]]:
    """Arrange a feed inventory item into titled sections of metric rows.

    Returns:
        Four sections in fixed order: validation, service, overlap, license.
        The license section is built from the feed's ``license`` payload.
    """
    titled = (
        ("validation", "GTFS Validation"),
        ("service", "Service Horizon"),
        ("overlap", "Overlap and Deduplication"),
    )
    sections = [{"id": key, "title": title, "items": feed[key]["items"]} for key, title in titled]
    license_items = [
        _metric("Redistribution", feed["license"]["redistribution_status"], feed["license"]["tone"]),
        _metric("License", feed["license"]["label"], feed["license"]["tone"]),
    ]
    sections.append({"id": "license", "title": "License", "items": license_items})
    return sections
|
||||
|
||||
|
||||
def _gtfs_date(value: int | None) -> date | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
return datetime.strptime(str(int(value)), "%Y%m%d").date()
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _min_int(*values: int | None) -> int | None:
|
||||
clean = [int(value) for value in values if value is not None]
|
||||
return min(clean) if clean else None
|
||||
|
||||
|
||||
def _max_int(*values: int | None) -> int | None:
|
||||
clean = [int(value) for value in values if value is not None]
|
||||
return max(clean) if clean else None
|
||||
|
||||
|
||||
def _iso(value: datetime | None) -> str | None:
|
||||
return None if value is None else value.isoformat()
|
||||
360
app/itineraries.py
Normal file
360
app/itineraries.py
Normal file
@@ -0,0 +1,360 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.journey import duration_minutes_ceil, find_journeys, format_duration_label
|
||||
from app.models import Itinerary, ItineraryLeg, TravelRequest
|
||||
from app.routing import route_between_points
|
||||
|
||||
|
||||
def generate_itineraries(
    db: Session,
    *,
    from_stop_id: str,
    to_stop_id: str,
    via_stop_id: str | None,
    departure: str,
    service_date: str | None,
    max_transfers: int,
    transfer_seconds: int,
    limit: int,
    source_ids: list[int] | None,
    preferences: dict[str, Any] | None = None,
) -> dict:
    """Store a travel request and the candidate itineraries generated for it.

    The request row is added and flushed first. One itinerary with legs is
    stored for every journey returned by ``find_journeys``, then an optional
    car itinerary, then placeholder itineraries. The "Car only" placeholder
    is included only when no car itinerary could be built. Rows are
    flushed but not committed.

    Args:
        db: ORM session that receives the new rows.
        from_stop_id: Origin stop identifier.
        to_stop_id: Destination stop identifier.
        via_stop_id: Optional intermediate stop identifier.
        departure: Departure time, forwarded to ``find_journeys``.
        service_date: Optional service date, forwarded to ``find_journeys``.
        max_transfers: Transfer limit; negative values are treated as 0.
        transfer_seconds: Transfer buffer; negative values are treated as 0.
        limit: Maximum number of journeys, forwarded to ``find_journeys``.
        source_ids: Optional source ids to restrict the search to.
        preferences: Optional preferences stored as JSON on the request.

    Returns:
        A dict with the serialized ``request``, the ``journey_context``
        (from / to / via / sources of the journey result) and the serialized
        ``itineraries`` in creation order.
    """
    transfer_cap = max(0, max_transfers)
    transfer_buffer = max(0, transfer_seconds)
    source_filter = ",".join(str(source_id) for source_id in source_ids or []) or None

    request = TravelRequest(
        origin_stop_id=from_stop_id,
        destination_stop_id=to_stop_id,
        via_stop_id=via_stop_id or None,
        departure_time=departure,
        service_date=service_date or None,
        max_transfers=transfer_cap,
        transfer_seconds=transfer_buffer,
        source_filter=source_filter,
        preferences_json=json.dumps(preferences or {}, separators=(",", ":")),
    )
    db.add(request)
    # Flush so request.id is available for the itineraries below.
    db.flush()

    journey_result = find_journeys(
        db=db,
        from_stop_id=from_stop_id,
        to_stop_id=to_stop_id,
        via_stop_id=via_stop_id,
        departure=departure,
        service_date=service_date,
        max_transfers=transfer_cap,
        transfer_seconds=transfer_buffer,
        limit=limit,
        source_ids=source_ids,
    )

    itineraries: list[Itinerary] = []
    for position, journey in enumerate(journey_result.get("journeys", []), start=1):
        transit_option = _journey_itinerary(request.id, journey, position)
        db.add(transit_option)
        # Flush so the itinerary id exists before its legs reference it.
        db.flush()
        _add_journey_legs(db, transit_option.id, journey)
        itineraries.append(transit_option)

    car_itinerary = _car_itinerary(db, request.id, journey_result.get("from"), journey_result.get("to"))
    if car_itinerary is not None:
        db.add(car_itinerary)
        db.flush()
        _add_routing_leg(db, car_itinerary.id, car_itinerary)
        itineraries.append(car_itinerary)

    placeholders = _placeholder_itineraries(
        request.id,
        journey_result.get("from"),
        journey_result.get("to"),
        service_date=service_date,
        include_car=car_itinerary is None,
    )
    for placeholder in placeholders:
        db.add(placeholder)
        db.flush()
        itineraries.append(placeholder)

    db.flush()
    journey_context = {
        "from": journey_result.get("from"),
        "to": journey_result.get("to"),
        "via": journey_result.get("via"),
        "sources": journey_result.get("sources", []),
    }
    return {
        "request": travel_request_payload(request),
        "journey_context": journey_context,
        "itineraries": [itinerary_payload(db, stored) for stored in itineraries],
    }
|
||||
|
||||
|
||||
def travel_request_payload(request: TravelRequest) -> dict[str, Any]:
    """Serialize a travel request row into a plain dict.

    ``preferences`` is decoded from ``preferences_json`` and ``created_at``
    is ISO-formatted, or ``None`` when unset.
    """
    payload: dict[str, Any] = {
        attribute: getattr(request, attribute)
        for attribute in (
            "id",
            "origin_stop_id",
            "destination_stop_id",
            "via_stop_id",
            "departure_time",
            "service_date",
            "max_transfers",
            "transfer_seconds",
            "source_filter",
        )
    }
    payload["preferences"] = _json_dict(request.preferences_json)
    payload["created_at"] = request.created_at.isoformat() if request.created_at else None
    return payload
|
||||
|
||||
|
||||
def itinerary_payload(db: Session, itinerary: Itinerary) -> dict[str, Any]:
    """Serialize an itinerary together with its legs.

    Args:
        db: ORM session used to load the legs ordered by ``sequence``.
        itinerary: Itinerary row to serialize.

    Returns:
        A dict of the itinerary fields, with the JSON columns decoded into
        ``summary``, ``score`` and ``payload``, the serialized ``legs`` and
        ISO timestamps (or ``None``).
    """
    legs_stmt = (
        select(ItineraryLeg)
        .where(ItineraryLeg.itinerary_id == itinerary.id)
        .order_by(ItineraryLeg.sequence)
    )
    legs = db.scalars(legs_stmt).all()

    payload: dict[str, Any] = {
        attribute: getattr(itinerary, attribute)
        for attribute in ("id", "request_id", "title", "family", "status", "saved")
    }
    payload["summary"] = _json_dict(itinerary.summary_json)
    payload["score"] = _json_dict(itinerary.score_json)
    payload["payload"] = _json_dict(itinerary.payload_json)
    payload["legs"] = [itinerary_leg_payload(leg) for leg in legs]
    payload["created_at"] = itinerary.created_at.isoformat() if itinerary.created_at else None
    payload["updated_at"] = itinerary.updated_at.isoformat() if itinerary.updated_at else None
    return payload
|
||||
|
||||
|
||||
def itinerary_leg_payload(leg: ItineraryLeg) -> dict[str, Any]:
    """Serialize an itinerary leg row, decoding ``payload_json`` into ``payload``."""
    payload: dict[str, Any] = {
        attribute: getattr(leg, attribute)
        for attribute in (
            "id",
            "itinerary_id",
            "sequence",
            "mode",
            "route_ref",
            "route_name",
            "from_name",
            "to_name",
            "departure_time",
            "arrival_time",
            "locked",
        )
    }
    payload["payload"] = _json_dict(leg.payload_json)
    return payload
|
||||
|
||||
|
||||
def set_itinerary_saved(db: Session, itinerary: Itinerary, saved: bool) -> dict[str, Any]:
    """Mark an itinerary as saved or unsaved and return its refreshed payload.

    The status becomes ``"saved"`` or ``"candidate"`` accordingly and
    ``updated_at`` is set to the current UTC time. The change is flushed,
    not committed.
    """
    itinerary.saved = saved
    if saved:
        itinerary.status = "saved"
    else:
        itinerary.status = "candidate"
    itinerary.updated_at = datetime.now(timezone.utc)
    db.flush()
    return itinerary_payload(db, itinerary)
|
||||
|
||||
|
||||
def set_leg_locked(db: Session, leg: ItineraryLeg, locked: bool) -> dict[str, Any]:
    """Lock or unlock a leg and return its payload.

    When the parent itinerary can be loaded, its ``updated_at`` is set to
    the current UTC time. The change is flushed, not committed.
    """
    leg.locked = locked
    parent = db.get(Itinerary, leg.itinerary_id)
    if parent is not None:
        parent.updated_at = datetime.now(timezone.utc)
    db.flush()
    return itinerary_leg_payload(leg)
|
||||
|
||||
|
||||
def recent_itineraries(db: Session, *, saved_only: bool = False, limit: int = 30) -> list[dict[str, Any]]:
    """Return the most recently updated itineraries as payload dicts.

    Args:
        db: ORM session used for the query.
        saved_only: When true, only itineraries with ``saved`` set are returned.
        limit: Requested number of rows; clamped to the range 1..100.
    """
    page_size = max(1, min(limit, 100))
    stmt = select(Itinerary)
    if saved_only:
        stmt = stmt.where(Itinerary.saved.is_(True))
    stmt = stmt.order_by(Itinerary.updated_at.desc(), Itinerary.id.desc()).limit(page_size)
    return [itinerary_payload(db, row) for row in db.scalars(stmt).all()]
|
||||
|
||||
|
||||
def _journey_itinerary(request_id: int, journey: dict, index: int) -> Itinerary:
    """Build an unsaved public-transport candidate itinerary from one journey.

    Args:
        request_id: Id of the owning travel request.
        journey: Journey dict as produced by the journey search.
        index: Number used in the title ("Public transport option N").
    """
    legs = journey.get("legs", [])
    summary: dict[str, Any] = {
        key: journey.get(key)
        for key in ("departure_time", "arrival_time", "duration_minutes", "duration_label", "transfers")
    }
    summary["leg_count"] = len(legs)
    # Fall back to the route id when a leg has no public route reference.
    summary["route_refs"] = [leg.get("route_ref") or leg.get("route_id") for leg in legs]
    score = _journey_score(journey)

    def compact(data: dict) -> str:
        return json.dumps(data, separators=(",", ":"))

    return Itinerary(
        request_id=request_id,
        title=f"Public transport option {index}",
        family="public_transport",
        status="candidate",
        saved=False,
        summary_json=compact(summary),
        score_json=compact(score),
        payload_json=compact({"journey": journey}),
    )
|
||||
|
||||
|
||||
def _add_journey_legs(db: Session, itinerary_id: int, journey: dict) -> None:
    """Add one unlocked ``ItineraryLeg`` per journey leg, numbered from 1.

    Endpoint names fall back to the stop id when no name is present. The
    raw leg is kept under ``journey_leg`` in the leg's JSON payload.
    """

    def endpoint_label(endpoint: dict | None) -> Any:
        stop = endpoint or {}
        return stop.get("name") or stop.get("stop_id")

    for sequence, leg in enumerate(journey.get("legs", []), start=1):
        row = ItineraryLeg(
            itinerary_id=itinerary_id,
            sequence=sequence,
            mode=leg.get("mode"),
            route_ref=leg.get("route_ref"),
            route_name=leg.get("route_name"),
            from_name=endpoint_label(leg.get("from")),
            to_name=endpoint_label(leg.get("to")),
            departure_time=leg.get("departure_time"),
            arrival_time=leg.get("arrival_time"),
            locked=False,
            payload_json=json.dumps({"journey_leg": leg}, separators=(",", ":")),
        )
        db.add(row)
|
||||
|
||||
|
||||
def _car_itinerary(db: Session, request_id: int, from_stop: dict | None, to_stop: dict | None) -> Itinerary | None:
    """Build a "Car only" comparison itinerary between two stops.

    Args:
        db: ORM session handed to the road router.
        request_id: Id of the owning travel request.
        from_stop: Origin stop dict; ``lon`` / ``lat`` are read from it.
        to_stop: Destination stop dict; ``lon`` / ``lat`` are read from it.

    Returns:
        An unsaved candidate itinerary carrying the routing result, or
        ``None`` when a coordinate is missing or unparsable, or when the
        routing call raises.
    """
    origin = from_stop or {}
    destination = to_stop or {}
    from_lon = _float_or_none(origin.get("lon"))
    from_lat = _float_or_none(origin.get("lat"))
    to_lon = _float_or_none(destination.get("lon"))
    to_lat = _float_or_none(destination.get("lat"))
    if any(coordinate is None for coordinate in (from_lon, from_lat, to_lon, to_lat)):
        return None

    try:
        route = route_between_points(
            db,
            from_lon=from_lon,
            from_lat=from_lat,
            to_lon=to_lon,
            to_lat=to_lat,
            mode="drive",
            max_visited=300_000,
        )
    except Exception:  # noqa: BLE001 - car comparison is optional
        return None

    duration_seconds = _float_or_none(route.get("duration_seconds"))
    duration_minutes = duration_minutes_ceil(duration_seconds)
    distance_m = _float_or_none(route.get("distance_m"))
    distance_km = None if distance_m is None else round(distance_m / 1000, 1)

    summary = {
        "from": origin.get("name") or origin.get("stop_id") or "origin",
        "to": destination.get("name") or destination.get("stop_id") or "destination",
        "duration_minutes": duration_minutes,
        "duration_label": format_duration_label(duration_seconds),
        "distance_km": distance_km,
        "transfers": 0,
        "engine": route.get("engine"),
    }
    score = {
        "duration_minutes": duration_minutes,
        "transfers": 0,
        "complexity": 1,
        "emissions": "high",
        "estimated_cost": None,
    }

    def compact(data: dict) -> str:
        return json.dumps(data, separators=(",", ":"))

    return Itinerary(
        request_id=request_id,
        title="Car only",
        family="car",
        status="candidate",
        saved=False,
        summary_json=compact(summary),
        score_json=compact(score),
        payload_json=compact({"routing": route}),
    )
|
||||
|
||||
|
||||
def _add_routing_leg(db: Session, itinerary_id: int, itinerary: Itinerary) -> None:
    """Add the single road leg of a routed itinerary.

    The routing result is read from the ``routing`` key of the itinerary's
    JSON payload. Nothing is added when that entry is not a dict. Endpoint
    names are the OSM node ids of the route's start and target nodes, with
    "origin" / "destination" as fallbacks.
    """
    payload = _json_dict(itinerary.payload_json)
    route = payload.get("routing") if isinstance(payload, dict) else None
    if not isinstance(route, dict):
        return

    start_node = route.get("start_node") or {}
    target_node = route.get("target_node") or {}
    road_leg = ItineraryLeg(
        itinerary_id=itinerary_id,
        sequence=1,
        mode=str(route.get("mode") or "drive"),
        route_ref=None,
        route_name="Road route",
        from_name=str(start_node.get("osm_node_id") or "origin"),
        to_name=str(target_node.get("osm_node_id") or "destination"),
        departure_time=None,
        arrival_time=None,
        locked=False,
        payload_json=json.dumps({"routing_leg": route}, separators=(",", ":")),
    )
    db.add(road_leg)
|
||||
|
||||
|
||||
def _placeholder_itineraries(
    request_id: int,
    from_stop: dict | None,
    to_stop: dict | None,
    *,
    service_date: str | None,
    include_car: bool = True,
) -> list[Itinerary]:
    """Build placeholder itineraries for option families that are not routed.

    Args:
        request_id: Id of the owning travel request.
        from_stop: Origin stop dict used for the summary label.
        to_stop: Destination stop dict used for the summary label.
        service_date: Service date copied into every summary.
        include_car: When true, a "Car only" placeholder comes first.

    Returns:
        Unsaved itineraries with status ``"placeholder"`` (not added to
        any session), each carrying a note about what is missing.
    """
    origin = from_stop or {}
    destination = to_stop or {}
    from_name = origin.get("name") or origin.get("stop_id") or "origin"
    to_name = destination.get("name") or destination.get("stop_id") or "destination"

    # (family, title, note, score)
    specs: list[tuple[str, str, str, dict[str, Any]]] = []
    if include_car:
        specs.append(("car", "Car only", "Needs road-routing connector", {"complexity": 1, "emissions": "high"}))
    specs.extend(
        [
            ("car_ferry", "Car + ferry", "Needs ferry-port candidate graph", {"complexity": 3, "emissions": "medium_high"}),
            ("flight_access", "Flight + airport access", "Needs airport/flight schedule connector", {"complexity": 4, "emissions": "high"}),
            ("rail_long_stay", "Rail with adjustable city stop", "Use via stop and leg locking to refine", {"complexity": 3, "emissions": "low"}),
        ]
    )

    def compact(data: dict) -> str:
        return json.dumps(data, separators=(",", ":"))

    def summary_for(note: str) -> dict[str, Any]:
        return {
            "from": from_name,
            "to": to_name,
            "service_date": service_date,
            "note": note,
            "duration_minutes": None,
            "transfers": None,
        }

    return [
        Itinerary(
            request_id=request_id,
            title=title,
            family=family,
            status="placeholder",
            saved=False,
            summary_json=compact(summary_for(note)),
            score_json=compact(score),
            payload_json=compact({"placeholder": True, "note": note}),
        )
        for family, title, note, score in specs
    ]
|
||||
|
||||
|
||||
def _float_or_none(value: object) -> float | None:
|
||||
try:
|
||||
return None if value is None else float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _journey_score(journey: dict) -> dict[str, Any]:
|
||||
modes = [leg.get("mode") for leg in journey.get("legs", [])]
|
||||
duration = journey.get("duration_minutes")
|
||||
transfers = int(journey.get("transfers") or 0)
|
||||
railish = sum(1 for mode in modes if mode in {"train", "subway", "tram", "light_rail"})
|
||||
busish = sum(1 for mode in modes if mode in {"bus", "coach", "trolleybus"})
|
||||
emissions_hint = "low" if railish >= busish else "medium"
|
||||
return {
|
||||
"duration_minutes": duration,
|
||||
"transfers": transfers,
|
||||
"complexity": transfers + len(modes),
|
||||
"emissions": emissions_hint,
|
||||
"overnight": False,
|
||||
"estimated_cost": None,
|
||||
}
|
||||
|
||||
|
||||
def _json_dict(value: str | None) -> dict[str, Any]:
|
||||
try:
|
||||
data = json.loads(value or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return data if isinstance(data, dict) else {}
|
||||
1932
app/jobs.py
Normal file
1932
app/jobs.py
Normal file
File diff suppressed because it is too large
Load Diff
5385
app/journey.py
Normal file
5385
app/journey.py
Normal file
File diff suppressed because it is too large
Load Diff
717
app/journey_search.py
Normal file
717
app/journey_search.py
Normal file
@@ -0,0 +1,717 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import hashlib
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.address_search import is_location_token
|
||||
from app.db import SessionLocal
|
||||
from app.journey import find_journeys, parse_service_date, resolve_location_summary
|
||||
from app.models import JourneySearchCache
|
||||
from app.routing import direct_route_between_points, route_between_points
|
||||
|
||||
|
||||
# Highest transfer count tried by the progressive transit search (stages run 0..N).
MAX_PROGRESSIVE_TRANSFERS = 5
# Lifetime and size bound of the cache of per-stage find_journeys results.
TRANSIT_STAGE_CACHE_TTL_SECONDS = 5 * 60
TRANSIT_STAGE_CACHE_MAX_ENTRIES = 256
# Lifetime and size bound of the cache of completed search payloads.
PROGRESSIVE_SEARCH_CACHE_TTL_SECONDS = 10 * 60
PROGRESSIVE_SEARCH_CACHE_MAX_ENTRIES = 128
# Hashed into every durable cache key, so rows written under another version never match.
JOURNEY_SEARCH_CACHE_VERSION = "journey-search-v7"
# Background pool on which _run_search is executed for each newly started search.
_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="journey-search")
# Guards the module-level dictionaries below.
_lock = threading.RLock()
# Search state by search id.
_searches: dict[str, "_SearchState"] = {}
# Cache key -> id of the search currently registered as computing that key.
_progressive_search_inflight: dict[tuple[object, ...], str] = {}
# In-memory caches: key -> (time.monotonic() expiry, payload).
_transit_stage_cache: dict[tuple[object, ...], tuple[float, dict[str, Any]]] = {}
_progressive_search_cache: dict[tuple[object, ...], tuple[float, dict[str, Any]]] = {}
|
||||
|
||||
|
||||
@dataclass
class _SearchState:
    """Mutable record of one progressive journey search, stored in ``_searches``."""

    id: str  # hex uuid assigned by start_journey_search
    request: dict[str, Any]  # copy of the caller's request parameters
    cache_key: tuple[object, ...] | None = None  # key used for the progressive cache and in-flight map
    status: str = "queued"  # values set in this module: queued, running, complete, cancelled, error
    message: str = "Queued."  # human-readable progress text
    stage: str = "queued"  # label of the step most recently published
    journeys: list[dict] = field(default_factory=list)  # latest ranked journeys (transit searches)
    routing: dict[str, Any] | None = None  # route result (walk/drive searches)
    context: dict[str, Any] = field(default_factory=dict)  # extra keys merged into the client payload
    error: str | None = None  # error text once the search has failed
    created_at: float = field(default_factory=time.time)  # time.time() timestamps
    updated_at: float = field(default_factory=time.time)
    complete: bool = False  # True once finished, failed or cancelled
    cancelled: bool = False  # set by cancel_journey_search
|
||||
|
||||
|
||||
def start_journey_search(request: dict[str, Any]) -> dict[str, Any]:
    """Start a progressive search (or reuse one) and return its current payload.

    A cached result for the same request is served immediately as an already
    complete search. Otherwise, if an identical search is still running, that
    search's payload is returned; failing that, a new search is registered
    and handed to the background executor.
    """
    cache_key = _progressive_cache_key(request)
    cached_payload = _progressive_cache_get(cache_key)
    needs_run = cached_payload is None
    new_id = uuid.uuid4().hex
    new_state = _SearchState(id=new_id, request=dict(request), cache_key=cache_key)
    if not needs_run:
        _apply_cached_payload(new_state, cached_payload)
    with _lock:
        _prune_old_searches()
        if needs_run:
            running_id = _progressive_search_inflight.get(cache_key)
            running = _searches.get(running_id) if running_id is not None else None
            if running is not None and not (running.complete or running.cancelled):
                # An identical search is already in progress; piggy-back on it.
                return _payload(running)
            _progressive_search_inflight[cache_key] = new_id
        _searches[new_id] = new_state
    if needs_run:
        _executor.submit(_run_search, new_id)
    return journey_search_payload(new_id)
|
||||
|
||||
|
||||
def journey_search_payload(search_id: str) -> dict[str, Any]:
    """Return the client payload for a known search.

    Raises:
        KeyError: if no search with ``search_id`` is registered.
    """
    with _lock:
        found = _searches.get(search_id)
        if found is not None:
            return _payload(found)
        raise KeyError(search_id)
|
||||
|
||||
|
||||
def cancel_journey_search(search_id: str) -> dict[str, Any]:
    """Flag a search as cancelled and return its payload.

    A search that has not finished yet is also marked complete with status
    ``"cancelled"``; a finished search keeps its status and only gets the
    ``cancelled`` flag.

    Raises:
        KeyError: if no search with ``search_id`` is registered.
    """
    with _lock:
        target = _searches.get(search_id)
        if target is None:
            raise KeyError(search_id)
        was_complete = target.complete
        target.cancelled = True
        if not was_complete:
            target.status = "cancelled"
            target.message = "Search cancelled."
            target.complete = True
            target.updated_at = time.time()
        _clear_inflight_search_locked(target)
        return _payload(target)
|
||||
|
||||
|
||||
def _run_search(search_id: str) -> None:
    """Worker entry point: run the search matching the request's ``mode``.

    Does nothing when the search is unknown or already cancelled. Any
    exception raised by the search itself is reported via ``_publish_error``.
    """
    with _lock:
        state = _searches.get(search_id)
        if state is None or state.cancelled:
            return
        state.status, state.stage, state.message = "running", "starting", "Starting search..."
        state.updated_at = time.time()
        request = dict(state.request)
    try:
        mode = str(request.get("mode") or "transit")
        if mode not in {"walk", "drive", "car"}:
            _run_transit_search(search_id, request)
        else:
            # "car" is accepted as an alias for the "drive" routing mode.
            point_mode = "drive" if mode == "car" else mode
            _run_point_route_search(search_id, point_mode, request)
    except Exception as exc:  # noqa: BLE001 - report progressive-search failure to client
        _publish_error(search_id, str(exc))
|
||||
|
||||
|
||||
def _run_transit_search(search_id: str, request: dict[str, Any]) -> None:
    """Run a transit search stage by stage, publishing results after each stage.

    Stage ``n`` searches with ``max_transfers=n``; only stage 0 runs when
    ``direct_only`` is set. Journeys from all stages are merged by
    ``_journey_key``, re-ranked and published. The loop stops early on
    cancellation, after repeated stages without new journeys, or when the
    major-hub shortcut applies. A search that completes without error is
    stored in the progressive cache.
    """
    direct_only = bool(request.get("direct_only"))
    # Clamp client-supplied numbers to the supported ranges.
    limit = max(3, min(int(request.get("limit") or 5), 10))
    transfer_seconds = max(0, min(int(request.get("transfer_seconds") or 120), 3600))
    source_ids = _csv_ints(request.get("source_id"))
    service_date = request.get("service_date") or None
    stages = [0] if direct_only else list(range(0, MAX_PROGRESSIVE_TRANSFERS + 1))
    address_search = is_location_token(request.get("from_stop_id")) or is_location_token(request.get("to_stop_id"))
    # Stop-to-stop searches ask each stage for at least 10 candidates; address searches use the display limit.
    stage_limit = limit if address_search else max(limit, 10)
    merged: dict[str, dict] = {}
    context: dict[str, Any] = {}
    diagnostics: dict[str, Any] = {"stages": []}
    best_count = 0
    stale_stages = 0
    for transfers in stages:
        if _is_cancelled(search_id):
            return
        label = "direct" if transfers == 0 else f"up to {transfers} transfer{'s' if transfers != 1 else ''}"
        _publish_status(search_id, "running", f"Searching {label}...", f"transfers_{transfers}")
        stage_started_at = time.monotonic()
        with SessionLocal() as db:
            result = _cached_find_journeys(
                db,
                from_stop_id=str(request.get("from_stop_id") or ""),
                to_stop_id=str(request.get("to_stop_id") or ""),
                via_stop_id=request.get("via_stop_id") or None,
                source_ids=source_ids,
                departure=str(request.get("departure") or "08:00"),
                service_date=service_date,
                max_transfers=transfers,
                transfer_seconds=transfer_seconds,
                limit=stage_limit,
            )
        # _cached_find_journeys tags its result; strip the tag so it is not published as context.
        cache_status = str(result.pop("_cache_status", "miss"))
        elapsed_ms = int((time.monotonic() - stage_started_at) * 1000)
        stage_diagnostics = {
            "transfers": transfers,
            "cache": cache_status,
            "elapsed_ms": elapsed_ms,
            "journeys": len(result.get("journeys") or []),
        }
        result_diagnostics = result.get("diagnostics")
        if isinstance(result_diagnostics, dict):
            stage_diagnostics["details"] = result_diagnostics
        diagnostics["stages"].append(stage_diagnostics)
        # Context comes from the latest stage; its diagnostics are replaced by the accumulated per-stage list.
        context = _context_from_result(result)
        context["diagnostics"] = diagnostics
        before = len(merged)
        for journey in result.get("journeys") or []:
            # setdefault keeps the copy found by the earliest stage.
            merged.setdefault(_journey_key(journey), journey)
        ranked = _select_diverse_journeys(_rank_journeys(merged.values(), str(request.get("ranking") or "recommended")), limit=limit)
        _publish_results(
            search_id,
            journeys=ranked,
            context=context,
            status="running",
            stage=f"transfers_{transfers}",
            message=f"Found {len(ranked)} option{'s' if len(ranked) != 1 else ''}; still searching..." if not direct_only else "Direct search complete.",
        )
        # A stage is "stale" when it added no new journey although results already exist.
        if len(merged) <= before and ranked:
            stale_stages += 1
        else:
            stale_stages = 0
        best_count = max(best_count, len(ranked))
        # Give up on deeper searches after two consecutive stale stages (from stage 2 onwards).
        if ranked and stale_stages >= 2 and transfers >= 2:
            break
        if _major_hub_address_stage_is_complete(result_diagnostics, ranked, transfers=transfers, limit=limit):
            break
    complete_message = (
        f"Search complete. Found {best_count} option{'s' if best_count != 1 else ''}."
        if best_count
        else "Search complete. No route found in the imported timetable."
    )
    _publish_complete(search_id, message=complete_message)
    payload = journey_search_payload(search_id)
    # Only cleanly completed searches are cached (not cancelled or failed ones).
    if payload.get("status") == "complete" and not payload.get("error"):
        _progressive_cache_put(_progressive_cache_key(request), payload)
|
||||
|
||||
|
||||
def _major_hub_address_stage_is_complete(
|
||||
diagnostics: dict[str, Any] | None,
|
||||
ranked: list[dict],
|
||||
*,
|
||||
transfers: int,
|
||||
limit: int,
|
||||
) -> bool:
|
||||
if transfers < 1 or not ranked or not isinstance(diagnostics, dict):
|
||||
return False
|
||||
address_access = diagnostics.get("address_access")
|
||||
if not isinstance(address_access, dict) or not address_access.get("major_hubs"):
|
||||
return False
|
||||
return len(ranked) >= min(3, limit)
|
||||
|
||||
|
||||
def _run_point_route_search(search_id: str, mode: str, request: dict[str, Any]) -> None:
    """Compute a single walk/drive route between the two requested locations.

    If ``route_between_points`` fails for any reason, a direct connector from
    ``direct_route_between_points`` is published instead. A search that
    completes without error is stored in the progressive cache.

    Raises:
        ValueError: if the resolved start or destination has no coordinates.
    """
    _publish_status(search_id, "running", f"Searching {mode} route...", mode)
    with SessionLocal() as db:
        from_location = resolve_location_summary(db, str(request.get("from_stop_id") or ""), source_ids=_csv_ints(request.get("source_id")))
        to_location = resolve_location_summary(db, str(request.get("to_stop_id") or ""), source_ids=_csv_ints(request.get("source_id")))
        if from_location.lon is None or from_location.lat is None:
            raise ValueError("Selected start has no coordinates.")
        if to_location.lon is None or to_location.lat is None:
            raise ValueError("Selected destination has no coordinates.")
        try:
            route = route_between_points(
                db,
                from_lon=float(from_location.lon),
                from_lat=float(from_location.lat),
                to_lon=float(to_location.lon),
                to_lat=float(to_location.lat),
                mode=mode,
                max_visited=300_000,
            )
            message = f"{mode.title()} route found."
        except Exception as exc:  # noqa: BLE001 - point routing should still return an approximate connector
            # The failure text is passed along as the reason for the approximation.
            route = direct_route_between_points(
                db,
                from_lon=float(from_location.lon),
                from_lat=float(from_location.lat),
                to_lon=float(to_location.lon),
                to_lat=float(to_location.lat),
                mode=mode,
                reason=str(exc),
            )
            message = f"{mode.title()} route approximated."
        context = {
            "from": _stop_payload(from_location),
            "to": _stop_payload(to_location),
            "mode": mode,
        }
    _publish_routing(search_id, route, context=context, message=message)
    _publish_complete(search_id, message=f"{mode.title()} route complete.")
    payload = journey_search_payload(search_id)
    # Only cleanly completed searches are cached (not cancelled or failed ones).
    if payload.get("status") == "complete" and not payload.get("error"):
        _progressive_cache_put(_progressive_cache_key(request), payload)
|
||||
|
||||
|
||||
def _cached_find_journeys(
    db,
    *,
    from_stop_id: str,
    to_stop_id: str,
    via_stop_id: object,
    source_ids: list[int] | None,
    departure: str,
    service_date: object,
    max_transfers: int,
    transfer_seconds: int,
    limit: int,
) -> dict[str, Any]:
    """Call ``find_journeys`` through a two-level cache (in-memory, then durable).

    Returns:
        A copy of the result with an extra ``"_cache_status"`` key: ``"memory"``,
        ``"persistent"`` or ``"miss"``, naming where the result came from.
    """
    # Every argument that is forwarded to find_journeys is part of the key.
    key = (
        from_stop_id,
        to_stop_id,
        str(via_stop_id or ""),
        tuple(sorted(int(source_id) for source_id in source_ids or [])),
        departure,
        str(service_date or ""),
        int(max_transfers),
        int(transfer_seconds),
        int(limit),
    )
    now = time.monotonic()
    with _lock:
        cached = _transit_stage_cache.get(key)
        if cached is not None:
            expires_at, payload = cached
            if expires_at > now:
                return _with_cache_status(payload, "memory")
            # Expired: drop the entry and fall through to the slower lookups.
            _transit_stage_cache.pop(key, None)
    durable = _durable_cache_get("transit_stage", key)
    if durable is not None:
        with _lock:
            # Promote the durable hit into memory; the JSON round trip stores an independent copy.
            _transit_stage_cache[key] = (now + TRANSIT_STAGE_CACHE_TTL_SECONDS, json.loads(json.dumps(durable)))
            _prune_timed_cache(_transit_stage_cache, TRANSIT_STAGE_CACHE_MAX_ENTRIES)
        return _with_cache_status(durable, "persistent")
    result = find_journeys(
        db=db,
        from_stop_id=from_stop_id,
        to_stop_id=to_stop_id,
        via_stop_id=via_stop_id,
        source_ids=source_ids,
        departure=departure,
        service_date=service_date,
        max_transfers=max_transfers,
        transfer_seconds=transfer_seconds,
        limit=limit,
    )
    # Store a JSON-normalised copy so later changes to ``result`` cannot reach the cache.
    stored_result = json.loads(json.dumps(result))
    with _lock:
        # ``now`` was taken before the search ran, so the entry's lifetime counts from then.
        _transit_stage_cache[key] = (now + TRANSIT_STAGE_CACHE_TTL_SECONDS, stored_result)
        _prune_timed_cache(_transit_stage_cache, TRANSIT_STAGE_CACHE_MAX_ENTRIES)
    _durable_cache_put("transit_stage", key, stored_result, ttl_seconds=TRANSIT_STAGE_CACHE_TTL_SECONDS)
    return _with_cache_status(result, "miss")
|
||||
|
||||
|
||||
def _with_cache_status(payload: dict[str, Any], cache_status: str) -> dict[str, Any]:
|
||||
copied = json.loads(json.dumps(payload))
|
||||
copied["_cache_status"] = cache_status
|
||||
return copied
|
||||
|
||||
|
||||
def _prune_timed_cache(cache: dict[tuple[object, ...], tuple[float, dict[str, Any]]], max_entries: int) -> None:
|
||||
if len(cache) <= max_entries:
|
||||
return
|
||||
oldest = sorted(cache.items(), key=lambda item: item[1][0])[: len(cache) - max_entries]
|
||||
for old_key, _ in oldest:
|
||||
cache.pop(old_key, None)
|
||||
|
||||
|
||||
def _durable_cache_get(cache_type: str, key: tuple[object, ...]) -> dict[str, Any] | None:
    """Read a payload from the ``JourneySearchCache`` table.

    Returns ``None`` when no row matches, when the row has expired (the row
    is deleted in that case), or when anything goes wrong while reading.
    """
    storage_key = _durable_cache_key(cache_type, key)
    checked_at = datetime.now(timezone.utc)
    try:
        with SessionLocal() as session:
            lookup = select(JourneySearchCache).where(JourneySearchCache.cache_key == storage_key)
            row = session.scalar(lookup)
            if row is None:
                return None
            expiry = _as_utc(row.expires_at)
            if expiry is not None and expiry > checked_at:
                return json.loads(row.payload_json)
            # Missing or elapsed expiry: remove the stale row.
            session.delete(row)
            session.commit()
            return None
    except Exception:  # noqa: BLE001 - cache misses must not break journey search
        return None
|
||||
|
||||
|
||||
def _durable_cache_put(cache_type: str, key: tuple[object, ...], payload: dict[str, Any], *, ttl_seconds: int) -> None:
    """Insert or refresh a payload row in the ``JourneySearchCache`` table.

    The row expires ``ttl_seconds`` (at least one second) from now. Writing
    is best-effort: any failure is swallowed.
    """
    storage_key = _durable_cache_key(cache_type, key)
    written_at = datetime.now(timezone.utc)
    expiry = written_at + timedelta(seconds=max(1, int(ttl_seconds)))
    try:
        with SessionLocal() as session:
            lookup = select(JourneySearchCache).where(JourneySearchCache.cache_key == storage_key)
            existing = session.scalar(lookup)
            encoded = json.dumps(payload, separators=(",", ":"))
            if existing is not None:
                existing.cache_type = cache_type
                existing.payload_json = encoded
                existing.updated_at = written_at
                existing.expires_at = expiry
            else:
                session.add(
                    JourneySearchCache(
                        cache_key=storage_key,
                        cache_type=cache_type,
                        payload_json=encoded,
                        created_at=written_at,
                        updated_at=written_at,
                        expires_at=expiry,
                    )
                )
            session.commit()
    except Exception:  # noqa: BLE001 - cache writes are best-effort
        return
|
||||
|
||||
|
||||
def _durable_cache_key(cache_type: str, key: tuple[object, ...]) -> str:
    """Derive the storage key (SHA-256 hex digest) for a durable cache entry.

    The digest covers the cache version, the cache type and the JSON-safe
    form of ``key``, serialised canonically (sorted keys, compact separators).
    """
    envelope = {
        "version": JOURNEY_SEARCH_CACHE_VERSION,
        "cache_type": cache_type,
        "key": _json_safe(key),
    }
    canonical = json.dumps(envelope, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
def _json_safe(value: object) -> object:
|
||||
if isinstance(value, tuple):
|
||||
return [_json_safe(item) for item in value]
|
||||
if isinstance(value, list):
|
||||
return [_json_safe(item) for item in value]
|
||||
if isinstance(value, dict):
|
||||
return {str(key): _json_safe(item) for key, item in sorted(value.items(), key=lambda item: str(item[0]))}
|
||||
if isinstance(value, (str, int, float, bool)) or value is None:
|
||||
return value
|
||||
if isinstance(value, (date, datetime)):
|
||||
return value.isoformat()
|
||||
return str(value)
|
||||
|
||||
|
||||
def _as_utc(value: datetime | None) -> datetime | None:
|
||||
if value is None:
|
||||
return None
|
||||
if value.tzinfo is None:
|
||||
return value.replace(tzinfo=timezone.utc)
|
||||
return value.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def _progressive_cache_key(request: dict[str, Any]) -> tuple[object, ...]:
    """Build the cache / in-flight de-duplication key for a search request.

    Request values are normalised with the same defaults the search applies,
    so requests that lead to the same search share one key.

    Raises:
        ValueError: if ``source_id``, ``transfer_seconds`` or ``limit`` holds
            text that is not an integer.
    """
    source_ids = _csv_ints(request.get("source_id"))
    # Clamp exactly as _run_transit_search does; without this, out-of-range
    # values (e.g. 5000 vs. 3600) produced different keys for identical searches.
    transfer_seconds = max(0, min(int(request.get("transfer_seconds") or 120), 3600))
    limit = max(3, min(int(request.get("limit") or 5), 10))
    return (
        str(request.get("mode") or "transit"),
        str(request.get("from_stop_id") or ""),
        str(request.get("to_stop_id") or ""),
        str(request.get("via_stop_id") or ""),
        tuple(sorted(int(source_id) for source_id in source_ids or [])),
        str(request.get("departure") or "08:00"),
        str(request.get("service_date") or ""),
        bool(request.get("direct_only")),
        str(request.get("ranking") or "recommended"),
        transfer_seconds,
        limit,
    )
|
||||
|
||||
|
||||
def _progressive_cache_get(key: tuple[object, ...]) -> dict[str, Any] | None:
    """Look up a completed search payload, first in memory and then in the durable cache.

    Returns a private copy tagged with ``cache_status`` (``"memory"`` or
    ``"persistent"``), or ``None`` on a miss. A durable hit is also promoted
    into the in-memory cache.
    """

    def _tagged_copy(source: dict[str, Any], origin: str) -> dict[str, Any]:
        clone = json.loads(json.dumps(source))
        clone["cache_status"] = origin
        return clone

    checked_at = time.monotonic()
    with _lock:
        entry = _progressive_search_cache.get(key)
        if entry is not None:
            expiry, payload = entry
            if expiry > checked_at:
                return _tagged_copy(payload, "memory")
            # Expired entry: discard and try the durable cache.
            _progressive_search_cache.pop(key, None)
    stored = _durable_cache_get("progressive", key)
    if stored is None:
        return None
    with _lock:
        fresh_entry = (checked_at + PROGRESSIVE_SEARCH_CACHE_TTL_SECONDS, json.loads(json.dumps(stored)))
        _progressive_search_cache[key] = fresh_entry
        _prune_timed_cache(_progressive_search_cache, PROGRESSIVE_SEARCH_CACHE_MAX_ENTRIES)
    return _tagged_copy(stored, "persistent")
|
||||
|
||||
|
||||
def _progressive_cache_put(key: tuple[object, ...], payload: dict[str, Any]) -> None:
    """Store a completed search payload in the in-memory and durable caches.

    A JSON round-tripped copy is stored, with any ``cache_status`` marker
    removed so that it is assigned afresh on each later read.
    """
    snapshot = json.loads(json.dumps(payload))
    snapshot.pop("cache_status", None)
    with _lock:
        expiry = time.monotonic() + PROGRESSIVE_SEARCH_CACHE_TTL_SECONDS
        _progressive_search_cache[key] = (expiry, snapshot)
        _prune_timed_cache(_progressive_search_cache, PROGRESSIVE_SEARCH_CACHE_MAX_ENTRIES)
    _durable_cache_put("progressive", key, snapshot, ttl_seconds=PROGRESSIVE_SEARCH_CACHE_TTL_SECONDS)
|
||||
|
||||
|
||||
def _apply_cached_payload(state: _SearchState, payload: dict[str, Any]) -> None:
|
||||
state.status = str(payload.get("status") or "complete")
|
||||
state.message = "Cached result."
|
||||
state.stage = str(payload.get("stage") or "cached")
|
||||
state.journeys = json.loads(json.dumps(payload.get("journeys") or []))
|
||||
state.routing = json.loads(json.dumps(payload.get("routing"))) if payload.get("routing") is not None else None
|
||||
state.context = {
|
||||
key: value
|
||||
for key, value in payload.items()
|
||||
if key not in {"search_id", "status", "stage", "message", "complete", "error", "journeys", "routing", "created_at", "updated_at"}
|
||||
}
|
||||
state.error = None
|
||||
state.complete = True
|
||||
state.updated_at = time.time()
|
||||
|
||||
|
||||
def _publish_status(search_id: str, status: str, message: str, stage: str) -> None:
    """Update a search's status, message and stage; unknown or cancelled searches are ignored."""
    with _lock:
        state = _searches.get(search_id)
        if state is not None and not state.cancelled:
            state.status = status
            state.message = message
            state.stage = stage
            state.updated_at = time.time()
|
||||
|
||||
|
||||
def _publish_results(search_id: str, *, journeys: list[dict], context: dict[str, Any], status: str, stage: str, message: str) -> None:
    """Publish a new set of journeys and context; unknown or cancelled searches are ignored.

    The journey list and context dictionary are stored as shallow copies.
    """
    with _lock:
        state = _searches.get(search_id)
        if state is not None and not state.cancelled:
            state.status = status
            state.stage = stage
            state.message = message
            state.journeys = list(journeys)
            state.context = dict(context)
            state.updated_at = time.time()
|
||||
|
||||
|
||||
def _publish_routing(search_id: str, routing: dict[str, Any], *, context: dict[str, Any], message: str) -> None:
    """Publish a point-to-point route; unknown or cancelled searches are ignored.

    The stage is taken from the route's ``mode`` (``"route"`` when absent).
    """
    with _lock:
        state = _searches.get(search_id)
        if state is not None and not state.cancelled:
            state.status = "running"
            state.stage = str(routing.get("mode") or "route")
            state.message = message
            state.routing = routing
            state.context = dict(context)
            state.updated_at = time.time()
|
||||
|
||||
|
||||
def _publish_complete(search_id: str, *, message: str) -> None:
    """Mark a search as complete and release its in-flight slot.

    Unknown or cancelled searches are left untouched.
    """
    with _lock:
        state = _searches.get(search_id)
        if state is not None and not state.cancelled:
            state.status = "complete"
            state.message = message
            state.complete = True
            state.updated_at = time.time()
            _clear_inflight_search_locked(state)
|
||||
|
||||
|
||||
def _publish_error(search_id: str, message: str) -> None:
    """Mark a search as failed with ``message`` and release its in-flight slot.

    Unknown searches are ignored. A search that was already cancelled is also
    left alone, matching the other ``_publish_*`` helpers: the client asked
    to cancel, so a later worker failure must not turn the reported
    ``"cancelled"`` status into ``"error"``.
    """
    with _lock:
        state = _searches.get(search_id)
        if state is None or state.cancelled:
            return
        state.status = "error"
        state.stage = "error"
        state.message = message
        state.error = message
        state.complete = True
        state.updated_at = time.time()
        _clear_inflight_search_locked(state)
|
||||
|
||||
|
||||
def _clear_inflight_search_locked(state: _SearchState) -> None:
    """Drop the in-flight registration for ``state``'s cache key if ``state`` owns it.

    The in-flight map is modified without taking ``_lock`` here.
    """
    owned_key = state.cache_key
    if owned_key is None:
        return
    if _progressive_search_inflight.get(owned_key) == state.id:
        _progressive_search_inflight.pop(owned_key, None)
|
||||
|
||||
|
||||
def _is_cancelled(search_id: str) -> bool:
    """Tell whether a search should stop: it was cancelled or is no longer registered."""
    with _lock:
        state = _searches.get(search_id)
        if state is None:
            return True
        return state.cancelled
|
||||
|
||||
|
||||
def _payload(state: _SearchState) -> dict[str, Any]:
|
||||
return {
|
||||
"search_id": state.id,
|
||||
"status": state.status,
|
||||
"stage": state.stage,
|
||||
"message": state.message,
|
||||
"complete": state.complete,
|
||||
"error": state.error,
|
||||
"journeys": json.loads(json.dumps(state.journeys)),
|
||||
"routing": json.loads(json.dumps(state.routing)) if state.routing is not None else None,
|
||||
"created_at": state.created_at,
|
||||
"updated_at": state.updated_at,
|
||||
**json.loads(json.dumps(state.context)),
|
||||
}
|
||||
|
||||
|
||||
def _context_from_result(result: dict[str, Any]) -> dict[str, Any]:
|
||||
return {
|
||||
key: value
|
||||
for key, value in result.items()
|
||||
if key not in {"journeys"} and key not in {"error"}
|
||||
}
|
||||
|
||||
|
||||
def _journey_key(journey: dict[str, Any]) -> str:
|
||||
parts = []
|
||||
for leg in journey.get("legs") or []:
|
||||
parts.append(
|
||||
"|".join(
|
||||
str(part or "")
|
||||
for part in [
|
||||
leg.get("dataset_id"),
|
||||
leg.get("mode"),
|
||||
leg.get("route_id"),
|
||||
leg.get("trip_id"),
|
||||
(leg.get("from") or {}).get("stop_id") or (leg.get("from") or {}).get("name"),
|
||||
(leg.get("to") or {}).get("stop_id") or (leg.get("to") or {}).get("name"),
|
||||
leg.get("departure_time"),
|
||||
leg.get("arrival_time"),
|
||||
]
|
||||
)
|
||||
)
|
||||
return "||".join(parts)
|
||||
|
||||
|
||||
def _rank_journeys(journeys, ranking: str) -> list[dict]:
|
||||
def key(journey: dict[str, Any]) -> tuple[float, float, int, float]:
|
||||
departure = journey.get("departure_seconds")
|
||||
arrival = journey.get("arrival_seconds")
|
||||
duration = journey.get("duration_minutes")
|
||||
transfers = int(journey.get("transfers") or 0)
|
||||
walking = sum(float(leg.get("distance_m") or 0) for leg in journey.get("legs") or [] if leg.get("mode") == "walk")
|
||||
walking_seconds = walking / 1.35
|
||||
if ranking == "duration":
|
||||
return (
|
||||
float("inf") if duration is None else float(duration),
|
||||
float("inf") if arrival is None else float(arrival),
|
||||
transfers,
|
||||
walking,
|
||||
)
|
||||
if ranking == "fewest_transfers":
|
||||
return (
|
||||
transfers,
|
||||
float("inf") if arrival is None else float(arrival),
|
||||
float("inf") if duration is None else float(duration),
|
||||
walking,
|
||||
)
|
||||
if ranking == "earliest_arrival":
|
||||
return (
|
||||
float("inf") if arrival is None else float(arrival),
|
||||
float("inf") if duration is None else float(duration),
|
||||
transfers,
|
||||
walking,
|
||||
)
|
||||
return (
|
||||
float("inf") if arrival is None else float(arrival) + transfers * 600 + walking_seconds,
|
||||
float("inf") if arrival is None else float(arrival),
|
||||
transfers,
|
||||
walking,
|
||||
)
|
||||
|
||||
return sorted((dict(journey) for journey in journeys), key=key)
|
||||
|
||||
|
||||
def _select_diverse_journeys(journeys: list[dict], *, limit: int) -> list[dict]:
    """Pick up to ``limit`` journeys from a ranked list, favouring variety.

    Journeys are taken in ranking order. Exact duplicates (same
    ``_journey_key``) are always dropped. Once three journeys have been
    chosen, a journey whose ``_journey_diversity_key`` matches one already
    chosen is skipped as well.

    The selection is always passed through ``_ensure_walk_only_option``, so
    a walk-only journey present in ``journeys`` is kept in the result even
    when the selection is already full. Returning early on a full selection
    would bypass that helper in the only situations where it can act.

    Args:
        journeys: Journeys in ranking order, best first.
        limit: Maximum number of journeys to select.

    Returns:
        The selected journeys, in ranking order apart from a walk-only
        journey that may be appended or swapped into the last slot.
    """
    selected: list[dict] = []
    seen_exact: set[str] = set()
    seen_diversity: set[tuple[object, ...]] = set()
    for journey in journeys:
        exact_key = _journey_key(journey)
        if exact_key in seen_exact:
            continue
        diversity_key = _journey_diversity_key(journey)
        # The first three picks are kept regardless of similarity. Because the
        # diversity filter only applies after that, a short selection already
        # contains every distinct journey and needs no separate top-up pass.
        if len(selected) >= 3 and diversity_key in seen_diversity:
            continue
        selected.append(journey)
        seen_exact.add(exact_key)
        seen_diversity.add(diversity_key)
        if len(selected) >= limit:
            break
    return _ensure_walk_only_option(selected, journeys, limit=limit)
|
||||
|
||||
|
||||
def _ensure_walk_only_option(selected: list[dict], ranked: list[dict], *, limit: int) -> list[dict]:
    """Make sure the selection contains a walk-only journey when one is available.

    If ``selected`` has none, the first walk-only journey in ``ranked`` is
    appended (returning a new list) while there is room below ``limit``;
    otherwise it replaces the last entry of ``selected`` in place.
    """
    for chosen in selected:
        if _journey_is_walk_only(chosen):
            return selected
    for candidate in ranked:
        if _journey_is_walk_only(candidate):
            walk = candidate
            break
    else:
        return selected
    if walk is None:
        return selected
    if len(selected) < limit:
        return [*selected, walk]
    if selected:
        selected[-1] = walk
    return selected
|
||||
|
||||
|
||||
def _journey_is_walk_only(journey: dict) -> bool:
|
||||
legs = journey.get("legs") or []
|
||||
return bool(legs) and all(leg.get("mode") == "walk" for leg in legs)
|
||||
|
||||
|
||||
def _journey_diversity_key(journey: dict[str, Any]) -> tuple[object, ...]:
|
||||
route_signature = tuple(
|
||||
str(leg.get("route_ref") or leg.get("route_id") or leg.get("mode") or "")
|
||||
for leg in journey.get("legs") or []
|
||||
if leg.get("mode") != "walk"
|
||||
)
|
||||
departure = journey.get("departure_seconds")
|
||||
time_band = None if departure is None else int(departure) // (30 * 60)
|
||||
return (int(journey.get("transfers") or 0), route_signature, time_band)
|
||||
|
||||
|
||||
def _csv_ints(value: object) -> list[int] | None:
|
||||
if value is None:
|
||||
return None
|
||||
items = [item.strip() for item in str(value).split(",") if item.strip()]
|
||||
if not items:
|
||||
return None
|
||||
return [int(item) for item in items]
|
||||
|
||||
|
||||
def _stop_payload(stop) -> dict[str, Any]:
|
||||
return {
|
||||
"id": stop.id,
|
||||
"dataset_id": stop.dataset_id,
|
||||
"stop_id": stop.stop_id,
|
||||
"name": stop.name,
|
||||
"lat": stop.lat,
|
||||
"lon": stop.lon,
|
||||
}
|
||||
|
||||
|
||||
def _prune_old_searches() -> None:
    """Forget searches that have not been updated recently.

    Completed searches are dropped 3 minutes after their last update, all
    others after 15 minutes. In-flight registrations of dropped searches are
    released as well.
    """
    now = time.time()
    expired = []
    for search_id, state in _searches.items():
        idle_for = now - state.updated_at
        allowed_idle = 3 * 60 if state.complete else 15 * 60
        if idle_for > allowed_idle:
            expired.append(search_id)
    for search_id in expired:
        removed = _searches.pop(search_id, None)
        if removed is not None:
            _clear_inflight_search_locked(removed)
|
||||
2653
app/main.py
Normal file
2653
app/main.py
Normal file
File diff suppressed because it is too large
Load Diff
612
app/models.py
Normal file
612
app/models.py
Normal file
@@ -0,0 +1,612 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import BigInteger, Boolean, DateTime, Float, ForeignKey, Integer, String, Text, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def now_utc() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
class Source(Base):
    """ORM model for the ``sources`` table."""

    __tablename__ = "sources"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # Optional link to a row in source_catalog_entries.
    catalog_entry_id: Mapped[Optional[int]] = mapped_column(ForeignKey("source_catalog_entries.id"), nullable=True, index=True)
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    kind: Mapped[str] = mapped_column(String(64), nullable=False)  # gtfs, osm_geojson, osm_pbf, osm_diff
    url: Mapped[str] = mapped_column(Text, nullable=False)
    country: Mapped[Optional[str]] = mapped_column(String(8), nullable=True)
    license: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    priority: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, index=True)
    mode_scope: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    source_basis: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    enabled: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
    # New rows start with status "new".
    status: Mapped[str] = mapped_column(String(64), default="new", nullable=False)
    last_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    last_run_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=now_utc, nullable=False)

    catalog_entry: Mapped[Optional["SourceCatalogEntry"]] = relationship()
    # Datasets and update checks are removed together with their source (delete-orphan cascade).
    datasets: Mapped[list["Dataset"]] = relationship(back_populates="source", cascade="all, delete-orphan")
    update_checks: Mapped[list["SourceUpdateCheck"]] = relationship(back_populates="source", cascade="all, delete-orphan")
|
||||
|
||||
|
||||
class SourceCatalogEntry(Base):
    """ORM model for the ``source_catalog_entries`` table; ``catalog_key`` is unique."""

    __tablename__ = "source_catalog_entries"
    __table_args__ = (UniqueConstraint("catalog_key", name="uq_source_catalog_entry_key"),)

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    catalog_key: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
    geography: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, index=True)
    country_code: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, index=True)
    mode_scope: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    source_name: Mapped[str] = mapped_column(Text, nullable=False)
    source_category: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    formats_apis: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    availability: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    coverage_notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    geometry_notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    disruptions_closures: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    operator_list_use: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    access_license_notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    priority: Mapped[Optional[str]] = mapped_column(String(32), nullable=True, index=True)
    source_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    evidence_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    next_pipeline_action: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    # New rows start with status "backlog".
    status: Mapped[str] = mapped_column(String(64), default="backlog", nullable=False, index=True)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=now_utc, nullable=False)
    # NOTE(review): only an insert default is declared here (no onupdate); presumably callers set this on change - confirm.
    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=now_utc, nullable=False)
|
||||
|
||||
|
||||
class Dataset(Base):
    """Declarative model mapped to the ``datasets`` table.

    Every row references one ``sources`` row through ``source_id``.
    """

    __tablename__ = "datasets"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id"),
        index=True,
        nullable=False,
    )

    # Mandatory descriptive columns.
    kind: Mapped[str] = mapped_column(String(64), nullable=False)
    local_path: Mapped[str] = mapped_column(Text, nullable=False)
    sha256: Mapped[str] = mapped_column(String(64), nullable=False)

    # State columns with insert-time defaults.
    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    status: Mapped[str] = mapped_column(String(64), nullable=False, default="imported")

    metadata_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )

    # Paired with the ``datasets`` attribute on Source.
    source: Mapped[Source] = relationship(back_populates="datasets")
|
||||
|
||||
|
||||
class SourceUpdateCheck(Base):
    """Declarative model mapped to the ``source_update_checks`` table.

    Each row references one ``sources`` row and, optionally, one ``datasets``
    row through ``active_dataset_id``.
    """

    __tablename__ = "source_update_checks"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"), nullable=False, index=True)
    checked_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=now_utc,
        nullable=False,
        index=True,
    )
    status: Mapped[str] = mapped_column(String(64), nullable=False, default="checked", index=True)
    update_available: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    reason: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Remote-side columns.
    remote_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    etag: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    last_modified: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    # Presumably a byte count (HTTP Content-Length) - confirm. Stored as a
    # 64-bit integer: a 32-bit INTEGER tops out at 2,147,483,647, so sizes
    # above ~2 GiB would be rejected by PostgreSQL.
    # NOTE(review): existing PostgreSQL databases need an ALTER TABLE to widen
    # the column; the model change alone only affects newly created tables.
    content_length: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True)
    content_type: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Local-side columns.
    local_mtime: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
    # Presumably a byte count as well - confirm. 64-bit for the same reason as
    # content_length.
    local_size: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True)
    local_sha256: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)

    # Optional reference to a datasets row plus a stored sha256 string.
    active_dataset_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("datasets.id"),
        nullable=True,
        index=True,
    )
    active_dataset_sha256: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
    metadata_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Paired with the ``update_checks`` attribute on Source.
    source: Mapped[Source] = relationship(back_populates="update_checks")
    active_dataset: Mapped[Optional[Dataset]] = relationship()
|
||||
|
||||
|
||||
class OsmDiffState(Base):
    """Declarative model mapped to the ``osm_diff_states`` table.

    Rows point at a ``sources`` row and optionally at a ``datasets`` row.
    """

    __tablename__ = "osm_diff_states"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Foreign keys; only the source reference is mandatory.
    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id"),
        index=True,
        nullable=False,
    )
    raw_dataset_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=True,
    )

    updates_url: Mapped[str] = mapped_column(Text, nullable=False)
    sequence_number: Mapped[int] = mapped_column(Integer, index=True, nullable=False)
    # Kept as a string column, not a DateTime.
    timestamp: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    status: Mapped[str] = mapped_column(
        String(64),
        default="active",
        index=True,
        nullable=False,
    )
    metadata_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Insert-time defaults come from now_utc; only created_at is indexed.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
        default=now_utc,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )

    # One-directional relationships (no back_populates).
    source: Mapped[Source] = relationship()
    raw_dataset: Mapped[Optional[Dataset]] = relationship()
|
||||
|
||||
|
||||
class Job(Base):
    """Declarative model mapped to the ``jobs`` table.

    Owns its ``JobEvent`` rows through the ``events`` relationship, which
    cascades all operations and deletes orphaned events.
    """

    __tablename__ = "jobs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    kind: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    status: Mapped[str] = mapped_column(
        String(64),
        default="queued",
        index=True,
        nullable=False,
    )
    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Integer counters; all start at 0.
    progress_current: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    progress_total: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    priority: Mapped[int] = mapped_column(Integer, default=0, index=True, nullable=False)

    # Optional, indexed control and lease columns.
    requested_action: Mapped[Optional[str]] = mapped_column(
        String(32),
        index=True,
        nullable=True,
    )
    lease_owner: Mapped[Optional[str]] = mapped_column(String(255), index=True, nullable=True)
    lease_expires_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        index=True,
        nullable=True,
    )
    paused_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)

    # Optional outcome columns.
    result_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    dismissed_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        index=True,
        nullable=True,
    )

    # created_at / updated_at default to now_utc at insert; the others stay NULL
    # until set.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
        default=now_utc,
    )
    started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
    finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)

    events: Mapped[list["JobEvent"]] = relationship(
        cascade="all, delete-orphan",
        back_populates="job",
    )
|
||||
|
||||
|
||||
class JobEvent(Base):
    """Declarative model mapped to the ``job_events`` table.

    Each event belongs to exactly one ``jobs`` row via ``job_id``.
    """

    __tablename__ = "job_events"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    job_id: Mapped[int] = mapped_column(ForeignKey("jobs.id"), index=True, nullable=False)

    # Indexed classification of the event; level falls back to "info".
    level: Mapped[str] = mapped_column(
        String(32),
        default="info",
        index=True,
        nullable=False,
    )
    event_type: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    message: Mapped[str] = mapped_column(Text, nullable=False)

    # Optional counters (nullable here, unlike the counters on Job).
    progress_current: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    progress_total: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)

    metadata_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
        default=now_utc,
    )

    # Paired with Job.events.
    job: Mapped[Job] = relationship(back_populates="events")
|
||||
|
||||
|
||||
class PipelineRun(Base):
    """Declarative model mapped to the ``pipeline_runs`` table.

    A run may optionally reference a source, a dataset and a job.
    """

    __tablename__ = "pipeline_runs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Mandatory, indexed identification columns.
    stage: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    version: Mapped[str] = mapped_column(String(128), index=True, nullable=False)
    dependency_hash: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    status: Mapped[str] = mapped_column(
        String(64),
        default="running",
        index=True,
        nullable=False,
    )

    # Optional, indexed foreign keys.
    source_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("sources.id"),
        index=True,
        nullable=True,
    )
    dataset_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=True,
    )
    job_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("jobs.id"),
        index=True,
        nullable=True,
    )

    # Optional text payloads.
    input_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    output_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # started_at / updated_at default to now_utc at insert; finished_at is optional.
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
        default=now_utc,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
    finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)

    # One-directional relationships for the three optional foreign keys.
    source: Mapped[Optional[Source]] = relationship()
    dataset: Mapped[Optional[Dataset]] = relationship()
    job: Mapped[Optional[Job]] = relationship()
|
||||
|
||||
|
||||
class GtfsAgency(Base):
    """Declarative model mapped to the ``gtfs_agencies`` table.

    ``agency_id`` must be unique within a dataset.
    """

    __tablename__ = "gtfs_agencies"
    __table_args__ = (
        UniqueConstraint("dataset_id", "agency_id", name="uq_gtfs_agency_dataset_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    agency_id: Mapped[str] = mapped_column(String(255), nullable=False)
    name: Mapped[str] = mapped_column(Text, nullable=False)

    # Optional attributes.
    url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    timezone: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
|
||||
|
||||
class GtfsStop(Base):
    """Declarative model mapped to the ``gtfs_stops`` table.

    ``stop_id`` must be unique within a dataset.
    """

    __tablename__ = "gtfs_stops"
    __table_args__ = (
        UniqueConstraint("dataset_id", "stop_id", name="uq_gtfs_stop_dataset_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    stop_id: Mapped[str] = mapped_column(String(255), nullable=False)

    # Everything below is optional.
    name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    # Plain string column; no foreign key is declared on it.
    parent_station: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
|
||||
|
||||
class GtfsRoute(Base):
    """Declarative model mapped to the ``gtfs_routes`` table.

    ``route_id`` must be unique within a dataset.
    """

    __tablename__ = "gtfs_routes"
    __table_args__ = (
        UniqueConstraint("dataset_id", "route_id", name="uq_gtfs_route_dataset_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    route_id: Mapped[str] = mapped_column(String(255), nullable=False)

    # Optional descriptive columns.
    agency_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    short_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    long_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    route_type: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    mode: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    route_scope: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    operator_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Optional geometry text plus min/max longitude/latitude columns.
    geometry_geojson: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    min_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    min_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)

    # Optional, indexed key columns.
    route_key: Mapped[Optional[str]] = mapped_column(Text, index=True, nullable=True)
    operator_key: Mapped[Optional[str]] = mapped_column(Text, index=True, nullable=True)
|
||||
|
||||
|
||||
class GtfsTrip(Base):
    """Declarative model mapped to the ``gtfs_trips`` table.

    ``trip_id`` must be unique within a dataset.
    """

    __tablename__ = "gtfs_trips"
    __table_args__ = (
        UniqueConstraint("dataset_id", "trip_id", name="uq_gtfs_trip_dataset_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    # String identifiers; no foreign keys are declared on these columns.
    route_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
    trip_id: Mapped[str] = mapped_column(String(255), nullable=False)
    service_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    shape_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
|
||||
|
||||
class GtfsCalendar(Base):
    """Declarative model mapped to the ``gtfs_calendars`` table.

    ``service_id`` must be unique within a dataset.
    """

    __tablename__ = "gtfs_calendars"
    __table_args__ = (
        UniqueConstraint("dataset_id", "service_id", name="uq_gtfs_calendar_dataset_service"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    service_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)

    # One non-nullable flag per weekday; each is False unless supplied.
    monday: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    tuesday: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    wednesday: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    thursday: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    friday: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    saturday: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    sunday: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)

    # Stored as integers, not Date columns.
    # NOTE(review): presumably YYYYMMDD-encoded - confirm against the importer.
    start_date: Mapped[int] = mapped_column(Integer, index=True, nullable=False)
    end_date: Mapped[int] = mapped_column(Integer, index=True, nullable=False)
|
||||
|
||||
|
||||
class GtfsCalendarDate(Base):
    """Declarative model mapped to the ``gtfs_calendar_dates`` table.

    The combination (dataset_id, service_id, date) must be unique.
    """

    __tablename__ = "gtfs_calendar_dates"
    __table_args__ = (
        UniqueConstraint(
            "dataset_id",
            "service_id",
            "date",
            name="uq_gtfs_calendar_date_dataset_service_date",
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    service_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
    # Integer column, not a Date.
    date: Mapped[int] = mapped_column(Integer, index=True, nullable=False)
    exception_type: Mapped[int] = mapped_column(Integer, nullable=False)
|
||||
|
||||
|
||||
class GtfsShape(Base):
    """Declarative model mapped to the ``gtfs_shapes`` table.

    ``shape_id`` must be unique within a dataset.
    """

    __tablename__ = "gtfs_shapes"
    __table_args__ = (
        UniqueConstraint("dataset_id", "shape_id", name="uq_gtfs_shape_dataset_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    shape_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)

    # Geometry text is mandatory here; the min/max columns are optional.
    geometry_geojson: Mapped[str] = mapped_column(Text, nullable=False)
    min_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    min_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
|
||||
|
||||
|
||||
class GtfsStopTime(Base):
    """Declarative model mapped to the ``gtfs_stop_times`` table.

    No table-level unique constraint is declared for this model.
    """

    __tablename__ = "gtfs_stop_times"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    trip_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
    stop_id: Mapped[str] = mapped_column(String(255), nullable=False)
    stop_sequence: Mapped[int] = mapped_column(Integer, nullable=False)

    # Times are held twice: as optional strings and as optional, indexed integers.
    arrival_time: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
    departure_time: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
    arrival_seconds: Mapped[Optional[int]] = mapped_column(Integer, index=True, nullable=True)
    departure_seconds: Mapped[Optional[int]] = mapped_column(Integer, index=True, nullable=True)
|
||||
|
||||
|
||||
class CanonicalStop(Base):
    """Declarative model mapped to the ``canonical_stops`` table.

    ``stop_key`` is declared both unique and indexed.
    """

    __tablename__ = "canonical_stops"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    stop_key: Mapped[str] = mapped_column(
        String(255),
        unique=True,
        index=True,
        nullable=False,
    )

    # Both name columns are mandatory; only the normalized one is indexed.
    name: Mapped[str] = mapped_column(Text, nullable=False)
    normalized_name: Mapped[str] = mapped_column(Text, index=True, nullable=False)

    # Optional attributes.
    lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    mode: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    metadata_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
|
||||
|
||||
|
||||
class CanonicalStopLink(Base):
    """Declarative model mapped to the ``canonical_stop_links`` table.

    The combination (object_type, dataset_id, object_id) must be unique.
    """

    __tablename__ = "canonical_stop_links"
    __table_args__ = (
        UniqueConstraint(
            "object_type",
            "dataset_id",
            "object_id",
            name="uq_canonical_stop_link_object",
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    canonical_stop_id: Mapped[int] = mapped_column(
        ForeignKey("canonical_stops.id"),
        index=True,
        nullable=False,
    )

    # Documented values: timetable, visual
    layer: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    # Documented values: gtfs_stop, osm_feature
    object_type: Mapped[str] = mapped_column(String(64), index=True, nullable=False)

    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    # Plain integer; no foreign key is declared on this column.
    object_id: Mapped[int] = mapped_column(Integer, index=True, nullable=False)
    external_id: Mapped[str] = mapped_column(Text, nullable=False)
    role: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)

    # Confidence is 1.0 unless supplied.
    confidence: Mapped[float] = mapped_column(Float, default=1.0, nullable=False)
    distance_m: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    metadata_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    canonical_stop: Mapped[CanonicalStop] = relationship()
|
||||
|
||||
|
||||
class RoutePattern(Base):
    """Declarative model mapped to the ``route_patterns`` table.

    A pattern may optionally reference an ``osm_features`` row and a
    ``gtfs_routes`` row.
    """

    __tablename__ = "route_patterns"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    pattern_key: Mapped[str] = mapped_column(
        String(255),
        unique=True,
        index=True,
        nullable=False,
    )

    # Optional descriptive columns.
    route_ref: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    route_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    mode: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    route_scope: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    operator_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Documented values: osm, gtfs_proposed
    source_kind: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    status: Mapped[str] = mapped_column(
        String(64),
        default="active",
        index=True,
        nullable=False,
    )

    # Optional, indexed references; gtfs_shape_id is a plain string.
    osm_feature_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("osm_features.id"),
        index=True,
        nullable=True,
    )
    gtfs_route_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("gtfs_routes.id"),
        index=True,
        nullable=True,
    )
    gtfs_shape_id: Mapped[Optional[str]] = mapped_column(String(255), index=True, nullable=True)

    # Mandatory geometry text plus optional min/max longitude/latitude columns.
    geometry_geojson: Mapped[str] = mapped_column(Text, nullable=False)
    min_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    min_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)

    confidence: Mapped[float] = mapped_column(Float, default=1.0, nullable=False)
    metadata_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )

    # OsmFeature is declared further down the module, hence the string reference.
    osm_feature: Mapped[Optional["OsmFeature"]] = relationship()
    gtfs_route: Mapped[Optional[GtfsRoute]] = relationship()
|
||||
|
||||
|
||||
class RoutePatternStop(Base):
    """Declarative model mapped to the ``route_pattern_stops`` table.

    ``sequence`` must be unique within a route pattern.
    """

    __tablename__ = "route_pattern_stops"
    __table_args__ = (
        UniqueConstraint("route_pattern_id", "sequence", name="uq_route_pattern_stop_sequence"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Mandatory, indexed foreign keys.
    route_pattern_id: Mapped[int] = mapped_column(
        ForeignKey("route_patterns.id"),
        index=True,
        nullable=False,
    )
    canonical_stop_id: Mapped[int] = mapped_column(
        ForeignKey("canonical_stops.id"),
        index=True,
        nullable=False,
    )

    sequence: Mapped[int] = mapped_column(Integer, nullable=False)
    distance_along: Mapped[Optional[float]] = mapped_column(Float, nullable=True)

    # Insert-time defaults: "timetable" and 1.0.
    source_kind: Mapped[str] = mapped_column(String(64), default="timetable", nullable=False)
    confidence: Mapped[float] = mapped_column(Float, default=1.0, nullable=False)

    route_pattern: Mapped[RoutePattern] = relationship()
    canonical_stop: Mapped[CanonicalStop] = relationship()
|
||||
|
||||
|
||||
class GtfsRoutePatternLink(Base):
    """Declarative model mapped to the ``gtfs_route_pattern_links`` table.

    The combination (dataset_id, route_id, shape_id) must be unique.
    """

    __tablename__ = "gtfs_route_pattern_links"
    __table_args__ = (
        UniqueConstraint(
            "dataset_id",
            "route_id",
            "shape_id",
            name="uq_gtfs_route_pattern_shape",
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    gtfs_route_id: Mapped[int] = mapped_column(
        ForeignKey("gtfs_routes.id"),
        index=True,
        nullable=False,
    )

    # String identifiers, both mandatory and indexed.
    route_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
    shape_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)

    route_pattern_id: Mapped[int] = mapped_column(
        ForeignKey("route_patterns.id"),
        index=True,
        nullable=False,
    )

    # Confidence starts at 0 here; status starts as "linked".
    confidence: Mapped[float] = mapped_column(Float, default=0, nullable=False)
    status: Mapped[str] = mapped_column(
        String(64),
        default="linked",
        index=True,
        nullable=False,
    )
    source_kind: Mapped[str] = mapped_column(String(64), nullable=False)
    reasons_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    gtfs_route: Mapped[GtfsRoute] = relationship()
    route_pattern: Mapped[RoutePattern] = relationship()
|
||||
|
||||
|
||||
class GtfsTripRoutePatternLink(Base):
    """Declarative model mapped to the ``gtfs_trip_route_pattern_links`` table.

    ``trip_id`` must be unique within a dataset.
    """

    __tablename__ = "gtfs_trip_route_pattern_links"
    __table_args__ = (
        UniqueConstraint("dataset_id", "trip_id", name="uq_gtfs_trip_route_pattern"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )

    # String identifiers, all mandatory and indexed.
    trip_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
    route_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
    shape_id: Mapped[str] = mapped_column(String(255), index=True, nullable=False)

    route_pattern_id: Mapped[int] = mapped_column(
        ForeignKey("route_patterns.id"),
        index=True,
        nullable=False,
    )
    source_kind: Mapped[str] = mapped_column(String(64), nullable=False)

    # Confidence starts at 0; status starts as "linked".
    confidence: Mapped[float] = mapped_column(Float, default=0, nullable=False)
    status: Mapped[str] = mapped_column(
        String(64),
        default="linked",
        index=True,
        nullable=False,
    )

    route_pattern: Mapped[RoutePattern] = relationship()
|
||||
|
||||
|
||||
class OsmFeature(Base):
    """Declarative model mapped to the ``osm_features`` table.

    The combination (dataset_id, osm_type, osm_id) must be unique.
    """

    __tablename__ = "osm_features"
    __table_args__ = (
        UniqueConstraint(
            "dataset_id",
            "osm_type",
            "osm_id",
            name="uq_osm_feature_dataset_type_id",
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    osm_type: Mapped[str] = mapped_column(String(32), nullable=False)
    # Stored as a string column, not an integer.
    osm_id: Mapped[str] = mapped_column(String(64), nullable=False)

    # Documented values: route, stop, terminal, station, infra
    kind: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    mode: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    route_scope: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)

    # Optional text attributes.
    name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    ref: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    operator: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    network: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Optional geometry text plus min/max longitude/latitude columns.
    geometry_geojson: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    min_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    min_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)

    tags_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Optional, indexed key columns.
    route_key: Mapped[Optional[str]] = mapped_column(Text, index=True, nullable=True)
    operator_key: Mapped[Optional[str]] = mapped_column(Text, index=True, nullable=True)
|
||||
|
||||
|
||||
class OsmAddress(Base):
    """Declarative model mapped to the ``osm_addresses`` table.

    The combination (dataset_id, osm_type, osm_id) must be unique.
    """

    __tablename__ = "osm_addresses"
    __table_args__ = (
        UniqueConstraint(
            "dataset_id",
            "osm_type",
            "osm_id",
            name="uq_osm_address_dataset_type_id",
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    osm_type: Mapped[str] = mapped_column(String(32), nullable=False)
    osm_id: Mapped[str] = mapped_column(String(64), nullable=False)

    # Optional address components.
    housenumber: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    street: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    place: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    postcode: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    city: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    country: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    unit: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Mandatory text columns.
    display_name: Mapped[str] = mapped_column(Text, nullable=False)
    search_text: Mapped[str] = mapped_column(Text, nullable=False)

    # A coordinate is mandatory; the min/max columns and geometry are optional.
    lat: Mapped[float] = mapped_column(Float, nullable=False)
    lon: Mapped[float] = mapped_column(Float, nullable=False)
    min_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    min_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lon: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    max_lat: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    geometry_geojson: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    tags_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
|
||||
|
||||
|
||||
class RoutingNode(Base):
    """Declarative model mapped to the ``routing_nodes`` table.

    ``osm_node_id`` must be unique within a dataset.
    """

    __tablename__ = "routing_nodes"
    __table_args__ = (
        UniqueConstraint("dataset_id", "osm_node_id", name="uq_routing_node_dataset_osm"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )
    # 64-bit integer column.
    osm_node_id: Mapped[int] = mapped_column(BigInteger, index=True, nullable=False)

    # Mandatory coordinate.
    lat: Mapped[float] = mapped_column(Float, nullable=False)
    lon: Mapped[float] = mapped_column(Float, nullable=False)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
|
||||
|
||||
|
||||
class RoutingEdge(Base):
    """Declarative model mapped to the ``routing_edges`` table.

    No table-level unique constraint is declared for this model.
    """

    __tablename__ = "routing_edges"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_id: Mapped[int] = mapped_column(
        ForeignKey("datasets.id"),
        index=True,
        nullable=False,
    )

    # 64-bit OSM identifiers; all mandatory and indexed.
    osm_way_id: Mapped[int] = mapped_column(BigInteger, index=True, nullable=False)
    source_osm_node_id: Mapped[int] = mapped_column(BigInteger, index=True, nullable=False)
    target_osm_node_id: Mapped[int] = mapped_column(BigInteger, index=True, nullable=False)

    # Endpoint coordinates are stored directly on the row.
    source_lat: Mapped[float] = mapped_column(Float, nullable=False)
    source_lon: Mapped[float] = mapped_column(Float, nullable=False)
    target_lat: Mapped[float] = mapped_column(Float, nullable=False)
    target_lon: Mapped[float] = mapped_column(Float, nullable=False)

    highway: Mapped[Optional[str]] = mapped_column(String(64), index=True, nullable=True)
    name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    length_m: Mapped[float] = mapped_column(Float, nullable=False)

    # Four optional cost columns.
    # NOTE(review): presumably NULL marks a direction/mode that is not
    # traversable - confirm against the router.
    walk_cost_s: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    reverse_walk_cost_s: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    drive_cost_s: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
    reverse_drive_cost_s: Mapped[Optional[float]] = mapped_column(Float, nullable=True)

    # Geometry and the min/max columns are all mandatory on this table.
    geometry_geojson: Mapped[str] = mapped_column(Text, nullable=False)
    min_lon: Mapped[float] = mapped_column(Float, nullable=False)
    min_lat: Mapped[float] = mapped_column(Float, nullable=False)
    max_lon: Mapped[float] = mapped_column(Float, nullable=False)
    max_lat: Mapped[float] = mapped_column(Float, nullable=False)

    tags_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
|
||||
|
||||
|
||||
class RouteMatch(Base):
    """Declarative model mapped to the ``route_matches`` table.

    Links a mandatory ``gtfs_routes`` row to an optional ``osm_features`` row.
    """

    __tablename__ = "route_matches"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    gtfs_route_id: Mapped[int] = mapped_column(
        ForeignKey("gtfs_routes.id"),
        index=True,
        nullable=False,
    )
    osm_feature_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("osm_features.id"),
        index=True,
        nullable=True,
    )

    confidence: Mapped[float] = mapped_column(Float, default=0, nullable=False)
    # Documented values: matched, probable, weak, missing, accepted, rejected
    status: Mapped[str] = mapped_column(String(64), nullable=False)
    # Documented values: auto, manual
    rule_source: Mapped[str] = mapped_column(String(64), nullable=False, default="auto")
    reasons_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Both timestamps default to now_utc at insert.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )

    gtfs_route: Mapped[GtfsRoute] = relationship()
    osm_feature: Mapped[Optional[OsmFeature]] = relationship()
|
||||
|
||||
|
||||
class MatchRule(Base):
    """Declarative model mapped to the ``match_rules`` table."""

    __tablename__ = "match_rules"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # Documented values: accept_match, reject_match, alias, force_operator
    rule_type: Mapped[str] = mapped_column(String(64), nullable=False)

    # Selector and action are both mandatory text columns.
    selector_json: Mapped[str] = mapped_column(Text, nullable=False)
    action_json: Mapped[str] = mapped_column(Text, nullable=False)
    note: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Rules are active unless a value is supplied.
    active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
|
||||
|
||||
|
||||
class JourneySearchCache(Base):
    """Declarative model mapped to the ``journey_search_cache`` table.

    ``cache_key`` is declared both unique and indexed.
    """

    __tablename__ = "journey_search_cache"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    cache_key: Mapped[str] = mapped_column(
        String(128),
        unique=True,
        index=True,
        nullable=False,
    )
    cache_type: Mapped[str] = mapped_column(String(64), index=True, nullable=False)
    payload_json: Mapped[str] = mapped_column(Text, nullable=False)

    # created_at / updated_at default to now_utc at insert.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
        default=now_utc,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )
    # No default: the writer must always supply an expiry.
    expires_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        index=True,
        nullable=False,
    )
|
||||
|
||||
|
||||
class TravelRequest(Base):
    """ORM mapping for the ``travel_requests`` table.

    Stores the parameters of one journey request (stops, departure time,
    transfer limits, optional filters) and owns its ``Itinerary`` rows.
    """

    __tablename__ = "travel_requests"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Stop selection.
    origin_stop_id: Mapped[str] = mapped_column(Text, nullable=False)
    destination_stop_id: Mapped[str] = mapped_column(Text, nullable=False)
    via_stop_id: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    # Timing is stored as text columns.
    departure_time: Mapped[str] = mapped_column(String(32), nullable=False)
    service_date: Mapped[Optional[str]] = mapped_column(
        String(10),
        nullable=True,
        index=True,
    )

    # Search constraints and preferences.
    max_transfers: Mapped[int] = mapped_column(Integer, default=1, nullable=False)
    transfer_seconds: Mapped[int] = mapped_column(Integer, default=120, nullable=False)
    source_filter: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    preferences_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
        index=True,
    )

    # Itineraries are deleted together with their request (ORM-level cascade).
    itineraries: Mapped[list["Itinerary"]] = relationship(
        back_populates="request",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class Itinerary(Base):
    """ORM mapping for the ``itineraries`` table.

    An itinerary belongs to one ``TravelRequest`` and owns an ordered set of
    ``ItineraryLeg`` rows; summary, score and payload are stored as JSON text.
    """

    __tablename__ = "itineraries"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    request_id: Mapped[int] = mapped_column(
        ForeignKey("travel_requests.id"),
        nullable=False,
        index=True,
    )
    title: Mapped[str] = mapped_column(Text, nullable=False)
    family: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    status: Mapped[str] = mapped_column(
        String(64),
        nullable=False,
        index=True,
        default="candidate",
    )
    saved: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        index=True,
        default=False,
    )

    # JSON-encoded detail blobs.
    summary_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    score_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    payload_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
        index=True,
    )
    # Only a creation-time default is configured here; no ``onupdate`` hook.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=now_utc,
    )

    request: Mapped[TravelRequest] = relationship(back_populates="itineraries")
    # Legs are deleted together with their itinerary (ORM-level cascade).
    legs: Mapped[list["ItineraryLeg"]] = relationship(
        back_populates="itinerary",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class ItineraryLeg(Base):
    """ORM mapping for the ``itinerary_legs`` table.

    A leg is one numbered segment of an ``Itinerary``; the pair
    ``(itinerary_id, sequence)`` is unique.
    """

    __tablename__ = "itinerary_legs"
    __table_args__ = (
        UniqueConstraint("itinerary_id", "sequence", name="uq_itinerary_leg_sequence"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    itinerary_id: Mapped[int] = mapped_column(
        ForeignKey("itineraries.id"),
        nullable=False,
        index=True,
    )
    sequence: Mapped[int] = mapped_column(Integer, nullable=False)

    # Descriptive fields, all optional.
    mode: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, index=True)
    route_ref: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    route_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    from_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    to_name: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    departure_time: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
    arrival_time: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)

    locked: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        index=True,
        default=False,
    )
    payload_json: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    itinerary: Mapped[Itinerary] = relationship(back_populates="legs")
|
||||
111
app/osm_classification.py
Normal file
111
app/osm_classification.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Mapping
|
||||
|
||||
|
||||
# Scope labels produced by the classifier.
LOCAL_SCOPE = "local"
REGIONAL_SCOPE = "regional"
LONG_DISTANCE_SCOPE = "long_distance"
UNKNOWN_SCOPE = "unknown"

# Version tag of the classification rules defined in this module.
OSM_ROUTE_SCOPE_CLASSIFIER_VERSION = "route_scope_v2"

# Mode groups consulted by the classifier.
BUS_MODES = {"bus", "trolleybus"}
LOCAL_MODES = {
    "tram",
    "light_rail",
    "subway",
    "ferry",
    "funicular",
    "aerialway",
    "monorail",
}
LONG_DISTANCE_MODES = {"coach"}

# Lower-cased tag values that decide the scope on their own.
LONG_DISTANCE_SERVICE_VALUES = {"high_speed", "long_distance", "intercity", "international", "night", "sleeper"}
REGIONAL_SERVICE_VALUES = {
    "regional",
    "interurban",
    "commuter",
    "branch",
    "suburban",
}
LOCAL_SERVICE_VALUES = {
    "local",
    "urban",
    "city",
    "subway",
    "tram",
    "light_rail",
    "s-bahn",
    "sbahn",
}

# Reference-prefix heuristics; each pattern accepts the prefix followed by a
# word boundary or directly by a digit, anchored at the start of the text.
LONG_DISTANCE_PREFIX_RE = re.compile(
    r"^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\b|^(ICE|IC|EC|ECE|EN|NJ|RJ|RJX|TGV|THA|EST|FLX|WB)\d"
)
REGIONAL_PREFIX_RE = re.compile(
    r"^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\b|^(IRE|RE|RB|RER|TER|REX|MEX|ALX|WFB|R)\d"
)
LOCAL_TRAIN_PREFIX_RE = re.compile(r"^(S|S-BAHN)\b|^S\d")
|
||||
|
||||
|
||||
def infer_osm_route_scope(
    *,
    mode: str | None,
    ref: str | None = None,
    name: str | None = None,
    network: str | None = None,
    tags: Mapping[str, object] | str | None = None,
) -> str | None:
    """Map a public-transport route onto one of the scope labels.

    Explicit service-like tags win over everything else; after that the mode
    decides for the fixed local / long-distance mode sets, and bus and train
    routes fall back to marker and reference-prefix heuristics on the combined
    ref/name/network text.

    Returns one of the ``*_SCOPE`` constants, or ``None`` when the mode is not
    one the classifier handles.
    """
    tag_map = _tags_dict(tags)
    mode_key = (mode or "").strip().lower()

    # Collect the normalized values of the tags that can carry a scope hint.
    service_values = set()
    for tag_key in ("service", "train", "bus", "passenger", "network:type", "route_scope"):
        raw_value = tag_map.get(tag_key)
        if raw_value:
            service_values.add(str(raw_value).strip().lower())

    # Explicit tagging: long distance is checked first, then local, then regional.
    explicit_rules = (
        (LONG_DISTANCE_SERVICE_VALUES, LONG_DISTANCE_SCOPE),
        (LOCAL_SERVICE_VALUES, LOCAL_SCOPE),
        (REGIONAL_SERVICE_VALUES, REGIONAL_SCOPE),
    )
    for known_values, scope in explicit_rules:
        if not service_values.isdisjoint(known_values):
            return scope

    if mode_key in LOCAL_MODES:
        return LOCAL_SCOPE
    if mode_key in LONG_DISTANCE_MODES:
        return LONG_DISTANCE_SCOPE

    text = _classification_text(ref, name, network, tag_map)

    def mentions(*markers: str) -> bool:
        return any(marker in text for marker in markers)

    if mode_key in BUS_MODES:
        if mentions("FLIXBUS", "EUROLINES", "INTERCITYBUS", "IC BUS", "LONG DISTANCE", "FERNBUS"):
            return LONG_DISTANCE_SCOPE
        if mentions("REGIONALBUS", "REGIOBUS", "REGIONAL BUS", "REGIONALVERKEHR", "REGIONAL VERKEHR"):
            return REGIONAL_SCOPE
        # Buses without a long-distance or regional marker count as local.
        return LOCAL_SCOPE

    if mode_key != "train":
        return None

    if LONG_DISTANCE_PREFIX_RE.search(text) or mentions("INTERCITY", "EUROCITY", "NIGHTJET", "FLIXTRAIN"):
        return LONG_DISTANCE_SCOPE
    if LOCAL_TRAIN_PREFIX_RE.search(text) or mentions("S-BAHN", "SBAHN"):
        return LOCAL_SCOPE
    if REGIONAL_PREFIX_RE.search(text) or mentions("REGIONAL", "REGIO", "REGIONALBAHN", "REGIONALEXPRESS"):
        return REGIONAL_SCOPE
    return UNKNOWN_SCOPE
|
||||
|
||||
|
||||
def infer_osm_route_scope_from_tags(mode: str | None, ref: str | None, name: str | None, network: str | None, tags_json: str | None) -> str | None:
    """Positional-argument wrapper around ``infer_osm_route_scope``.

    ``tags_json`` is forwarded unchanged as the ``tags`` argument.
    """
    scope = infer_osm_route_scope(
        mode=mode,
        ref=ref,
        name=name,
        network=network,
        tags=tags_json,
    )
    return scope
|
||||
|
||||
|
||||
def _tags_dict(tags: Mapping[str, object] | str | None) -> dict[str, object]:
    """Normalize a tags argument into a plain ``dict``.

    Strings are parsed as JSON (an empty string counts as ``{}``); invalid
    JSON or a non-object JSON value yields ``{}``. Mappings are shallow-copied.
    Anything else yields ``{}``.
    """
    if isinstance(tags, str):
        try:
            parsed = json.loads(tags or "{}")
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        return {}
    if not isinstance(tags, Mapping):
        return {}
    return dict(tags)
|
||||
|
||||
|
||||
def _classification_text(ref: str | None, name: str | None, network: str | None, tags: Mapping[str, object]) -> str:
    """Build the upper-cased text the scope heuristics are matched against.

    The explicit ref, name and network come first, followed by the ``ref``,
    ``name``, ``network`` and ``network:short`` tag values. Missing pieces
    become empty strings, the pieces are space-joined, and underscores are
    turned into spaces.
    """
    pieces = [ref or "", name or "", network or ""]
    for tag_key in ("ref", "name", "network", "network:short"):
        pieces.append(str(tags.get(tag_key) or ""))
    combined = " ".join(pieces).strip()
    return combined.upper().replace("_", " ")
|
||||
981
app/osm_storage.py
Normal file
981
app/osm_storage.py
Normal file
@@ -0,0 +1,981 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence
|
||||
|
||||
from sqlalchemy import and_, func, insert, not_, or_, select, text
|
||||
from sqlalchemy.dialects.postgresql import insert as postgresql_insert
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, OsmFeature
|
||||
from app.spatial import refresh_postgis_geometries
|
||||
|
||||
|
||||
# Key inside the dataset metadata JSON that holds the storage descriptor.
OSM_STORAGE_METADATA_KEY = "osm_storage"

# Storage modes for OSM features.
OSM_STORAGE_MAIN = "main"
OSM_STORAGE_SIDECAR_FEATURES = "sidecar_features"

SQLITE_IN_CHUNK_SIZE = 800

# Sidecar indexes that include the route_scope column.
OSM_SIDECAR_ROUTE_SCOPE_INDEXES = ["ix_osm_sidecar_scope_bbox"]

# Sidecar feature columns (without "id"), in insert order.
OSM_FEATURE_COLUMNS = [
    # identity
    "dataset_id", "osm_type", "osm_id",
    # classification
    "kind", "mode", "route_scope",
    # descriptive attributes
    "name", "ref", "operator", "network",
    # geometry and bounding box
    "geometry_geojson", "min_lon", "min_lat", "max_lon", "max_lat",
    # raw tags and derived keys
    "tags_json", "route_key", "operator_key",
]
|
||||
|
||||
|
||||
def effective_osm_feature_storage(value: str | None = None) -> str:
    """Resolve the OSM feature storage mode to use.

    ``value`` overrides ``settings.osm_feature_storage``; when both are empty
    the sidecar mode is assumed. Any of the "main" aliases selects main-table
    storage, as does a PostgreSQL database with sidecars disabled. Everything
    else resolves to sidecar storage.
    """
    requested = value or settings.osm_feature_storage or OSM_STORAGE_SIDECAR_FEATURES
    normalized = str(requested).strip().lower()
    main_aliases = {OSM_STORAGE_MAIN, "main", "main_db", "postgres", "postgresql"}
    if normalized in main_aliases or (settings.is_postgresql_database and not settings.postgres_use_sidecars):
        return OSM_STORAGE_MAIN
    return OSM_STORAGE_SIDECAR_FEATURES
|
||||
|
||||
|
||||
class MissingOsmSidecar(FileNotFoundError):
    """Raised when a dataset's OSM sidecar is not referenced or not on disk."""
|
||||
|
||||
|
||||
def dataset_metadata(dataset: Dataset) -> dict:
    """Decode ``dataset.metadata_json`` into a dict.

    An empty value, invalid JSON, or a JSON value that is not an object all
    result in an empty dict.
    """
    raw = dataset.metadata_json or "{}"
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    if isinstance(decoded, dict):
        return decoded
    return {}
|
||||
|
||||
|
||||
def features_are_sidecar(dataset: Dataset | None) -> bool:
    """Tell whether the dataset's metadata marks its features as sidecar-stored.

    The per-table entry ``tables["osm_features"]`` takes precedence when a
    ``tables`` dict is present; otherwise the descriptor's ``mode`` decides.
    A missing dataset or missing/malformed descriptor means ``False``.
    """
    storage = None
    if dataset is not None:
        storage = dataset_metadata(dataset).get(OSM_STORAGE_METADATA_KEY)
    if not isinstance(storage, dict):
        return False
    table_map = storage.get("tables")
    if not isinstance(table_map, dict):
        return storage.get("mode") == OSM_STORAGE_SIDECAR_FEATURES
    return table_map.get("osm_features") == "sidecar"
|
||||
|
||||
|
||||
def sidecar_path(dataset: Dataset | None) -> Path | None:
    """Return the sidecar path recorded in the dataset metadata, if any.

    ``None`` is returned for a missing dataset, a missing or malformed storage
    descriptor, or an empty ``sidecar_path`` entry. The path is not checked
    for existence.
    """
    if dataset is None:
        return None
    storage = dataset_metadata(dataset).get(OSM_STORAGE_METADATA_KEY)
    recorded = storage.get("sidecar_path") if isinstance(storage, dict) else None
    return Path(str(recorded)) if recorded else None
|
||||
|
||||
|
||||
def dataset_sidecar_paths(dataset: Dataset) -> list[Path]:
    """Return the dataset's recorded sidecar path as a list of zero or one item."""
    location = sidecar_path(dataset)
    if location is None:
        return []
    return [location]
|
||||
|
||||
|
||||
def missing_sidecar_paths(dataset: Dataset | None) -> list[str]:
    """List the recorded sidecar path when it should exist but does not.

    Only datasets flagged as sidecar-stored are considered; the result is
    empty when no path is recorded or the file is present.
    """
    if features_are_sidecar(dataset):
        location = sidecar_path(dataset)
        if location is not None and not location.exists():
            return [str(location)]
    return []
|
||||
|
||||
|
||||
@contextmanager
def sidecar_connection(dataset: Dataset) -> Iterator[sqlite3.Connection]:
    """Yield a read-only SQLite connection to the dataset's OSM sidecar.

    Rows are returned as ``sqlite3.Row``. The connection is closed when the
    context exits, whether or not the body raised.

    Raises:
        MissingOsmSidecar: the dataset records no sidecar path, or the
            recorded file does not exist.
    """
    path = sidecar_path(dataset)
    if path is None:
        raise MissingOsmSidecar(f"dataset #{dataset.id} does not reference an OSM sidecar")
    if not path.exists():
        raise MissingOsmSidecar(f"OSM sidecar does not exist: {path}")
    # Build a real file: URI instead of interpolating the raw path. Characters
    # such as "?", "#" or "%" in a directory or file name would otherwise be
    # read as URI syntax and point SQLite at the wrong file.
    read_only_uri = f"{path.resolve().as_uri()}?mode=ro"
    connection = sqlite3.connect(read_only_uri, uri=True)
    connection.row_factory = sqlite3.Row
    try:
        yield connection
    finally:
        connection.close()
|
||||
|
||||
|
||||
@contextmanager
def writable_sidecar_connection(dataset: Dataset) -> Iterator[sqlite3.Connection]:
    """Yield a read-write SQLite connection to the dataset's OSM sidecar.

    The busy timeout from settings and ``synchronous=NORMAL`` are applied
    before the connection is handed out. Nothing is committed here; the
    connection is only closed on exit.

    Raises:
        MissingOsmSidecar: the dataset records no sidecar path, or the
            recorded file does not exist.
    """
    location = sidecar_path(dataset)
    if location is None:
        raise MissingOsmSidecar(f"dataset #{dataset.id} does not reference an OSM sidecar")
    if not location.exists():
        raise MissingOsmSidecar(f"OSM sidecar does not exist: {location}")
    handle = sqlite3.connect(location)
    handle.row_factory = sqlite3.Row
    try:
        busy_timeout_ms = int(settings.sqlite_busy_timeout_ms)
        handle.execute(f"PRAGMA busy_timeout={busy_timeout_ms}")
        handle.execute("PRAGMA synchronous=NORMAL")
        yield handle
    finally:
        handle.close()
|
||||
|
||||
|
||||
def create_osm_sidecar(dataset: Dataset, rows: Sequence[dict[str, object]], *, source_hash: str | None = None) -> dict:
    """Write ``rows`` into a fresh SQLite sidecar file and describe the result.

    An existing file at the target path is removed first. Rows are
    de-duplicated, numbered from 1 and inserted in batches of 5000; indexes
    are created after the data is committed.

    Returns a storage descriptor with the mode, table map, sidecar path,
    inserted feature count, skipped duplicate count and per-kind counts.
    """
    target = _new_sidecar_path(dataset, source_hash or dataset.sha256)
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        target.unlink()

    connection = sqlite3.connect(target)
    try:
        # Journaling and syncing are switched off for the bulk load.
        connection.execute("PRAGMA journal_mode=OFF")
        connection.execute("PRAGMA synchronous=OFF")
        _create_schema(connection)

        unique_rows, skipped_duplicates = dedupe_osm_feature_rows(rows)
        column_list = ", ".join(["id", *OSM_FEATURE_COLUMNS])
        placeholder_list = ", ".join(["?"] * (len(OSM_FEATURE_COLUMNS) + 1))
        insert_sql = f"""
            INSERT INTO osm_features
            ({column_list})
            VALUES
            ({placeholder_list})
        """

        written = 0
        per_kind = {"route": 0, "stop": 0, "station": 0, "terminal": 0, "infra": 0, "feature": 0}
        pending = []
        for row_id, feature_row in enumerate(unique_rows, start=1):
            feature_kind = str(feature_row.get("kind") or "feature")
            per_kind[feature_kind] = per_kind.get(feature_kind, 0) + 1
            pending.append((row_id, *[feature_row.get(column) for column in OSM_FEATURE_COLUMNS]))
            if len(pending) < 5000:
                continue
            connection.executemany(insert_sql, pending)
            written += len(pending)
            pending.clear()
        if pending:
            # Flush the final partial batch.
            connection.executemany(insert_sql, pending)
            written += len(pending)
        connection.commit()

        _create_indexes(connection)
        connection.commit()
    finally:
        connection.close()

    return {
        "mode": OSM_STORAGE_SIDECAR_FEATURES,
        "tables": {"osm_features": "sidecar"},
        "sidecar_path": str(target),
        "features": written,
        "duplicate_features_skipped": skipped_duplicates,
        "counts": per_kind,
    }
|
||||
|
||||
|
||||
def ensure_osm_sidecar_schema(connection: sqlite3.Connection) -> None:
    """Add the ``route_scope`` column to ``osm_features`` when it is missing.

    The change is committed immediately; nothing happens when the column is
    already present.
    """
    if "route_scope" in _sidecar_columns(connection):
        return
    connection.execute("ALTER TABLE osm_features ADD COLUMN route_scope TEXT")
    connection.commit()
|
||||
|
||||
|
||||
def drop_osm_sidecar_route_scope_indexes(connection: sqlite3.Connection) -> None:
    """Drop every index named in ``OSM_SIDECAR_ROUTE_SCOPE_INDEXES`` if present.

    No commit is issued here.
    """
    drop_statements = [f"DROP INDEX IF EXISTS {index_name}" for index_name in OSM_SIDECAR_ROUTE_SCOPE_INDEXES]
    for drop_statement in drop_statements:
        connection.execute(drop_statement)
|
||||
|
||||
|
||||
def rebuild_osm_sidecar_indexes(connection: sqlite3.Connection) -> None:
    """Public entry point that (re)creates the sidecar indexes.

    Delegates to ``_create_indexes``; no commit is issued here.
    """
    _create_indexes(connection)
    return None
|
||||
|
||||
|
||||
def osm_feature_count(session: Session, dataset_id: int, *, kind: str | Sequence[str] | None = None) -> int:
    """Count the OSM features of a dataset, optionally restricted by kind.

    Sidecar datasets are counted in their sidecar file (0 when the file is
    missing); all other datasets are counted in the main ``OsmFeature`` table.
    """
    dataset = session.get(Dataset, dataset_id)
    uses_sidecar = features_are_sidecar(dataset)
    wanted_kinds = _as_list(kind)

    if not uses_sidecar:
        stmt = select(func.count()).select_from(OsmFeature).where(OsmFeature.dataset_id == dataset_id)
        if wanted_kinds:
            stmt = stmt.where(OsmFeature.kind.in_(wanted_kinds))
        return int(session.scalar(stmt) or 0)

    sql = "SELECT COUNT(*) FROM osm_features"
    params: list[object] = []
    if wanted_kinds:
        placeholders = ", ".join(["?"] * len(wanted_kinds))
        sql += f" WHERE kind IN ({placeholders})"
        params.extend(wanted_kinds)
    try:
        with sidecar_connection(dataset) as connection:
            total = connection.execute(sql, params).fetchone()[0]
            return int(total or 0)
    except MissingOsmSidecar:
        return 0
|
||||
|
||||
|
||||
def osm_feature_bbox(
    session: Session,
    dataset_ids: Sequence[int],
    *,
    kinds: Sequence[str] | None = None,
) -> tuple[float | None, float | None, float | None, float | None]:
    """Compute the combined bounding box of the features of several datasets.

    Main-table datasets are aggregated in one query; each sidecar dataset is
    aggregated in its own file and skipped when the file is missing. Returns
    ``(min_lon, min_lat, max_lon, max_lat)``, or four ``None`` values when no
    source produced a complete box.
    """
    empty = (None, None, None, None)
    if not dataset_ids:
        return empty

    wanted_ids = [int(value) for value in dataset_ids]
    datasets = {}
    for dataset in session.scalars(select(Dataset).where(Dataset.id.in_(wanted_ids))).all():
        datasets[dataset.id] = dataset

    boxes: list[tuple[float, float, float, float]] = []

    main_dataset_ids = [dataset_id for dataset_id, dataset in datasets.items() if not features_are_sidecar(dataset)]
    if main_dataset_ids:
        stmt = select(
            func.min(OsmFeature.min_lon),
            func.min(OsmFeature.min_lat),
            func.max(OsmFeature.max_lon),
            func.max(OsmFeature.max_lat),
        ).where(OsmFeature.dataset_id.in_(main_dataset_ids))
        if kinds:
            stmt = stmt.where(OsmFeature.kind.in_(list(kinds)))
        extent = session.execute(stmt).one()
        # Aggregates are NULL when no row matched.
        if None not in extent:
            boxes.append((float(extent[0]), float(extent[1]), float(extent[2]), float(extent[3])))

    # The sidecar query does not depend on the dataset, so it is built once.
    sidecar_sql = "SELECT MIN(min_lon), MIN(min_lat), MAX(max_lon), MAX(max_lat) FROM osm_features"
    sidecar_params: list[object] = []
    if kinds:
        placeholders = ", ".join(["?"] * len(kinds))
        sidecar_sql += " WHERE " + f"kind IN ({placeholders})"
        sidecar_params.extend(list(kinds))

    for dataset in datasets.values():
        if not features_are_sidecar(dataset):
            continue
        try:
            with sidecar_connection(dataset) as connection:
                extent = connection.execute(sidecar_sql, sidecar_params).fetchone()
                if extent is not None and None not in extent:
                    boxes.append((float(extent[0]), float(extent[1]), float(extent[2]), float(extent[3])))
        except MissingOsmSidecar:
            continue

    if not boxes:
        return empty
    west, south, east, north = zip(*boxes)
    return (min(west), min(south), max(east), max(north))
|
||||
|
||||
|
||||
def query_osm_features(
    session: Session,
    dataset_ids: Sequence[int],
    *,
    kinds: Sequence[str] | None = None,
    modes: Sequence[str] | None = None,
    bbox: tuple[float, float, float, float] | None = None,
    geometry_required: bool | None = None,
    search: str | None = None,
    route_key: str | None = None,
    route_scopes: Sequence[str] | None = None,
    ref: str | None = None,
    osm_type: str | None = None,
    osm_id: str | None = None,
    limit: int | None = None,
    offset: int | None = None,
    prefer_materialized_ids: bool = True,
) -> list[OsmFeature]:
    """Query OSM features across main-table and sidecar datasets.

    The same filters are applied to the main ``OsmFeature`` table (one query
    for all non-sidecar datasets) and to each sidecar dataset. The combined
    rows are sorted by kind, mode, ref, name and id and then trimmed to
    ``limit`` when one is given.

    NOTE(review): ``offset`` and ``limit`` are handed to each source separately
    before the merge, so paging over several sources at once does not look like
    a single global page -- confirm with callers.
    """
    if not dataset_ids:
        return []

    wanted_ids = [int(value) for value in dataset_ids]
    datasets = {}
    for dataset in session.scalars(select(Dataset).where(Dataset.id.in_(wanted_ids))).all():
        datasets[dataset.id] = dataset

    materialized_ids = _materialized_ids_by_identity(session, list(datasets)) if prefer_materialized_ids else {}

    # Filters shared by the main-table query and every sidecar query.
    filters = {
        "kinds": kinds,
        "modes": modes,
        "bbox": bbox,
        "geometry_required": geometry_required,
        "search": search,
        "route_key": route_key,
        "route_scopes": route_scopes,
        "ref": ref,
        "osm_type": osm_type,
        "osm_id": osm_id,
    }

    found: list[OsmFeature] = []

    main_dataset_ids = [dataset_id for dataset_id, dataset in datasets.items() if not features_are_sidecar(dataset)]
    if main_dataset_ids:
        stmt = _apply_main_filters(
            select(OsmFeature).where(OsmFeature.dataset_id.in_(main_dataset_ids)),
            **filters,
        )
        if offset:
            stmt = stmt.offset(max(0, int(offset)))
        stmt = stmt.order_by(OsmFeature.kind, OsmFeature.mode, OsmFeature.ref, OsmFeature.name, OsmFeature.id).limit(limit)
        found.extend(session.scalars(stmt).all())

    for dataset in datasets.values():
        if not features_are_sidecar(dataset):
            continue
        sidecar_rows = _query_sidecar_features(
            dataset,
            **filters,
            limit=limit,
            offset=offset,
            materialized_ids=materialized_ids,
        )
        found.extend(sidecar_rows)

    def sort_key(row):
        return (row.kind or "", row.mode or "", row.ref or "", row.name or "", int(row.id or 0))

    found.sort(key=sort_key)
    if limit is None:
        return found
    return found[: max(1, int(limit))]
|
||||
|
||||
|
||||
def get_osm_feature(session: Session, feature_id: int) -> OsmFeature | None:
    """Load a main-table ``OsmFeature`` by primary key, or ``None`` if absent."""
    feature = session.get(OsmFeature, feature_id)
    return feature
|
||||
|
||||
|
||||
def osm_feature_identity_key(feature: OsmFeature) -> str:
    """Return the ``dataset_id|osm_type|osm_id`` identity string of a feature."""
    identity_parts = (feature.dataset_id, feature.osm_type, feature.osm_id)
    return "|".join(str(part) for part in identity_parts)
|
||||
|
||||
|
||||
def osm_feature_public_id(feature: OsmFeature) -> int | str | None:
    """Return the identifier to expose for a feature.

    Features flagged with a truthy ``_osm_sidecar_source`` attribute are
    exposed through their identity key; all others through ``feature.id``.
    """
    from_sidecar = getattr(feature, "_osm_sidecar_source", False)
    return osm_feature_identity_key(feature) if from_sidecar else feature.id
|
||||
|
||||
|
||||
def resolve_osm_feature(session: Session, value: int | str) -> OsmFeature | None:
    """Resolve a public feature identifier to an ``OsmFeature``.

    Lookup order: main-table primary key (when ``value`` is integer-like),
    then the main table by identity key, then the dataset's sidecar file.
    Returns ``None`` when nothing matches, the value is not a valid identity
    key, the dataset is not sidecar-stored, or the sidecar is missing.
    """
    numeric_id = _safe_int(value)
    if numeric_id is not None:
        by_primary_key = session.get(OsmFeature, numeric_id)
        if by_primary_key is not None:
            return by_primary_key

    identity = parse_osm_feature_identity_key(str(value))
    if identity is None:
        return None
    dataset_id, osm_type, osm_id = identity

    # A copy in the main table takes precedence over the sidecar.
    in_main_table = session.scalar(
        select(OsmFeature).where(
            OsmFeature.dataset_id == dataset_id,
            OsmFeature.osm_type == osm_type,
            OsmFeature.osm_id == osm_id,
        )
    )
    if in_main_table is not None:
        return in_main_table

    dataset = session.get(Dataset, dataset_id)
    if not features_are_sidecar(dataset):
        return None

    try:
        with sidecar_connection(dataset) as connection:
            select_columns = ", ".join(_sidecar_select_columns(_sidecar_columns(connection)))
            lookup_sql = f"""
                SELECT id, {select_columns}
                FROM osm_features
                WHERE dataset_id = ?
                AND osm_type = ?
                AND osm_id = ?
                """
            sidecar_row = connection.execute(lookup_sql, (dataset_id, osm_type, osm_id)).fetchone()
    except MissingOsmSidecar:
        return None

    if sidecar_row is None:
        return None
    return _feature_from_row(sidecar_row, {})
|
||||
|
||||
|
||||
def parse_osm_feature_identity_key(value: str) -> tuple[int, str, str] | None:
    """Split a ``dataset_id|osm_type|osm_id`` key into its three components.

    Only the first two ``|`` characters separate fields, so the OSM id may
    itself contain ``|``. Returns ``None`` when a field is missing, the
    dataset id is not an integer, or the type or id is blank after stripping.
    """
    pieces = value.split("|", 2)
    if len(pieces) != 3:
        return None
    raw_dataset_id, raw_type, raw_id = pieces
    dataset_id = _safe_int(raw_dataset_id)
    if dataset_id is None:
        return None
    osm_type, osm_id = raw_type.strip(), raw_id.strip()
    if osm_type and osm_id:
        return dataset_id, osm_type, osm_id
    return None
|
||||
|
||||
|
||||
def ensure_main_osm_feature(session: Session, feature: OsmFeature) -> OsmFeature:
    """Return the main-table row for ``feature``, inserting it when absent.

    The row is looked up by ``(dataset_id, osm_type, osm_id)``. When missing,
    the feature's column values are inserted with a conflict-ignoring insert
    (PostgreSQL ``ON CONFLICT DO NOTHING``, otherwise ``INSERT OR IGNORE``),
    the session is flushed, PostGIS geometries are refreshed for the dataset,
    and the row is read back.

    Raises:
        RuntimeError: the row still cannot be found after the insert.
    """
    lookup = select(OsmFeature).where(
        OsmFeature.dataset_id == feature.dataset_id,
        OsmFeature.osm_type == feature.osm_type,
        OsmFeature.osm_id == feature.osm_id,
    )
    found = session.scalar(lookup)
    if found is not None:
        return found

    copied_columns = (
        "dataset_id",
        "osm_type",
        "osm_id",
        "kind",
        "mode",
        "route_scope",
        "name",
        "ref",
        "operator",
        "network",
        "geometry_geojson",
        "min_lon",
        "min_lat",
        "max_lon",
        "max_lat",
        "tags_json",
        "route_key",
        "operator_key",
    )
    values = {column: getattr(feature, column) for column in copied_columns}

    if settings.is_postgresql_database:
        insert_stmt = (
            postgresql_insert(OsmFeature)
            .values(**values)
            .on_conflict_do_nothing(index_elements=["dataset_id", "osm_type", "osm_id"])
        )
    else:
        insert_stmt = insert(OsmFeature).values(**values).prefix_with("OR IGNORE")
    session.execute(insert_stmt)
    session.flush()
    refresh_postgis_geometries(session, dataset_id=feature.dataset_id, tables=["osm_features"])

    # Read the row back; a conflicting insert leaves the pre-existing row.
    found = session.scalar(lookup)
    if found is None:
        raise RuntimeError(f"Could not materialize OSM feature {feature.dataset_id}:{feature.osm_type}:{feature.osm_id}")
    return found
|
||||
|
||||
|
||||
def materialize_osm_features(session: Session, features: Sequence[OsmFeature]) -> list[OsmFeature]:
    """Ensure each feature has a main-table row and return those rows in order."""
    materialized: list[OsmFeature] = []
    for feature in features:
        materialized.append(ensure_main_osm_feature(session, feature))
    return materialized
|
||||
|
||||
|
||||
def _new_sidecar_path(dataset: Dataset, source_hash: str | None) -> Path:
    """Build the sidecar file path for a dataset.

    The file name carries the dataset id and the first 12 characters of the
    source hash, falling back to the dataset's sha256 and then its id.
    """
    digest = source_hash or dataset.sha256 or str(dataset.id)
    directory = settings.data_dir / "sidecars" / f"source_{dataset.source_id}"
    return directory / f"osm_dataset_{dataset.id}_{digest[:12]}.sqlite"
|
||||
|
||||
|
||||
def dedupe_osm_feature_rows(rows: Sequence[dict[str, object]]) -> tuple[list[dict[str, object]], int]:
    """Keep one row per ``(dataset_id, osm_type, osm_id)`` identity.

    Among duplicates the row with the lowest ``_feature_row_preference`` tuple
    wins; on a tie the earlier row is kept. Returns copies of the kept rows in
    first-seen order together with the number of rows dropped.
    """
    best: dict[tuple[int, str, str], dict[str, object]] = {}
    for candidate in rows:
        identity = (
            int(candidate["dataset_id"]),
            str(candidate["osm_type"]),
            str(candidate["osm_id"]),
        )
        incumbent = best.get(identity)
        if incumbent is not None and _feature_row_preference(candidate) >= _feature_row_preference(incumbent):
            continue
        best[identity] = dict(candidate)
    dropped = max(0, len(rows) - len(best))
    return list(best.values()), dropped
|
||||
|
||||
|
||||
def _feature_row_preference(row: dict[str, object]) -> tuple[int, int, int]:
    """Return a sort key for duplicate resolution; lower tuples are preferred.

    Components: rank of the row's kind (unknown kinds rank last), ``0`` when a
    geometry is present else ``1``, and the negated length of the geometry
    text so that longer geometries sort first.
    """
    ranks = {"route": 0, "station": 1, "terminal": 2, "stop": 3, "infra": 4, "feature": 5}
    kind = str(row.get("kind") or "feature")
    geometry = row.get("geometry_geojson")
    missing_geometry = 1
    if geometry:
        missing_geometry = 0
    return (ranks.get(kind, 6), missing_geometry, -len(str(geometry or "")))
|
||||
|
||||
|
||||
def _create_schema(connection: sqlite3.Connection) -> None:
    """Create the ``osm_features`` table in a sidecar database.

    The statement has no ``IF NOT EXISTS`` clause. The identity triple
    ``(dataset_id, osm_type, osm_id)`` is declared unique.
    """
    table_ddl = """
        CREATE TABLE osm_features (
            id INTEGER PRIMARY KEY,
            dataset_id INTEGER NOT NULL,
            osm_type TEXT NOT NULL,
            osm_id TEXT NOT NULL,
            kind TEXT NOT NULL,
            mode TEXT,
            route_scope TEXT,
            name TEXT,
            ref TEXT,
            operator TEXT,
            network TEXT,
            geometry_geojson TEXT,
            min_lon REAL,
            min_lat REAL,
            max_lon REAL,
            max_lat REAL,
            tags_json TEXT,
            route_key TEXT,
            operator_key TEXT,
            UNIQUE(dataset_id, osm_type, osm_id)
        )
        """
    connection.execute(table_ddl)
|
||||
|
||||
|
||||
def _create_indexes(connection: sqlite3.Connection) -> None:
    """Create the sidecar lookup indexes on ``osm_features`` if they are absent.

    Every statement uses ``IF NOT EXISTS``, so repeated calls are harmless.
    No commit is issued here.
    """
    index_specs = (
        ("ix_osm_sidecar_kind_mode_bbox", "kind, mode, min_lon, max_lon, min_lat, max_lat"),
        ("ix_osm_sidecar_scope_bbox", "kind, mode, route_scope, min_lon, max_lon, min_lat, max_lat"),
        ("ix_osm_sidecar_route_key", "route_key"),
        ("ix_osm_sidecar_ref", "ref"),
        ("ix_osm_sidecar_identity", "dataset_id, osm_type, osm_id"),
        ("ix_osm_sidecar_kind_ref_mode", "kind, ref, mode"),
    )
    for index_name, indexed_columns in index_specs:
        connection.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON osm_features ({indexed_columns})")
|
||||
|
||||
|
||||
def _apply_main_filters(stmt, *, kinds, modes, bbox, geometry_required, search, route_key, route_scopes, ref, osm_type, osm_id):
    """Append a WHERE clause to ``stmt`` for every filter that is set.

    ``bbox`` is ``(min_lon, min_lat, max_lon, max_lat)`` and selects rows whose
    extent overlaps it. ``geometry_required`` filters on the presence
    (``True``) or absence (``False``) of ``geometry_geojson``; ``None`` leaves
    it unfiltered. ``search`` is a case-insensitive substring match on ref,
    name and tags_json. The bbox and search clauses use raw SQL on PostgreSQL
    and column expressions otherwise.
    """
    if kinds:
        stmt = stmt.where(OsmFeature.kind.in_(list(kinds)))
    if modes:
        stmt = stmt.where(OsmFeature.mode.in_(list(modes)))
    if route_scopes:
        scope_names = [str(scope) for scope in route_scopes]
        stmt = stmt.where(_main_route_scope_condition(scope_names))

    if bbox:
        min_lon, min_lat, max_lon, max_lat = bbox
        if settings.is_postgresql_database:
            # Use the geom column where present; fall back to the stored
            # extent columns for rows without a geometry.
            envelope_sql = """
                (
                    osm_features.geom && ST_MakeEnvelope(:bbox_min_lon, :bbox_min_lat, :bbox_max_lon, :bbox_max_lat, 4326)
                    OR (
                        osm_features.geom IS NULL
                        AND osm_features.min_lon <= :bbox_max_lon
                        AND osm_features.max_lon >= :bbox_min_lon
                        AND osm_features.min_lat <= :bbox_max_lat
                        AND osm_features.max_lat >= :bbox_min_lat
                    )
                )
                """
            stmt = stmt.where(text(envelope_sql)).params(
                bbox_min_lon=min_lon,
                bbox_min_lat=min_lat,
                bbox_max_lon=max_lon,
                bbox_max_lat=max_lat,
            )
        else:
            overlap_conditions = (
                OsmFeature.min_lon <= max_lon,
                OsmFeature.max_lon >= min_lon,
                OsmFeature.min_lat <= max_lat,
                OsmFeature.max_lat >= min_lat,
            )
            stmt = stmt.where(*overlap_conditions)

    if geometry_required is True:
        stmt = stmt.where(OsmFeature.geometry_geojson.is_not(None))
    elif geometry_required is False:
        stmt = stmt.where(OsmFeature.geometry_geojson.is_(None))

    if search:
        if settings.is_postgresql_database:
            search_sql = """
                (
                    LOWER(COALESCE(osm_features.ref, '')) LIKE :search_pattern
                    OR LOWER(COALESCE(osm_features.name, '')) LIKE :search_pattern
                    OR LOWER(COALESCE(osm_features.tags_json, '')) LIKE :search_pattern
                )
                """
            stmt = stmt.where(text(search_sql)).params(search_pattern=f"%{search.lower()}%")
        else:
            pattern = f"%{search}%"
            stmt = stmt.where(
                or_(
                    OsmFeature.ref.ilike(pattern),
                    OsmFeature.name.ilike(pattern),
                    OsmFeature.tags_json.ilike(pattern),
                )
            )

    # Exact-match filters, applied in the same order as the parameters.
    exact_filters = (
        (route_key, OsmFeature.route_key),
        (ref, OsmFeature.ref),
        (osm_type, OsmFeature.osm_type),
        (osm_id, OsmFeature.osm_id),
    )
    for wanted, column in exact_filters:
        if wanted:
            stmt = stmt.where(column == wanted)
    return stmt
|
||||
|
||||
|
||||
def _main_route_scope_condition(route_scopes: list[str]):
    """Build the SQL condition that selects rows matching ``route_scopes``.

    A row matches when its stored ``route_scope`` is one of the requested
    scopes or when the fallback heuristics select it. When ``"local"`` is
    requested, the stored-scope match excludes bus and trolleybus rows that
    the fallback heuristics classify as long distance or regional.
    """
    stored_match = OsmFeature.route_scope.in_(route_scopes)
    if "local" in route_scopes:
        is_bus = OsmFeature.mode.in_(["bus", "trolleybus"])
        looks_non_local = _main_route_scope_fallback_condition(["long_distance", "regional"])
        stored_match = and_(stored_match, not_(and_(is_bus, looks_non_local)))
    heuristic_match = _main_route_scope_fallback_condition(route_scopes)
    return or_(stored_match, heuristic_match)
|
||||
|
||||
|
||||
def _main_route_scope_fallback_condition(route_scopes: list[str]):
    """Build the heuristic (non-stored) route-scope filter for ``OsmFeature``.

    Rows are classified from ``mode`` plus LIKE tests on the upper-cased
    ref/name/network and the lower-cased ``tags_json`` text:

    * long_distance: coaches, trains with long-distance refs/names/tags and
      buses with long-distance names/networks/tags.
    * regional: trains and buses carrying regional markers that are not
      long-distance.
    * local: tram/light_rail/subway/ferry/funicular/aerialway/monorail, any
      bus that is neither long-distance nor regional, and trains with an
      ``S`` ref, an S-Bahn name/network or a commuter tag.
    * unknown: trains that fall into none of the classes above.

    The tag tests match the literal text ``"key":"value"``.
    NOTE(review): that presumably relies on ``tags_json`` being serialized
    without spaces after ``:`` - confirm against the writer.

    Returns the OR of the classes named in *route_scopes*; when none of the
    four known scope names is present the result is ``route_scope IS NULL``.
    """
    ref = func.upper(func.coalesce(OsmFeature.ref, ""))
    name = func.upper(func.coalesce(OsmFeature.name, ""))
    network = func.upper(func.coalesce(OsmFeature.network, ""))
    tags = func.lower(func.coalesce(OsmFeature.tags_json, ""))

    def likes(column, *patterns):
        """Return LIKE tests of *column* against each pattern, in order."""
        return [column.like(pattern) for pattern in patterns]

    train_long_distance = and_(
        OsmFeature.mode == "train",
        or_(
            *likes(ref, "ICE%", "IC%", "EC%", "ECE%", "EN%", "NJ%", "RJ%", "RJX%", "TGV%", "THA%", "FLX%"),
            *likes(name, "%INTERCITY%", "%EUROCITY%", "%NIGHTJET%", "%FLIXTRAIN%"),
            *likes(
                tags,
                '%"service":"long_distance"%',
                '%"train":"long_distance"%',
                '%"train":"high_speed"%',
                '%"train":"intercity"%',
            ),
        ),
    )
    bus_long_distance = and_(
        OsmFeature.mode.in_(["bus", "trolleybus"]),
        or_(
            name.like("%FLIXBUS%"),
            network.like("%FLIXBUS%"),
            name.like("%EUROLINES%"),
            network.like("%EUROLINES%"),
            *likes(name, "%INTERCITYBUS%", "%IC BUS%", "%FERNBUS%"),
            *likes(
                tags,
                '%"service":"long_distance"%',
                '%"bus":"long_distance"%',
                '%"bus":"intercity"%',
                '%"network:type":"long_distance"%',
            ),
        ),
    )
    long_distance = or_(OsmFeature.mode == "coach", train_long_distance, bus_long_distance)

    bus_regional = and_(
        OsmFeature.mode.in_(["bus", "trolleybus"]),
        not_(bus_long_distance),
        or_(
            *likes(name, "%REGIONALBUS%", "%REGIOBUS%", "%REGIONAL BUS%", "%REGIONALVERKEHR%"),
            *likes(network, "%REGIONALBUS%", "%REGIOBUS%", "%REGIONALVERKEHR%"),
            *likes(
                tags,
                '%"service":"regional"%',
                '%"bus":"regional"%',
                '%"bus":"interurban"%',
                '%"network:type":"regional"%',
            ),
        ),
    )
    local = or_(
        OsmFeature.mode.in_(["tram", "light_rail", "subway", "ferry", "funicular", "aerialway", "monorail"]),
        and_(OsmFeature.mode.in_(["bus", "trolleybus"]), not_(or_(bus_long_distance, bus_regional))),
        and_(
            OsmFeature.mode == "train",
            or_(
                ref.like("S%"),
                name.like("%S-BAHN%"),
                network.like("%S-BAHN%"),
                tags.like('%"train":"commuter"%'),
            ),
        ),
    )
    train_regional = and_(
        OsmFeature.mode == "train",
        not_(train_long_distance),
        or_(
            *likes(ref, "IRE%", "RE%", "RB%", "RER%", "TER%", "REX%", "MEX%", "ALX%", "WFB%", "R%"),
            *likes(name, "%REGIONAL%", "%REGIO%"),
            *likes(tags, '%"service":"regional"%', '%"train":"regional"%'),
        ),
    )
    regional = or_(train_regional, bus_regional)

    # Insertion order fixes the order of the OR-ed branches.
    condition_by_scope = {
        "long_distance": long_distance,
        "regional": regional,
        "local": local,
        "unknown": and_(OsmFeature.mode == "train", not_(or_(long_distance, regional, local))),
    }
    selected = [condition for scope, condition in condition_by_scope.items() if scope in route_scopes]
    if not selected:
        return OsmFeature.route_scope.is_(None)
    return or_(*selected)
|
||||
|
||||
|
||||
def _query_sidecar_features(
    dataset: Dataset,
    *,
    kinds: Sequence[str] | None,
    modes: Sequence[str] | None,
    bbox: tuple[float, float, float, float] | None,
    geometry_required: bool | None,
    search: str | None,
    route_key: str | None,
    route_scopes: Sequence[str] | None,
    ref: str | None,
    osm_type: str | None,
    osm_id: str | None,
    limit: int | None,
    offset: int | None,
    materialized_ids: dict[tuple[int, str, str], int],
) -> list[OsmFeature]:
    """Query one dataset's sidecar ``osm_features`` table.

    Every truthy filter adds an AND-ed clause with ``?`` placeholders:

    * ``kinds`` / ``modes``: ``IN`` lists.
    * ``bbox``: ``(min_lon, min_lat, max_lon, max_lat)`` overlap test against
      the stored min/max columns.
    * ``geometry_required``: ``True`` keeps rows with geometry, ``False``
      keeps rows without, ``None`` does not filter.
    * ``search``: lower-cased substring match on ref, name and tags_json.
    * ``route_scopes``: delegated to ``_sidecar_route_scope_condition``.
    * ``route_key`` / ``ref`` / ``osm_type`` / ``osm_id``: equality.

    Rows are ordered by ``kind, mode, ref, name, id``. A given ``limit`` is
    raised to at least 1; an ``offset`` without a limit uses ``LIMIT -1``.
    Columns missing from the sidecar are selected as ``NULL``.

    Returns an empty list when the sidecar is missing (``MissingOsmSidecar``).
    """
    clauses: list[str] = []
    bindings: list[object] = []

    def require(clause: str, *values: object) -> None:
        """Record one AND-ed clause together with its bound values."""
        clauses.append(clause)
        bindings.extend(values)

    try:
        with sidecar_connection(dataset) as connection:
            available_columns = _sidecar_columns(connection)

            for column, allowed in (("kind", kinds), ("mode", modes)):
                if allowed:
                    require(f"{column} IN ({', '.join('?' * len(allowed))})", *allowed)
            if bbox:
                min_lon, min_lat, max_lon, max_lat = bbox
                require("min_lon <= ?", max_lon)
                require("max_lon >= ?", min_lon)
                require("min_lat <= ?", max_lat)
                require("max_lat >= ?", min_lat)
            if geometry_required is True:
                require("geometry_geojson IS NOT NULL")
            elif geometry_required is False:
                require("geometry_geojson IS NULL")
            if search:
                needle = f"%{search.lower()}%"
                require(
                    "(LOWER(COALESCE(ref, '')) LIKE ? OR LOWER(COALESCE(name, '')) LIKE ? OR LOWER(COALESCE(tags_json, '')) LIKE ?)",
                    needle,
                    needle,
                    needle,
                )
            if route_key:
                require("route_key = ?", route_key)
            if route_scopes:
                scope_sql, scope_params = _sidecar_route_scope_condition(
                    [str(scope) for scope in route_scopes],
                    has_route_scope="route_scope" in available_columns,
                )
                require(scope_sql, *scope_params)
            for column, wanted in (("ref", ref), ("osm_type", osm_type), ("osm_id", osm_id)):
                if wanted:
                    require(f"{column} = ?", wanted)

            projection = ", ".join(_sidecar_select_columns(available_columns))
            statement = f"SELECT id, {projection} FROM osm_features"
            if clauses:
                statement += " WHERE " + " AND ".join(clauses)
            statement += " ORDER BY kind, mode, ref, name, id"
            if limit is not None:
                statement += " LIMIT ?"
                bindings.append(max(1, int(limit)))
            if offset:
                if limit is None:
                    # SQLite needs a LIMIT before OFFSET; -1 means "no limit".
                    statement += " LIMIT -1"
                statement += " OFFSET ?"
                bindings.append(max(0, int(offset)))

            rows = connection.execute(statement, bindings).fetchall()
            return [_feature_from_row(row, materialized_ids) for row in rows]
    except MissingOsmSidecar:
        return []
|
||||
|
||||
|
||||
def _sidecar_columns(connection: sqlite3.Connection) -> set[str]:
|
||||
return {str(row["name"]) for row in connection.execute("PRAGMA table_info(osm_features)").fetchall()}
|
||||
|
||||
|
||||
def _sidecar_select_columns(available_columns: set[str]) -> list[str]:
    """Return one SELECT item per entry of ``OSM_FEATURE_COLUMNS``.

    Columns present in *available_columns* are selected by name; the others
    are emitted as ``NULL AS <column>`` so every result row exposes the same
    keys whichever columns the sidecar table has.
    """

    def select_item(column: str) -> str:
        if column in available_columns:
            return column
        return f"NULL AS {column}"

    return list(map(select_item, OSM_FEATURE_COLUMNS))
|
||||
|
||||
|
||||
def _sidecar_route_scope_condition(route_scopes: list[str], *, has_route_scope: bool) -> tuple[str, list[object]]:
    """Build the sidecar SQL condition (and parameters) for *route_scopes*.

    Without a ``route_scope`` column only the heuristic fallback condition
    is returned. With the column, a row also qualifies when its stored
    scope is requested - except that, when "local" is requested, a stored
    hit on a bus/trolleybus row is ignored if the heuristics classify the
    row as long-distance or regional.

    Parameters are returned in placeholder order: stored scopes, override
    parameters, then fallback parameters.
    """
    fallback_sql, fallback_params = _sidecar_route_scope_fallback_condition(route_scopes)
    if not has_route_scope:
        return fallback_sql, fallback_params

    stored_sql = f"route_scope IN ({', '.join('?' for _ in route_scopes)})"
    stored_params: list[object] = list(route_scopes)
    if "local" in route_scopes:
        override_sql, override_params = _sidecar_route_scope_fallback_condition(["long_distance", "regional"])
        stored_sql = f"({stored_sql} AND NOT (mode IN ('bus', 'trolleybus') AND {override_sql}))"
        stored_params += override_params
    return f"({stored_sql} OR {fallback_sql})", stored_params + list(fallback_params)
|
||||
|
||||
|
||||
def _sidecar_route_scope_fallback_condition(route_scopes: list[str]) -> tuple[str, list[object]]:
|
||||
train_long_distance = """(
|
||||
mode = 'train'
|
||||
AND (
|
||||
UPPER(COALESCE(ref, '')) LIKE 'ICE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'IC%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'EC%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'ECE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'EN%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'NJ%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RJ%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RJX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'TGV%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'THA%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'FLX%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%INTERCITY%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%EUROCITY%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%NIGHTJET%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%FLIXTRAIN%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"high_speed"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"intercity"%'
|
||||
)
|
||||
)"""
|
||||
bus_long_distance = """(
|
||||
mode IN ('bus', 'trolleybus')
|
||||
AND (
|
||||
UPPER(COALESCE(name, '')) LIKE '%FLIXBUS%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%FLIXBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%EUROLINES%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%EUROLINES%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%INTERCITYBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%IC BUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%FERNBUS%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"long_distance"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"intercity"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"network:type":"long_distance"%'
|
||||
)
|
||||
)"""
|
||||
long_distance = f"(mode = 'coach' OR {train_long_distance} OR {bus_long_distance})"
|
||||
bus_regional = f"""(
|
||||
mode IN ('bus', 'trolleybus')
|
||||
AND NOT {bus_long_distance}
|
||||
AND (
|
||||
UPPER(COALESCE(name, '')) LIKE '%REGIONALBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIOBUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIONAL BUS%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIONALVERKEHR%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%REGIONALBUS%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%REGIOBUS%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%REGIONALVERKEHR%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"regional"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"regional"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"bus":"interurban"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"network:type":"regional"%'
|
||||
)
|
||||
)"""
|
||||
train_regional = f"""(
|
||||
mode = 'train'
|
||||
AND NOT {train_long_distance}
|
||||
AND (
|
||||
UPPER(COALESCE(ref, '')) LIKE 'IRE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RE%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RB%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'RER%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'TER%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'REX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'MEX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'ALX%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'WFB%'
|
||||
OR UPPER(COALESCE(ref, '')) LIKE 'R%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIONAL%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%REGIO%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"service":"regional"%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"regional"%'
|
||||
)
|
||||
)"""
|
||||
regional = f"({train_regional} OR {bus_regional})"
|
||||
local = f"""(
|
||||
mode IN ('tram', 'light_rail', 'subway', 'ferry', 'funicular', 'aerialway', 'monorail')
|
||||
OR (mode IN ('bus', 'trolleybus') AND NOT ({bus_long_distance} OR {bus_regional}))
|
||||
OR (
|
||||
mode = 'train'
|
||||
AND (
|
||||
UPPER(COALESCE(ref, '')) LIKE 'S%'
|
||||
OR UPPER(COALESCE(name, '')) LIKE '%S-BAHN%'
|
||||
OR UPPER(COALESCE(network, '')) LIKE '%S-BAHN%'
|
||||
OR LOWER(COALESCE(tags_json, '')) LIKE '%"train":"commuter"%'
|
||||
)
|
||||
)
|
||||
)"""
|
||||
parts = []
|
||||
if "long_distance" in route_scopes:
|
||||
parts.append(long_distance)
|
||||
if "regional" in route_scopes:
|
||||
parts.append(regional)
|
||||
if "local" in route_scopes:
|
||||
parts.append(local)
|
||||
if "unknown" in route_scopes:
|
||||
parts.append(f"(mode = 'train' AND NOT ({long_distance} OR {regional} OR {local}))")
|
||||
return "(" + " OR ".join(parts or ["0"]) + ")", []
|
||||
|
||||
|
||||
def _feature_from_row(row: sqlite3.Row, materialized_ids: dict[tuple[int, str, str], int]) -> OsmFeature:
    """Turn one sidecar row into an ``OsmFeature`` object.

    The feature's ``id`` is the id registered in *materialized_ids* for the
    row's ``(dataset_id, osm_type, osm_id)`` identity, falling back to the
    sidecar row id. The object is tagged with ``_osm_sidecar_source`` and
    ``_osm_sidecar_row_id`` so later code can tell where it came from.
    """
    identity = (int(row["dataset_id"]), str(row["osm_type"]), str(row["osm_id"]))
    sidecar_row_id = int(row["id"])
    dataset_id, osm_type, osm_id = identity

    # Columns copied from the row unchanged, in constructor order.
    copied_columns = (
        "mode",
        "route_scope",
        "name",
        "ref",
        "operator",
        "network",
        "geometry_geojson",
        "min_lon",
        "min_lat",
        "max_lon",
        "max_lat",
        "tags_json",
        "route_key",
        "operator_key",
    )
    feature = OsmFeature(
        id=materialized_ids.get(identity, sidecar_row_id),
        dataset_id=dataset_id,
        osm_type=osm_type,
        osm_id=osm_id,
        kind=str(row["kind"]),
        **{column: row[column] for column in copied_columns},
    )
    for marker, value in (("_osm_sidecar_source", True), ("_osm_sidecar_row_id", sidecar_row_id)):
        setattr(feature, marker, value)
    return feature
|
||||
|
||||
|
||||
def _materialized_ids_by_identity(session: Session, dataset_ids: Sequence[int]) -> dict[tuple[int, str, str], int]:
    """Map ``(dataset_id, osm_type, osm_id)`` to the main-table feature id.

    Covers every ``OsmFeature`` row belonging to *dataset_ids*; returns an
    empty dict without querying when *dataset_ids* is empty.
    """
    if not dataset_ids:
        return {}

    statement = select(
        OsmFeature.dataset_id,
        OsmFeature.osm_type,
        OsmFeature.osm_id,
        OsmFeature.id,
    ).where(OsmFeature.dataset_id.in_(dataset_ids))

    ids_by_identity: dict[tuple[int, str, str], int] = {}
    for dataset_id, osm_type, osm_id, feature_id in session.execute(statement).all():
        ids_by_identity[(int(dataset_id), str(osm_type), str(osm_id))] = int(feature_id)
    return ids_by_identity
|
||||
|
||||
|
||||
def _as_list(value: str | Sequence[str] | None) -> list[str]:
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, str):
|
||||
return [value]
|
||||
return [str(item) for item in value]
|
||||
|
||||
|
||||
def _safe_int(value: object) -> int | None:
|
||||
try:
|
||||
return int(value) # type: ignore[arg-type]
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
61
app/performance.py
Normal file
61
app/performance.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
@contextmanager
def measure_pipeline_phase(
    phase: str,
    *,
    source_id: int | None = None,
    dataset_id: int | None = None,
    metadata: dict[str, object] | None = None,
) -> Iterator[dict[str, object]]:
    """Time the enclosed block and record one pipeline metric when it exits.

    Yields a mutable dict seeded with a copy of *metadata*; anything the
    caller adds to it is stored as the metric's metadata, together with a
    ``duration_seconds`` entry (wall time rounded to milliseconds). The
    metric is written from a ``finally`` clause, so it is recorded whether
    the block completes or raises.
    """
    started = time.perf_counter()
    details: dict[str, object] = dict(metadata) if metadata else {}
    try:
        yield details
    finally:
        elapsed = round(time.perf_counter() - started, 3)
        details.update(duration_seconds=elapsed)
        record_pipeline_metric(
            phase,
            source_id=source_id,
            dataset_id=dataset_id,
            duration_seconds=elapsed,
            metadata=details,
        )
|
||||
|
||||
|
||||
def record_pipeline_metric(
    phase: str,
    *,
    source_id: int | None = None,
    dataset_id: int | None = None,
    duration_seconds: float | None = None,
    metadata: dict[str, object] | None = None,
) -> None:
    """Append one metric row as a compact JSON line to the metrics file.

    The row holds a UTC ISO-8601 timestamp, the phase name, the optional
    source/dataset ids, the duration and the metadata (``{}`` when absent).
    Values JSON cannot encode natively are serialized with ``str``. The
    parent directory is created on demand.
    """
    target = _metric_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    record = dict(
        timestamp=datetime.now(timezone.utc).isoformat(),
        phase=phase,
        source_id=source_id,
        dataset_id=dataset_id,
        duration_seconds=duration_seconds,
        metadata=metadata or {},
    )
    with target.open("a", encoding="utf-8") as stream:
        stream.write(json.dumps(record, separators=(",", ":"), default=str) + "\n")
|
||||
|
||||
|
||||
def _metric_path() -> Path:
    """Return the pipeline metrics JSONL file path under the data directory."""
    metrics_dir = settings.data_dir / "metrics"
    return metrics_dir / "pipeline_metrics.jsonl"
|
||||
111
app/pipeline/download.py
Normal file
111
app/pipeline/download.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import time
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Source
|
||||
from app.pipeline.utils import sha256_file
|
||||
|
||||
|
||||
def materialize_source(source: Source) -> Path:
    """Download/copy a source into the local cache and return the file path.

    Files are stored by content hash per source. Re-running an unchanged source
    reuses the existing cached file instead of creating another timestamped copy.

    HTTP(S) URLs are streamed into a ``*.download<suffix>`` temp file in the
    per-source directory. A leftover partial file is resumed with a ``Range``
    request. If the server answers 416 (range not satisfiable, e.g. the
    partial file is already complete or the remote file shrank), the download
    restarts from byte 0 and overwrites the partial file; otherwise the same
    stale file would make every later run fail.

    ``file://`` URLs and plain paths are copied into the cache, unless the
    file already lives inside the per-source directory, in which case it is
    returned as-is. A ``file://`` path is used literally when it exists;
    otherwise its percent-decoded form (``%20`` -> space) is tried.

    Raises:
        FileNotFoundError: the local source path does not exist.
        requests.HTTPError: the server returned an error status.
    """
    from urllib.parse import unquote  # only needed for file:// URLs

    source_dir = settings.data_dir / "sources" / f"source_{source.id}"
    source_dir.mkdir(parents=True, exist_ok=True)
    suffix = _guess_suffix(source.url, source.kind)

    parsed = urlparse(source.url)
    if parsed.scheme in {"http", "https"}:
        temp_path = _download_temp_path(source_dir, suffix)
        resume_from = temp_path.stat().st_size if temp_path.exists() else 0
        while True:
            headers = {"Range": f"bytes={resume_from}-"} if resume_from > 0 else None
            with requests.get(source.url, stream=True, timeout=120, headers=headers) as r:
                if resume_from > 0 and r.status_code == 416:
                    # The partial file cannot be resumed; retry once without Range.
                    resume_from = 0
                    continue
                r.raise_for_status()
                # Append only when the server honoured the range (206); a plain
                # 200 carries the whole body, so the partial file is overwritten.
                mode = "ab" if resume_from > 0 and r.status_code == 206 else "wb"
                with temp_path.open(mode) as f:
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)
            break
        return _store_or_reuse_cached_file(source_dir=source_dir, source_path=temp_path, suffix=suffix, move=True)

    if parsed.scheme == "file":
        source_path = Path(parsed.path)
        if not source_path.exists():
            # Keep the literal path when it works; otherwise decode escapes
            # such as %20 that a file:// URL uses for special characters.
            source_path = Path(unquote(parsed.path))
    else:
        source_path = Path(source.url)

    if not source_path.exists():
        raise FileNotFoundError(f"Source file does not exist: {source.url}")
    if _is_relative_to(source_path.resolve(), source_dir.resolve()):
        return source_path
    return _store_or_reuse_cached_file(source_dir=source_dir, source_path=source_path, suffix=suffix, move=False)
|
||||
|
||||
|
||||
def _download_temp_path(source_dir: Path, suffix: str) -> Path:
|
||||
candidates = sorted(
|
||||
source_dir.glob(f"*.download{suffix}"),
|
||||
key=lambda path: path.stat().st_mtime if path.exists() else 0,
|
||||
reverse=True,
|
||||
)
|
||||
if candidates:
|
||||
return candidates[0]
|
||||
return source_dir / f"{int(time.time())}.download{suffix}"
|
||||
|
||||
|
||||
def _guess_suffix(url: str, kind: str) -> str:
|
||||
path = urlparse(url).path or url
|
||||
lower = path.lower()
|
||||
for suffix in (".zip", ".geojson", ".json", ".osm.pbf", ".pbf", ".osm", ".osm.xml", ".osc.gz", ".osc", ".csv"):
|
||||
if lower.endswith(suffix):
|
||||
return suffix
|
||||
if kind == "gtfs":
|
||||
return ".zip"
|
||||
if kind == "osm_geojson":
|
||||
return ".geojson"
|
||||
return ".dat"
|
||||
|
||||
|
||||
def _store_or_reuse_cached_file(source_dir: Path, source_path: Path, suffix: str, move: bool) -> Path:
    """Place *source_path* in the cache under its hash name, or reuse a copy.

    The cache name is the first 16 hex characters of the file's SHA-256
    plus *suffix*. If that file, or any other file in *source_dir* with the
    same suffix and hash, already exists, it is returned and, when *move* is
    true, *source_path* is deleted. Otherwise the file is moved (``move``)
    or copied into place.
    """
    digest = sha256_file(source_path)
    target = source_dir / f"{digest[:16]}{suffix}"

    def reuse(cached: Path) -> Path:
        """Drop the now-redundant input (move mode only) and return *cached*."""
        if move and source_path != cached:
            source_path.unlink(missing_ok=True)
        return cached

    if target.exists() and sha256_file(target) == digest:
        return reuse(target)

    duplicate = _find_existing_cached_file(source_dir, digest, suffix, exclude=source_path)
    if duplicate is not None:
        return reuse(duplicate)

    if move:
        source_path.replace(target)
    else:
        shutil.copyfile(source_path, target)
    return target
|
||||
|
||||
|
||||
def _find_existing_cached_file(source_dir: Path, source_hash: str, suffix: str, exclude: Path | None = None) -> Path | None:
|
||||
for candidate in sorted(source_dir.glob(f"*{suffix}")):
|
||||
if exclude is not None and candidate.resolve() == exclude.resolve():
|
||||
continue
|
||||
if candidate.is_file() and sha256_file(candidate) == source_hash:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def _is_relative_to(path: Path, parent: Path) -> bool:
|
||||
try:
|
||||
path.relative_to(parent)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
1327
app/pipeline/gtfs.py
Normal file
1327
app/pipeline/gtfs.py
Normal file
File diff suppressed because it is too large
Load Diff
995
app/pipeline/matcher.py
Normal file
995
app/pipeline/matcher.py
Normal file
@@ -0,0 +1,995 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from typing import Callable, Optional
|
||||
|
||||
from shapely.geometry import LineString, MultiLineString, Point, shape
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, GtfsRoute, MatchRule, OsmFeature, RouteMatch
|
||||
from app.osm_storage import ensure_main_osm_feature, osm_feature_bbox, query_osm_features
|
||||
from app.pipeline.state import STAGE_MATCH_ROUTES, dependency_hash, finish_pipeline_run, start_pipeline_run
|
||||
from app.pipeline.utils import approx_bbox_center_distance_deg, bbox_overlap, norm_ref, norm_text
|
||||
|
||||
# Maps a transport mode to a set of mode names that includes itself and
# related modes. NOTE(review): presumably the modes treated as compatible when
# scoring a GTFS route against an OSM route - the consumer is not in this view.
MODE_GROUPS = {
    "train": {"train", "rail", "railway"},
    "subway": {"subway", "metro"},
    "tram": {"tram", "light_rail"},
    "light_rail": {"light_rail", "tram"},
    "bus": {"bus", "coach", "trolleybus"},
    "coach": {"coach", "bus"},
    "trolleybus": {"trolleybus", "bus"},
    "ferry": {"ferry"},
    "funicular": {"funicular"},
    "aerialway": {"aerialway", "cable_car"},
    "monorail": {"monorail"},
}
# Candidate caps and spatial tolerances.
# NOTE(review): judging by the names these bound the candidate lists and give
# distances in degrees; they are used outside this view - confirm there.
MAX_FALLBACK_CANDIDATES_WITH_REF = 40
MAX_FALLBACK_CANDIDATES_WITHOUT_REF = 80
MAX_EXACT_REF_CANDIDATES = 120
OSM_SCOPE_NEAR_DISTANCE_DEG = 0.15
GEOMETRY_PROXIMITY_DEG = 0.0035
GEOMETRY_SAMPLE_POINTS = 24
# Recorded as the pipeline-run version and included in the dependency payload
# that is hashed for the run.
MATCHER_VERSION = "matcher_v4_scope_spatial_manual_rules"
# Progress hook signature. NOTE(review): going by the _emit_progress calls the
# arguments are (event, message, processed, total, details) - confirm.
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, object] | None], None]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _ManualMatchRule:
    """Immutable in-memory form of one manual match rule.

    NOTE(review): instances are built by ``_manual_match_rules`` (outside
    this view), presumably from ``MatchRule`` rows - confirm the field
    mapping there.
    """

    id: int  # reported as "manual_rule_id" in the reasons of accepted matches
    rule_type: str
    route_selector: dict[str, object]  # presumably selects the GTFS route(s) - confirm
    osm_selector: dict[str, object] | None  # presumably selects the OSM feature - confirm
    status: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _OsmRouteIndex:
    """Immutable set of lookup tables over OSM route features.

    NOTE(review): not used in the visible part of the file; going by the
    field names it groups the same features by ref, route key and mode -
    confirm how the keys are normalized where it is built.
    """

    all_routes: list[OsmFeature]
    by_ref: dict[str, list[OsmFeature]]
    by_route_key: dict[str, list[OsmFeature]]
    by_mode: dict[str, list[OsmFeature]]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _GeometryProfile:
    """Immutable bundle of geometry data derived for one route or feature.

    NOTE(review): presumably what ``_geometry_profile`` returns for a GeoJSON
    geometry, so the derived values are computed once per geometry - the
    builder is outside this view; confirm units and sampling there.
    """

    geom: object  # presumably the parsed shapely geometry - confirm
    lines: list[LineString]
    length: float
    sample_points: list[Point]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _RouteMatchPayload:
    """Desired ``RouteMatch`` column values computed for one GTFS route.

    Built per route by ``_match_route_batch`` and reconciled against the
    stored rows by ``_apply_route_match_payloads``.
    """

    gtfs_route_id: int  # primary key of the GtfsRoute row being matched
    osm_feature_id: int | None  # None when no OSM feature was matched
    confidence: float  # 100.0 for manual matches, 0.0 for missing ones, else the rounded score
    status: str  # "accepted", "missing" or the value from _status_from_score
    rule_source: str  # "manual" or "auto"
    reasons_json: str | None  # compact JSON explaining the decision
|
||||
|
||||
|
||||
def run_route_matching(
    session: Session,
    *,
    progress_callback: ProgressCallback | None = None,
    batch_size: int | None = None,
) -> dict[str, object]:
    """Match active GTFS routes against active OSM route features.

    Routes of all active ``gtfs`` datasets are processed in id batches
    (*batch_size*, defaulting to ``settings.route_matching_batch_size``, at
    least 1) against the active ``osm_geojson`` datasets. Each batch is
    committed and reported through *progress_callback*. The work is wrapped
    in a pipeline run for the route-matching stage.

    Returns:
        Counters for ``routes``, ``matches``, ``missing``, ``manual``,
        ``created``, ``updated`` and ``unchanged`` plus a ``scope``
        breakdown. When there is no active GTFS dataset or no route, only
        ``routes``, ``matches`` and ``missing`` (all 0) are returned and no
        pipeline run is started.
    """
    active_datasets = session.execute(
        select(Dataset.id, Dataset.kind, Dataset.source_id).where(Dataset.is_active.is_(True))
    ).all()
    dataset_source_ids = {int(dataset_id): int(source_id) for dataset_id, _, source_id in active_datasets}
    gtfs_dataset_ids = [int(dataset_id) for dataset_id, kind, _ in active_datasets if kind == "gtfs"]
    osm_dataset_ids = [int(dataset_id) for dataset_id, kind, _ in active_datasets if kind == "osm_geojson"]
    if not gtfs_dataset_ids:
        return {"routes": 0, "matches": 0, "missing": 0}

    route_query = (
        select(GtfsRoute.id)
        .where(GtfsRoute.dataset_id.in_(gtfs_dataset_ids))
        .order_by(GtfsRoute.dataset_id, GtfsRoute.route_id, GtfsRoute.id)
    )
    route_row_ids = session.scalars(route_query).all()
    # Reconcile current match rows from auto scoring plus durable manual rules.
    total_routes = len(route_row_ids)
    if not total_routes:
        return {"routes": 0, "matches": 0, "missing": 0}

    dependency = _route_matching_dependency(session, active_datasets)
    run = start_pipeline_run(
        session,
        stage=STAGE_MATCH_ROUTES,
        version=MATCHER_VERSION,
        dependency_hash_value=dependency_hash(dependency),
        inputs=dependency,
    )
    session.commit()

    effective_batch_size = max(1, int(batch_size or settings.route_matching_batch_size))
    _emit_progress(
        progress_callback,
        "route_matching_started",
        f"Matching {total_routes} GTFS routes in batches of {effective_batch_size}.",
        0,
        total_routes,
        {"gtfs_datasets": gtfs_dataset_ids, "osm_datasets": osm_dataset_ids, "batch_size": effective_batch_size},
    )

    manual_rules = _manual_match_rules(session)
    osm_scope_bbox = osm_feature_bbox(session, osm_dataset_ids, kinds=["route"])
    batch_keys = ("matches", "missing", "manual", "created", "updated", "unchanged")
    counts = {"routes": total_routes, **dict.fromkeys(batch_keys, 0)}
    scoped_counts = {"in_osm_scope": 0, "near_osm_scope": 0, "outside_osm_scope": 0, "unknown_scope": 0}
    processed = 0

    for chunk in _chunks_int(route_row_ids, effective_batch_size):
        batch_query = (
            select(GtfsRoute)
            .where(GtfsRoute.id.in_(chunk))
            .order_by(GtfsRoute.dataset_id, GtfsRoute.route_id, GtfsRoute.id)
        )
        routes = session.scalars(batch_query).all()
        batch_counts = _match_route_batch(
            session=session,
            routes=routes,
            osm_dataset_ids=osm_dataset_ids,
            dataset_source_ids=dataset_source_ids,
            manual_rules=manual_rules,
            osm_scope_bbox=osm_scope_bbox,
            scoped_counts=scoped_counts,
        )
        for key in batch_keys:
            counts[key] += batch_counts[key]
        processed += len(routes)
        session.commit()
        _emit_progress(
            progress_callback,
            "route_matching_batch",
            f"Matched {processed}/{total_routes} GTFS routes.",
            processed,
            total_routes,
            {
                "processed": processed,
                **{key: counts[key] for key in batch_keys},
                # Snapshot, so later batches do not alter what was reported.
                "scope": dict(scoped_counts),
            },
        )

    result = {**counts, "scope": scoped_counts}
    finish_pipeline_run(session, run, outputs=result)
    session.commit()
    _emit_progress(
        progress_callback,
        "route_matching_completed",
        "Route matching completed.",
        total_routes,
        total_routes,
        result,
    )
    return result
|
||||
|
||||
|
||||
def _route_matching_dependency(session: Session, active_datasets) -> dict[str, object]:
    """Describe everything the route-matching result depends on.

    The payload holds the matcher version, one entry per active dataset
    (id, kind, source id, content hash) and one entry per ``MatchRule`` row
    in id order, active or not. The caller hashes it and stores it as the
    inputs of the pipeline run.
    """
    dataset_entries = []
    for dataset_id, kind, source_id in active_datasets:
        dataset_entries.append(
            {
                "id": int(dataset_id),
                "kind": str(kind),
                "source_id": int(source_id),
                "sha256": _dataset_sha(session, int(dataset_id)),
            }
        )

    rule_entries = []
    for rule in session.scalars(select(MatchRule).order_by(MatchRule.id)).all():
        rule_entries.append(
            {
                "id": int(rule.id),
                "type": rule.rule_type,
                "active": bool(rule.active),
                "selector": rule.selector_json,
                "action": rule.action_json,
            }
        )

    return {"version": MATCHER_VERSION, "active_datasets": dataset_entries, "manual_rules": rule_entries}
|
||||
|
||||
|
||||
def _dataset_sha(session: Session, dataset_id: int) -> str | None:
    """Return the stored ``sha256`` of a dataset, or ``None`` if it is gone."""
    dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        return None
    return dataset.sha256
|
||||
|
||||
|
||||
def _match_route_batch(
    *,
    session: Session,
    routes: list[GtfsRoute],
    osm_dataset_ids: list[int],
    dataset_source_ids: dict[int, int],
    manual_rules: list[_ManualMatchRule],
    osm_scope_bbox: tuple[float | None, float | None, float | None, float | None],
    scoped_counts: dict[str, int],
) -> dict[str, int]:
    """Decide the match for every route in *routes* and persist the outcome.

    Per route, in order:

    1. Its scope relative to *osm_scope_bbox* is counted in *scoped_counts*
       (mutated in place).
    2. An accepted manual rule whose OSM feature can be loaded wins with
       confidence 100 and status ``accepted``.
    3. Routes outside the loaded OSM scope are recorded as ``missing``
       without scoring.
    4. Otherwise all candidate OSM routes that no manual rule rejects are
       scored and the highest strictly positive score is kept; the status
       comes from ``_status_from_score``.

    Matched sidecar features are materialized through
    ``ensure_main_osm_feature`` before their id is stored. The payloads are
    applied with ``_apply_route_match_payloads`` and the session is flushed.

    Returns:
        ``matches``, ``missing`` and ``manual`` counts for the batch plus the
        ``created``/``updated``/``unchanged`` counts of the applied rows.
    """
    tally = {"matches": 0, "missing": 0, "manual": 0}
    payloads: list[_RouteMatchPayload] = []

    def queue_payload(route_id: int, feature_id: int | None, confidence: float, status: str, source: str, reasons: dict[str, object]) -> None:
        """Append one payload with its reasons serialized as compact JSON."""
        payloads.append(
            _RouteMatchPayload(
                gtfs_route_id=route_id,
                osm_feature_id=feature_id,
                confidence=confidence,
                status=status,
                rule_source=source,
                reasons_json=json.dumps(reasons, separators=(",", ":")),
            )
        )

    for route in routes:
        scope = route_match_scope(route, osm_scope_bbox)
        scoped_counts[scope] = scoped_counts.get(scope, 0) + 1
        route_source_id = dataset_source_ids.get(route.dataset_id)

        accepted_rule = _accepted_rule_for_route(manual_rules, route, route_source_id)
        if accepted_rule is not None:
            accepted_feature = _feature_for_rule_from_storage(session, osm_dataset_ids, dataset_source_ids, accepted_rule)
            if accepted_feature is not None:
                accepted_feature = ensure_main_osm_feature(session, accepted_feature)
                queue_payload(
                    route.id,
                    accepted_feature.id,
                    100.0,
                    "accepted",
                    "manual",
                    {"manual_rule_id": accepted_rule.id, "manual": "accepted_match", "scope": scope},
                )
                tally["matches"] += 1
                tally["manual"] += 1
                continue
            # The rule's feature could not be loaded: fall back to auto matching.

        if scope == "outside_osm_scope":
            tally["missing"] += 1
            queue_payload(
                route.id,
                None,
                0.0,
                "missing",
                "auto",
                {"reason": "outside loaded OSM route scope", "scope": scope},
            )
            continue

        best_feature: Optional[OsmFeature] = None
        best_score = 0.0
        best_reasons: dict[str, object] = {}
        route_geometry_profile = _geometry_profile(route.geometry_geojson)
        for feature in candidate_osm_routes_for_route(session, route, osm_dataset_ids):
            feature_source_id = dataset_source_ids.get(feature.dataset_id)
            if _is_rejected_pair(manual_rules, route, route_source_id, feature, feature_source_id):
                continue
            score, reasons = score_route_pair(
                route,
                feature,
                route_geometry_profile=route_geometry_profile,
                feature_geometry_profile=_geometry_profile(feature.geometry_geojson),
            )
            if score > best_score:
                best_feature, best_score, best_reasons = feature, score, reasons

        status = _status_from_score(best_score)
        if best_feature is not None and status != "missing":
            tally["matches"] += 1
            best_feature = ensure_main_osm_feature(session, best_feature)
            best_reasons["scope"] = scope
            queue_payload(route.id, best_feature.id, round(float(best_score), 2), status, "auto", best_reasons)
        else:
            tally["missing"] += 1
            queue_payload(
                route.id,
                None,
                0.0,
                status,
                "auto",
                {
                    "reason": "no OSM candidate above threshold",
                    "scope": scope,
                    "best_score_below_threshold": round(float(best_score), 2) if best_score else 0,
                    "best_reasons": best_reasons,
                },
            )

    changes = _apply_route_match_payloads(session, payloads)
    session.flush()
    return {**tally, **changes}
|
||||
|
||||
|
||||
def _apply_route_match_payloads(session: Session, payloads: list[_RouteMatchPayload]) -> dict[str, int]:
|
||||
if not payloads:
|
||||
return {"created": 0, "updated": 0, "unchanged": 0}
|
||||
route_ids = [payload.gtfs_route_id for payload in payloads]
|
||||
existing_rows = session.scalars(
|
||||
select(RouteMatch).where(RouteMatch.gtfs_route_id.in_(route_ids)).order_by(RouteMatch.gtfs_route_id, RouteMatch.id)
|
||||
).all()
|
||||
existing_by_route: dict[int, list[RouteMatch]] = {}
|
||||
for row in existing_rows:
|
||||
existing_by_route.setdefault(row.gtfs_route_id, []).append(row)
|
||||
|
||||
created = 0
|
||||
updated = 0
|
||||
unchanged = 0
|
||||
duplicate_ids: list[int] = []
|
||||
now = datetime.now(timezone.utc)
|
||||
for payload in payloads:
|
||||
existing = existing_by_route.get(payload.gtfs_route_id, [])
|
||||
current = _preferred_existing_match(existing)
|
||||
if current is None:
|
||||
session.add(
|
||||
RouteMatch(
|
||||
gtfs_route_id=payload.gtfs_route_id,
|
||||
osm_feature_id=payload.osm_feature_id,
|
||||
confidence=payload.confidence,
|
||||
status=payload.status,
|
||||
rule_source=payload.rule_source,
|
||||
reasons_json=payload.reasons_json,
|
||||
)
|
||||
)
|
||||
created += 1
|
||||
continue
|
||||
|
||||
duplicate_ids.extend(row.id for row in existing if row.id != current.id)
|
||||
if _route_match_payload_equal(current, payload):
|
||||
unchanged += 1
|
||||
continue
|
||||
current.osm_feature_id = payload.osm_feature_id
|
||||
current.confidence = payload.confidence
|
||||
current.status = payload.status
|
||||
current.rule_source = payload.rule_source
|
||||
current.reasons_json = payload.reasons_json
|
||||
current.updated_at = now
|
||||
updated += 1
|
||||
|
||||
for chunk in _chunks_int(duplicate_ids, 1000):
|
||||
session.execute(delete(RouteMatch).where(RouteMatch.id.in_(chunk)))
|
||||
return {"created": created, "updated": updated, "unchanged": unchanged}
|
||||
|
||||
|
||||
def _preferred_existing_match(rows: list[RouteMatch]) -> RouteMatch | None:
|
||||
if not rows:
|
||||
return None
|
||||
return next((row for row in rows if row.rule_source == "manual"), rows[0])
|
||||
|
||||
|
||||
def _route_match_payload_equal(row: RouteMatch, payload: _RouteMatchPayload) -> bool:
|
||||
return (
|
||||
row.osm_feature_id == payload.osm_feature_id
|
||||
and round(float(row.confidence or 0), 2) == round(float(payload.confidence or 0), 2)
|
||||
and row.status == payload.status
|
||||
and row.rule_source == payload.rule_source
|
||||
and (row.reasons_json or None) == (payload.reasons_json or None)
|
||||
)
|
||||
|
||||
|
||||
def _build_osm_route_index(osm_routes: list[OsmFeature]) -> _OsmRouteIndex:
    """Group OSM route features by normalized ref, by route key and by mode.

    A feature is only filed under a grouping when the corresponding value is
    non-empty. The original list is kept on the index as ``all_routes``.
    """
    ref_buckets: dict[str, list[OsmFeature]] = {}
    key_buckets: dict[str, list[OsmFeature]] = {}
    mode_buckets: dict[str, list[OsmFeature]] = {}
    for osm_route in osm_routes:
        groupings = (
            (ref_buckets, norm_ref(osm_route.ref or "")),
            (key_buckets, osm_route.route_key),
            (mode_buckets, osm_route.mode),
        )
        for bucket, value in groupings:
            if value:
                bucket.setdefault(value, []).append(osm_route)
    return _OsmRouteIndex(
        all_routes=osm_routes,
        by_ref=ref_buckets,
        by_route_key=key_buckets,
        by_mode=mode_buckets,
    )
|
||||
|
||||
|
||||
def _candidate_osm_routes(route: GtfsRoute, index: _OsmRouteIndex) -> list[OsmFeature]:
    """Select OSM route candidates for a GTFS route from an in-memory index.

    Features sharing the route's normalized ref or route key are preferred;
    when any exist they are returned, spatially ranked and limited to
    ``MAX_EXACT_REF_CANDIDATES``. Otherwise features of a compatible mode are
    filtered to those whose bounding box overlaps the route's or whose bbox
    centre lies within 0.12 degrees, nearest first. If nothing is near, the
    first mode candidates are used as a last resort.

    Args:
        route: GTFS route to find candidates for.
        index: index built by ``_build_osm_route_index``.

    Returns:
        Spatially ranked candidate features, mode-compatible with ``route``.
    """
    selected: list[OsmFeature] = []
    seen: set[int] = set()

    def add(features: list[OsmFeature], *, require_compatible_mode: bool = True) -> None:
        # Append unseen features, optionally dropping incompatible modes.
        for feature in features:
            if feature.id in seen:
                continue
            if require_compatible_mode and not _mode_compatible(route.mode or "", feature.mode or ""):
                continue
            seen.add(feature.id)
            selected.append(feature)

    route_ref = norm_ref(route.short_name or route.route_id)
    if route_ref:
        add(index.by_ref.get(route_ref, []))
    if route.route_key:
        add(index.by_route_key.get(route.route_key, []))
    if selected:
        return _spatially_ranked_candidates(route, selected, MAX_EXACT_REF_CANDIDATES)

    # Sorted so the candidate order, and therefore the truncated fallback
    # below, does not depend on set iteration order (string hashing is
    # randomized per process). Mirrors candidate_osm_routes_for_route.
    compatible_modes = sorted(
        mode for mode in MODE_GROUPS.get(route.mode or "", {route.mode or ""}) if mode
    )
    mode_candidates: list[OsmFeature] = []
    for mode in compatible_modes:
        mode_candidates.extend(index.by_mode.get(mode, []))
    if not mode_candidates:
        mode_candidates = index.all_routes

    gtfs_bbox = (route.min_lon, route.min_lat, route.max_lon, route.max_lat)
    near_candidates: list[tuple[float, OsmFeature]] = []
    for feature in mode_candidates:
        osm_bbox = (feature.min_lon, feature.min_lat, feature.max_lon, feature.max_lat)
        distance = approx_bbox_center_distance_deg(gtfs_bbox, osm_bbox)
        if bbox_overlap(gtfs_bbox, osm_bbox):
            # Overlapping boxes rank ahead of any merely-near feature.
            near_candidates.append((0.0, feature))
        elif distance is not None and distance < 0.12:
            near_candidates.append((distance, feature))
    fallback_limit = MAX_FALLBACK_CANDIDATES_WITH_REF if route_ref else MAX_FALLBACK_CANDIDATES_WITHOUT_REF
    fallback = [feature for _, feature in sorted(near_candidates, key=lambda item: item[0])[:fallback_limit]]
    if not fallback:
        fallback = mode_candidates[:fallback_limit]
    add(fallback)
    return _spatially_ranked_candidates(route, selected, fallback_limit)
|
||||
|
||||
|
||||
def candidate_osm_routes_for_route(session: Session, route: GtfsRoute, osm_dataset_ids: list[int]) -> list[OsmFeature]:
    """Query storage for OSM route candidates matching a GTFS route.

    Lookup order:
      1. features whose route key equals the route's key or normalized ref
         (mode-compatible only); if any are found they are returned, ranked
         and limited to ``MAX_EXACT_REF_CANDIDATES``;
      2. features of a compatible mode inside the route bbox padded by 0.10,
         when the route bbox is fully known;
      3. if still nothing, features of a compatible mode without a bbox filter.

    Returns an empty list when no dataset ids are given.
    """
    if not osm_dataset_ids:
        return []

    collected: list[OsmFeature] = []
    known_keys: set[tuple[int, str, str]] = set()

    def add(features: list[OsmFeature], *, require_compatible_mode: bool = True) -> None:
        # De-duplicate on (dataset, OSM type, OSM id) and optionally check mode.
        for candidate in features:
            identity = (candidate.dataset_id, candidate.osm_type, candidate.osm_id)
            if identity in known_keys:
                continue
            if require_compatible_mode and not _mode_compatible(route.mode or "", candidate.mode or ""):
                continue
            known_keys.add(identity)
            collected.append(candidate)

    route_ref = norm_ref(route.short_name or route.route_id)
    lookup_keys = dict.fromkeys(key for key in (route.route_key, route_ref) if key)
    for lookup_key in lookup_keys:
        exact_hits = query_osm_features(
            session,
            osm_dataset_ids,
            kinds=["route"],
            route_key=lookup_key,
        )
        add(exact_hits)
    if collected:
        return _spatially_ranked_candidates(route, collected, MAX_EXACT_REF_CANDIDATES)

    gtfs_bbox = (route.min_lon, route.min_lat, route.max_lon, route.max_lat)
    compatible_modes = sorted(MODE_GROUPS.get(route.mode or "", {route.mode or ""}) - {""})
    if all(corner is not None for corner in gtfs_bbox):
        search_area = _expanded_bbox(gtfs_bbox, 0.10)
        nearby = query_osm_features(
            session,
            osm_dataset_ids,
            kinds=["route"],
            modes=compatible_modes or None,
            bbox=search_area,
            limit=MAX_FALLBACK_CANDIDATES_WITHOUT_REF * 4,
        )
        # The query already filtered on mode, so skip the per-feature check.
        add(nearby, require_compatible_mode=False)
    if not collected:
        by_mode_only = query_osm_features(
            session,
            osm_dataset_ids,
            kinds=["route"],
            modes=compatible_modes or None,
            limit=MAX_FALLBACK_CANDIDATES_WITHOUT_REF,
        )
        add(by_mode_only, require_compatible_mode=False)

    if route_ref:
        fallback_limit = MAX_FALLBACK_CANDIDATES_WITH_REF
    else:
        fallback_limit = MAX_FALLBACK_CANDIDATES_WITHOUT_REF
    return _spatially_ranked_candidates(route, collected, fallback_limit)
|
||||
|
||||
|
||||
def score_route_pair(
    route: GtfsRoute,
    feature: OsmFeature,
    route_geometry_profile: _GeometryProfile | None = None,
    feature_geometry_profile: _GeometryProfile | None = None,
) -> tuple[float, dict[str, object]]:
    """Score how well an OSM route feature matches a GTFS route.

    The score adds up mode compatibility, ref equality/containment, name and
    operator similarity, bounding-box proximity, sampled geometry overlap and
    route-key equality. Several "strong identity" floors then raise it and a
    spatial cap may lower it. A mode mismatch (both modes known) short-circuits
    with a score of 0.

    Args:
        route: GTFS route.
        feature: OSM route feature.
        route_geometry_profile: optional precomputed profile of the route geometry.
        feature_geometry_profile: optional precomputed profile of the feature
            geometry; profiles are only used when both are supplied, otherwise
            the geometries are profiled from their GeoJSON text.

    Returns:
        ``(score, reasons)`` with the score capped at 100 and ``reasons``
        listing the contributing signals in evaluation order.
    """
    reasons: dict[str, object] = {}
    total = 0.0

    # --- mode -------------------------------------------------------------
    gtfs_mode = route.mode or ""
    osm_mode = feature.mode or ""
    modes_ok = _mode_compatible(gtfs_mode, osm_mode)
    if modes_ok:
        total += 25
        reasons["mode"] = "compatible"
    elif gtfs_mode and osm_mode:
        reasons["mode"] = f"mismatch: {gtfs_mode} != {osm_mode}"
        return 0.0, reasons

    # --- ref --------------------------------------------------------------
    gtfs_ref = norm_ref(route.short_name or route.route_id)
    osm_ref = norm_ref(feature.ref or "")
    both_refs = bool(gtfs_ref and osm_ref)
    same_ref = both_refs and gtfs_ref == osm_ref
    nested_ref = both_refs and not same_ref and (gtfs_ref in osm_ref or osm_ref in gtfs_ref)
    if same_ref:
        total += 25
        reasons["ref"] = "exact"
    elif nested_ref:
        total += 15
        reasons["ref"] = "partial"

    # --- name and operator ------------------------------------------------
    gtfs_name = norm_text(" ".join(part for part in [route.long_name, route.short_name, route.route_id] if part))
    osm_name = norm_text(" ".join(part for part in [feature.name, feature.ref] if part))
    name_similarity = _ratio(gtfs_name, osm_name)
    total += 20 * name_similarity
    reasons["name_similarity"] = round(name_similarity, 3)

    gtfs_operator = norm_text(route.operator_name or "")
    osm_operator = norm_text(" ".join(part for part in [feature.operator, feature.network] if part))
    if gtfs_operator and osm_operator:
        operator_similarity = _ratio(gtfs_operator, osm_operator)
    else:
        operator_similarity = 0
    total += 15 * operator_similarity
    reasons["operator_similarity"] = round(operator_similarity, 3)

    # --- bounding boxes ---------------------------------------------------
    gtfs_bbox = (route.min_lon, route.min_lat, route.max_lon, route.max_lat)
    osm_bbox = (feature.min_lon, feature.min_lat, feature.max_lon, feature.max_lat)
    overlaps = bbox_overlap(gtfs_bbox, osm_bbox)
    center_distance = None
    if overlaps:
        total += 14
        reasons["bbox"] = "overlap"
        if same_ref and modes_ok:
            total += 8
            reasons["line_identity"] = "exact_ref_mode_bbox_overlap"
    else:
        center_distance = approx_bbox_center_distance_deg(gtfs_bbox, osm_bbox)
        if center_distance is not None:
            if center_distance < 0.01:
                total += 12
            elif center_distance < 0.03:
                total += 8
            elif center_distance < 0.08:
                total += 4
            elif same_ref and center_distance > OSM_SCOPE_NEAR_DISTANCE_DEG:
                total -= 8
                reasons["spatial_penalty"] = "exact_ref_far_bbox_center"
            # NOTE(review): nesting reconstructed from a flattened source; this
            # is assumed to be recorded for every known distance - confirm.
            reasons["bbox_center_distance_deg"] = round(center_distance, 5)

    # --- geometry ---------------------------------------------------------
    if route_geometry_profile is not None and feature_geometry_profile is not None:
        geometry_metrics = _geometry_match_metrics_from_profiles(route_geometry_profile, feature_geometry_profile)
    else:
        geometry_metrics = _geometry_match_metrics(route.geometry_geojson, feature.geometry_geojson)
    if geometry_metrics is not None:
        reasons["geometry"] = geometry_metrics
        geometry_score = 34 * float(geometry_metrics["gtfs_on_osm_ratio"]) + 8 * float(geometry_metrics["osm_on_gtfs_ratio"])
        if float(geometry_metrics["endpoint_distance_deg"]) < GEOMETRY_PROXIMITY_DEG * 2:
            geometry_score += 6
        length_ratio = float(geometry_metrics["length_ratio"])
        if length_ratio < 0.35 or length_ratio > 2.8:
            geometry_score -= 8
            reasons["geometry_length"] = "implausible_ratio"
        # The geometry contribution is clamped to the 0..42 range.
        total += max(0.0, min(42.0, geometry_score))

    # Extra small boost for same normalized route key.
    same_key = bool(route.route_key and feature.route_key and route.route_key == feature.route_key)
    if same_key:
        total += 5
        reasons["route_key"] = "same"

    # --- strong-identity floors --------------------------------------------
    if same_ref and modes_ok:
        if overlaps:
            total = max(total, 88.0)
            reasons["strong_identity"] = "exact_ref_mode_bbox_overlap"
        elif center_distance is not None and center_distance < 0.02:
            total = max(total, 82.0)
            reasons["strong_identity"] = "exact_ref_mode_near_bbox_center"

    if same_key and modes_ok and overlaps:
        total = max(total, 86.0)
        # Keeps an earlier, ref-based identity label when one was set.
        reasons.setdefault("strong_identity", "same_route_key_mode_bbox_overlap")

    if geometry_metrics is not None:
        gtfs_on_osm = float(geometry_metrics["gtfs_on_osm_ratio"])
        endpoint_distance = float(geometry_metrics["endpoint_distance_deg"])
        if gtfs_on_osm >= 0.82 and endpoint_distance < GEOMETRY_PROXIMITY_DEG * 3 and modes_ok:
            if same_ref:
                total = max(total, 90.0)
                reasons["strong_identity"] = "exact_ref_mode_geometry_overlap"
            elif nested_ref:
                total = max(total, 82.0)
                reasons["strong_identity"] = "partial_ref_mode_geometry_overlap"

    # --- spatial cap --------------------------------------------------------
    # An identical ref far away, with little or no geometry overlap, is capped.
    if (
        same_ref
        and center_distance is not None
        and center_distance > OSM_SCOPE_NEAR_DISTANCE_DEG
        and not overlaps
        and (
            geometry_metrics is None
            or float(geometry_metrics.get("gtfs_on_osm_ratio", 0.0)) < 0.25
        )
    ):
        total = min(total, 58.0)
        reasons["spatial_cap"] = "exact_ref_far_without_geometry_overlap"

    return min(total, 100.0), reasons
|
||||
|
||||
|
||||
def route_match_scope(route: GtfsRoute, osm_scope_bbox: tuple[float | None, float | None, float | None, float | None]) -> str:
    """Classify a route's position relative to the bounding box of the loaded OSM data.

    Returns one of ``"unknown_scope"`` (a coordinate is missing on either
    side), ``"in_osm_scope"`` (boxes overlap), ``"near_osm_scope"`` (bbox
    centres closer than ``OSM_SCOPE_NEAR_DISTANCE_DEG``) or
    ``"outside_osm_scope"``.
    """
    route_bbox = (route.min_lon, route.min_lat, route.max_lon, route.max_lat)
    for box in (route_bbox, osm_scope_bbox):
        if any(corner is None for corner in box):
            return "unknown_scope"

    if bbox_overlap(route_bbox, osm_scope_bbox):
        return "in_osm_scope"

    gap = approx_bbox_center_distance_deg(route_bbox, osm_scope_bbox)
    is_near = gap is not None and gap < OSM_SCOPE_NEAR_DISTANCE_DEG
    return "near_osm_scope" if is_near else "outside_osm_scope"
|
||||
|
||||
|
||||
def _combined_bbox(features: list[OsmFeature]) -> tuple[float | None, float | None, float | None, float | None]:
|
||||
boxes = [
|
||||
(feature.min_lon, feature.min_lat, feature.max_lon, feature.max_lat)
|
||||
for feature in features
|
||||
if None not in (feature.min_lon, feature.min_lat, feature.max_lon, feature.max_lat)
|
||||
]
|
||||
if not boxes:
|
||||
return (None, None, None, None)
|
||||
return (
|
||||
min(float(box[0]) for box in boxes if box[0] is not None),
|
||||
min(float(box[1]) for box in boxes if box[1] is not None),
|
||||
max(float(box[2]) for box in boxes if box[2] is not None),
|
||||
max(float(box[3]) for box in boxes if box[3] is not None),
|
||||
)
|
||||
|
||||
|
||||
def _spatially_ranked_candidates(route: GtfsRoute, candidates: list[OsmFeature], limit: int) -> list[OsmFeature]:
|
||||
return [
|
||||
feature
|
||||
for _, feature in sorted(
|
||||
((_spatial_rank(route, feature), feature) for feature in candidates),
|
||||
key=lambda item: item[0],
|
||||
)[: max(1, limit)]
|
||||
]
|
||||
|
||||
|
||||
def _spatial_rank(route: GtfsRoute, feature: OsmFeature) -> tuple[int, float, str]:
    """Build a sort key expressing how close a feature's bbox is to the route's.

    The key is ``(bucket, distance, osm_id)``. Bucket 0 means the boxes
    overlap, 1 means the centres are closer than
    ``OSM_SCOPE_NEAR_DISTANCE_DEG``, 2 means a known larger distance and 3
    means the distance is unknown (reported as 999.0).
    """
    route_bbox = (route.min_lon, route.min_lat, route.max_lon, route.max_lat)
    feature_bbox = (feature.min_lon, feature.min_lat, feature.max_lon, feature.max_lat)
    gap = approx_bbox_center_distance_deg(route_bbox, feature_bbox)
    if bbox_overlap(route_bbox, feature_bbox):
        bucket = 0
    elif gap is None:
        bucket = 3
    elif gap < OSM_SCOPE_NEAR_DISTANCE_DEG:
        bucket = 1
    else:
        bucket = 2
    sortable_gap = 999.0 if gap is None else gap
    return (bucket, sortable_gap, feature.osm_id)
|
||||
|
||||
|
||||
def _expanded_bbox(
|
||||
bbox: tuple[float | None, float | None, float | None, float | None],
|
||||
padding: float,
|
||||
) -> tuple[float, float, float, float] | None:
|
||||
min_lon, min_lat, max_lon, max_lat = bbox
|
||||
if None in (min_lon, min_lat, max_lon, max_lat):
|
||||
return None
|
||||
return (float(min_lon) - padding, float(min_lat) - padding, float(max_lon) + padding, float(max_lat) + padding)
|
||||
|
||||
|
||||
def _chunks_int(values: list[int], size: int) -> list[list[int]]:
|
||||
return [values[start : start + size] for start in range(0, len(values), max(1, size))]
|
||||
|
||||
|
||||
def _emit_progress(
|
||||
progress_callback: ProgressCallback | None,
|
||||
event_type: str,
|
||||
message: str,
|
||||
progress_current: int | None,
|
||||
progress_total: int | None,
|
||||
metadata: dict[str, object] | None = None,
|
||||
) -> None:
|
||||
if progress_callback is not None:
|
||||
progress_callback(event_type, message, progress_current, progress_total, metadata)
|
||||
|
||||
|
||||
def _geometry_match_metrics(route_geometry: str | None, feature_geometry: str | None) -> dict[str, float] | None:
    """Profile both GeoJSON geometry texts and compare them.

    Convenience wrapper for callers without precomputed profiles; the result
    is whatever ``_geometry_match_metrics_from_profiles`` returns.
    """
    return _geometry_match_metrics_from_profiles(
        _geometry_profile(route_geometry),
        _geometry_profile(feature_geometry),
    )
|
||||
|
||||
|
||||
def _geometry_profile(geometry_text: str | None) -> _GeometryProfile | None:
|
||||
if not geometry_text:
|
||||
return None
|
||||
try:
|
||||
geom = shape(json.loads(geometry_text))
|
||||
except Exception: # noqa: BLE001 - malformed geometry should not break matching
|
||||
return None
|
||||
lines = _iter_lines(geom)
|
||||
if not lines:
|
||||
return None
|
||||
length = sum(line.length for line in lines)
|
||||
if length == 0:
|
||||
return None
|
||||
sample_points = _sample_line_points(lines, GEOMETRY_SAMPLE_POINTS)
|
||||
if not sample_points:
|
||||
return None
|
||||
return _GeometryProfile(geom=geom, lines=lines, length=length, sample_points=sample_points)
|
||||
|
||||
|
||||
def _geometry_match_metrics_from_profiles(
|
||||
route_profile: _GeometryProfile | None, feature_profile: _GeometryProfile | None
|
||||
) -> dict[str, float] | None:
|
||||
if route_profile is None or feature_profile is None:
|
||||
return None
|
||||
gtfs_on_osm = _near_point_ratio(route_profile.sample_points, feature_profile.geom, GEOMETRY_PROXIMITY_DEG)
|
||||
osm_on_gtfs = _near_point_ratio(feature_profile.sample_points, route_profile.geom, GEOMETRY_PROXIMITY_DEG)
|
||||
endpoint_distance = _endpoint_distance(route_profile.lines, feature_profile.geom)
|
||||
length_ratio = route_profile.length / feature_profile.length if feature_profile.length else 0.0
|
||||
return {
|
||||
"gtfs_on_osm_ratio": round(gtfs_on_osm, 3),
|
||||
"osm_on_gtfs_ratio": round(osm_on_gtfs, 3),
|
||||
"endpoint_distance_deg": round(endpoint_distance, 6),
|
||||
"length_ratio": round(length_ratio, 3),
|
||||
}
|
||||
|
||||
|
||||
def _iter_lines(geom) -> list[LineString]:
    """Return the line parts of a geometry.

    A ``LineString`` is returned as a one-element list; a ``MultiLineString``
    yields its members that are ``LineString`` instances with positive
    length. Any other geometry type gives an empty list.
    """
    if isinstance(geom, LineString):
        return [geom]
    if not isinstance(geom, MultiLineString):
        return []
    parts: list[LineString] = []
    for member in geom.geoms:
        if isinstance(member, LineString) and member.length > 0:
            parts.append(member)
    return parts
|
||||
|
||||
|
||||
def _sample_line_points(lines: list[LineString], count: int) -> list[Point]:
|
||||
total_length = sum(line.length for line in lines)
|
||||
if total_length == 0:
|
||||
return []
|
||||
points = []
|
||||
for index in range(count):
|
||||
target = total_length * (index / max(1, count - 1))
|
||||
traversed = 0.0
|
||||
for line in lines:
|
||||
next_traversed = traversed + line.length
|
||||
if target <= next_traversed or line is lines[-1]:
|
||||
points.append(line.interpolate(max(0.0, min(line.length, target - traversed))))
|
||||
break
|
||||
traversed = next_traversed
|
||||
return points
|
||||
|
||||
|
||||
def _near_point_ratio(points: list[Point], geom, max_distance: float) -> float:
|
||||
if not points:
|
||||
return 0.0
|
||||
near = sum(1 for point in points if geom.distance(point) <= max_distance)
|
||||
return near / len(points)
|
||||
|
||||
|
||||
def _endpoint_distance(gtfs_lines: list[LineString], osm_geom) -> float:
|
||||
longest = max(gtfs_lines, key=lambda line: line.length)
|
||||
coords = list(longest.coords)
|
||||
if len(coords) < 2:
|
||||
return 999.0
|
||||
return osm_geom.distance(Point(coords[0])) + osm_geom.distance(Point(coords[-1]))
|
||||
|
||||
|
||||
def _manual_match_rules(session: Session) -> list[_ManualMatchRule]:
    """Load the active accept/reject match rules and parse their JSON payloads.

    Rules are read newest first (descending id). A rule is skipped when its
    selector or action is not valid JSON, or is valid JSON that is not an
    object. The second check keeps one bad row (a list, string, number or
    ``null`` payload) from raising ``AttributeError`` and aborting the run.

    The GTFS selector is ``selector["gtfs"]`` when that is an object,
    otherwise the whole selector. The OSM selector is ``action["osm"]`` when
    that is an object, else ``selector["osm"]``, else a legacy
    ``{"osm_feature_id": ...}`` built from the selector. The status defaults
    to ``"accepted"`` for ``accept_match`` rules and ``"rejected"`` otherwise.
    """
    rules = session.scalars(
        select(MatchRule)
        .where(MatchRule.active.is_(True), MatchRule.rule_type.in_(["accept_match", "reject_match"]))
        .order_by(MatchRule.id.desc())
    ).all()
    parsed: list[_ManualMatchRule] = []
    for rule in rules:
        try:
            selector = json.loads(rule.selector_json or "{}")
            action = json.loads(rule.action_json or "{}")
        except json.JSONDecodeError:
            continue
        if not isinstance(selector, dict) or not isinstance(action, dict):
            # Well-formed JSON of the wrong shape is as unusable as malformed JSON.
            continue
        route_selector = selector.get("gtfs") if isinstance(selector.get("gtfs"), dict) else selector
        osm_selector = action.get("osm") if isinstance(action.get("osm"), dict) else selector.get("osm")
        if not isinstance(osm_selector, dict) and selector.get("osm_feature_id") is not None:
            osm_selector = {"osm_feature_id": selector.get("osm_feature_id")}
        status = str(action.get("status") or ("accepted" if rule.rule_type == "accept_match" else "rejected"))
        parsed.append(
            _ManualMatchRule(
                id=rule.id,
                rule_type=rule.rule_type,
                route_selector=route_selector,
                osm_selector=osm_selector if isinstance(osm_selector, dict) else None,
                status=status,
            )
        )
    return parsed
|
||||
|
||||
|
||||
def _accepted_rule_for_route(
|
||||
rules: list[_ManualMatchRule], route: GtfsRoute, route_source_id: int | None
|
||||
) -> _ManualMatchRule | None:
|
||||
for rule in rules:
|
||||
if rule.rule_type != "accept_match":
|
||||
continue
|
||||
if rule.status != "accepted":
|
||||
continue
|
||||
if _route_matches_selector(route, route_source_id, rule.route_selector):
|
||||
return rule
|
||||
return None
|
||||
|
||||
|
||||
def _feature_for_rule(
|
||||
features: list[OsmFeature], dataset_source_ids: dict[int, int], rule: _ManualMatchRule
|
||||
) -> OsmFeature | None:
|
||||
if not rule.osm_selector:
|
||||
return None
|
||||
for feature in features:
|
||||
if _feature_matches_selector(feature, dataset_source_ids.get(feature.dataset_id), rule.osm_selector):
|
||||
return feature
|
||||
return None
|
||||
|
||||
|
||||
def _feature_for_rule_from_storage(
|
||||
session: Session,
|
||||
osm_dataset_ids: list[int],
|
||||
dataset_source_ids: dict[int, int],
|
||||
rule: _ManualMatchRule,
|
||||
) -> OsmFeature | None:
|
||||
if not rule.osm_selector:
|
||||
return None
|
||||
selector = rule.osm_selector
|
||||
legacy_id = _safe_int(selector.get("osm_feature_id"))
|
||||
if legacy_id is not None:
|
||||
feature = session.get(OsmFeature, legacy_id)
|
||||
if feature is not None and _feature_matches_selector(feature, dataset_source_ids.get(feature.dataset_id), selector):
|
||||
return feature
|
||||
scoped_dataset_ids = list(osm_dataset_ids)
|
||||
expected_source = selector.get("source_id")
|
||||
if expected_source is not None:
|
||||
expected_source_id = _safe_int(expected_source)
|
||||
if expected_source_id is not None:
|
||||
scoped_dataset_ids = [
|
||||
dataset_id
|
||||
for dataset_id in scoped_dataset_ids
|
||||
if dataset_source_ids.get(dataset_id) == expected_source_id
|
||||
]
|
||||
dataset_id = _safe_int(selector.get("dataset_id"))
|
||||
if dataset_id is not None:
|
||||
scoped_dataset_ids = [value for value in scoped_dataset_ids if value == dataset_id]
|
||||
if not scoped_dataset_ids:
|
||||
return None
|
||||
|
||||
features: list[OsmFeature] = []
|
||||
osm_type = selector.get("osm_type")
|
||||
osm_id = selector.get("osm_id")
|
||||
if osm_type and osm_id:
|
||||
features = query_osm_features(
|
||||
session,
|
||||
scoped_dataset_ids,
|
||||
kinds=["route"],
|
||||
osm_type=str(osm_type),
|
||||
osm_id=str(osm_id),
|
||||
limit=10,
|
||||
)
|
||||
if not features:
|
||||
route_key = selector.get("route_key")
|
||||
if route_key:
|
||||
features = query_osm_features(session, scoped_dataset_ids, kinds=["route"], route_key=str(route_key))
|
||||
if not features:
|
||||
ref = norm_ref(selector.get("ref"))
|
||||
if ref:
|
||||
features = query_osm_features(session, scoped_dataset_ids, kinds=["route"], route_key=ref)
|
||||
for feature in features:
|
||||
if _feature_matches_selector(feature, dataset_source_ids.get(feature.dataset_id), selector):
|
||||
return feature
|
||||
return None
|
||||
|
||||
|
||||
def _is_rejected_pair(
|
||||
rules: list[_ManualMatchRule],
|
||||
route: GtfsRoute,
|
||||
route_source_id: int | None,
|
||||
feature: OsmFeature,
|
||||
feature_source_id: int | None,
|
||||
) -> bool:
|
||||
for rule in rules:
|
||||
if rule.rule_type != "reject_match":
|
||||
continue
|
||||
if not _route_matches_selector(route, route_source_id, rule.route_selector):
|
||||
continue
|
||||
if rule.osm_selector and _feature_matches_selector(feature, feature_source_id, rule.osm_selector):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _route_matches_selector(route: GtfsRoute, source_id: int | None, selector: dict[str, object]) -> bool:
|
||||
legacy_id = selector.get("gtfs_route_id")
|
||||
if legacy_id is not None and _safe_int(legacy_id) == route.id:
|
||||
return True
|
||||
expected_source = selector.get("source_id")
|
||||
if expected_source is not None and source_id is not None and _safe_int(expected_source) != source_id:
|
||||
return False
|
||||
route_id = selector.get("route_id")
|
||||
if route_id and str(route_id) == route.route_id:
|
||||
return True
|
||||
route_key = selector.get("route_key")
|
||||
if route_key and route.route_key and str(route_key) == route.route_key:
|
||||
return True
|
||||
ref = norm_ref(selector.get("ref"))
|
||||
mode = selector.get("mode")
|
||||
if ref and ref == norm_ref(route.short_name or route.route_id):
|
||||
return not mode or _mode_compatible(str(mode), route.mode or "")
|
||||
return False
|
||||
|
||||
|
||||
def _feature_matches_selector(feature: OsmFeature, source_id: int | None, selector: dict[str, object]) -> bool:
|
||||
legacy_id = selector.get("osm_feature_id")
|
||||
if legacy_id is not None and _safe_int(legacy_id) == feature.id:
|
||||
return True
|
||||
expected_source = selector.get("source_id")
|
||||
if expected_source is not None and source_id is not None and _safe_int(expected_source) != source_id:
|
||||
return False
|
||||
osm_type = selector.get("osm_type")
|
||||
osm_id = selector.get("osm_id")
|
||||
if osm_type and osm_id and str(osm_type) == feature.osm_type and str(osm_id) == feature.osm_id:
|
||||
return True
|
||||
route_key = selector.get("route_key")
|
||||
if route_key and feature.route_key and str(route_key) == feature.route_key:
|
||||
return True
|
||||
ref = norm_ref(selector.get("ref"))
|
||||
mode = selector.get("mode")
|
||||
if ref and ref == norm_ref(feature.ref or ""):
|
||||
return not mode or _mode_compatible(str(mode), feature.mode or "")
|
||||
return False
|
||||
|
||||
|
||||
def _safe_int(value: object) -> int | None:
|
||||
try:
|
||||
return int(value) # type: ignore[arg-type]
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _mode_compatible(gtfs_mode: str, osm_mode: str) -> bool:
|
||||
if not gtfs_mode or not osm_mode:
|
||||
return True
|
||||
if gtfs_mode == osm_mode:
|
||||
return True
|
||||
return osm_mode in MODE_GROUPS.get(gtfs_mode, {gtfs_mode}) or gtfs_mode in MODE_GROUPS.get(osm_mode, {osm_mode})
|
||||
|
||||
|
||||
def _ratio(a: str, b: str) -> float:
|
||||
if not a or not b:
|
||||
return 0.0
|
||||
if a == b:
|
||||
return 1.0
|
||||
token_ratio = _token_similarity(a, b)
|
||||
if a in b or b in a:
|
||||
token_ratio = max(token_ratio, 0.82)
|
||||
return token_ratio
|
||||
|
||||
|
||||
def _token_similarity(a: str, b: str) -> float:
|
||||
left = set(a.split())
|
||||
right = set(b.split())
|
||||
if not left or not right:
|
||||
return 0.0
|
||||
return len(left & right) / len(left | right)
|
||||
|
||||
|
||||
def _status_from_score(score: float) -> str:
|
||||
if score >= 85:
|
||||
return "matched"
|
||||
if score >= 65:
|
||||
return "probable"
|
||||
if score >= 40:
|
||||
return "weak"
|
||||
return "missing"
|
||||
508
app/pipeline/osm_addresses.py
Normal file
508
app/pipeline/osm_addresses.py
Normal file
@@ -0,0 +1,508 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
import osmium
|
||||
from sqlalchemy import delete, func, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, OsmAddress
|
||||
from app.pipeline.routing_layer import active_routing_dataset
|
||||
from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries
|
||||
|
||||
|
||||
# Signature of a progress callback: (str, str, int | None, int | None, dict | None) -> None.
# Call sites in this module pass an event name, a human-readable message, two
# progress counters and a metadata dict - presumably forwarded unchanged by _emit.
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, object] | None], None]
# Version marker stored in the dataset's "address_index" metadata and used as
# the default AddressIndexResult.version.
ADDRESS_INDEX_VERSION = "osm_addresses_v2_nodes_ways_area_geometry"
# OSM tag keys related to addresses.
# NOTE(review): the consumer is outside this excerpt - presumably the address
# handler keeps or inspects only these tags; confirm.
ADDRESS_TAGS = {
    "addr:housenumber",
    "addr:housename",
    "addr:street",
    "addr:place",
    "addr:postcode",
    "addr:city",
    "addr:country",
    "addr:unit",
    "addr:suburb",
    "addr:district",
    "addr:municipality",
    "entrance",
    "name",
}
|
||||
|
||||
|
||||
@dataclass
class AddressIndexResult:
    """Summary of one address-index build for a dataset."""

    dataset_id: int
    input_path: str
    addresses: int
    node_addresses: int
    way_addresses: int
    skipped: int
    version: str = ADDRESS_INDEX_VERSION

    def as_dict(self) -> dict[str, object]:
        """Return the summary as a plain dict, with ``version`` as the first key."""
        ordered_fields = (
            "version",
            "dataset_id",
            "input_path",
            "addresses",
            "node_addresses",
            "way_addresses",
            "skipped",
        )
        return {field_name: getattr(self, field_name) for field_name in ordered_fields}
|
||||
|
||||
|
||||
def rebuild_address_index(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    reset: bool = True,
    batch_size: int = 20_000,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Import OSM addresses from a PBF file into the address index of a dataset.

    Args:
        session: database session; this function commits on it.
        dataset_id: dataset to index; when omitted, the dataset returned by
            ``active_routing_dataset`` is used.
        input_path: PBF file to read; defaults to the dataset's ``local_path``.
        reset: clear the dataset's existing address rows first.
        batch_size: passed to the address handler.
        progress_callback: optional receiver of progress events.

    Returns:
        The summary dict produced by ``finalize_address_index``.

    Raises:
        ValueError: no dataset could be resolved.
        FileNotFoundError: the PBF path does not exist.
    """
    if dataset_id is None:
        dataset = active_routing_dataset(session)
    else:
        dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        raise ValueError("No OSM PBF dataset is available for address indexing.")

    pbf_path = Path(input_path or dataset.local_path)
    if not pbf_path.exists():
        raise FileNotFoundError(f"Address index PBF does not exist: {pbf_path}")

    if reset:
        _emit(progress_callback, "address_index_clear_started", "Clearing existing OSM address index.", None, None, {"dataset_id": dataset.id})
        _clear_address_rows(session, dataset_id=int(dataset.id))
        session.commit()

    # NOTE(review): block nesting was reconstructed from a flattened source; the
    # index drop is assumed to be independent of `reset` - confirm.
    if settings.is_postgresql_database:
        _emit(progress_callback, "address_index_indexes_dropped", "Dropping address lookup indexes before bulk import.", None, None, {"dataset_id": dataset.id})
        _drop_address_indexes(session)
        session.commit()

    _emit(progress_callback, "address_index_import_started", "Importing OSM address nodes and ways.", None, None, {"dataset_id": dataset.id, "path": str(pbf_path)})
    importer = _AddressHandler(
        session=session,
        dataset_id=dataset.id,
        batch_size=batch_size,
        progress_callback=progress_callback,
    )
    # Use the FileProcessor-based path when this osmium build offers it.
    if not hasattr(osmium, "FileProcessor"):
        importer.apply_file(str(pbf_path), locations=True)
    else:
        _apply_address_file_processor(importer, pbf_path)
    importer.flush()

    return finalize_address_index(
        session,
        dataset_id=dataset.id,
        input_path=pbf_path,
        node_addresses=importer.node_address_count,
        way_addresses=importer.way_address_count,
        skipped=importer.skipped_count,
        progress_callback=progress_callback,
    )
|
||||
|
||||
|
||||
def finalize_address_index(
    session: Session,
    *,
    dataset_id: int,
    input_path: str | Path,
    node_addresses: int = 0,
    way_addresses: int = 0,
    skipped: int = 0,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Finish an address import and record its summary on the dataset.

    On PostgreSQL the address geometries are refreshed, the lookup indexes
    are recreated and the table is analyzed. For every backend the stored
    rows of the dataset are counted, the summary is written to the dataset
    metadata under ``"address_index"`` and a completion event is emitted.

    Returns:
        The ``AddressIndexResult`` summary as a plain dictionary.

    Raises:
        ValueError: if no dataset with ``dataset_id`` exists.
    """
    dataset = session.get(Dataset, dataset_id)
    if dataset is None:
        raise ValueError("Address index dataset does not exist.")

    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view; confirm that everything up to the ANALYZE call belongs to the
    # PostgreSQL-only branch.
    if settings.is_postgresql_database:
        _emit(
            progress_callback,
            "address_index_geometry_started",
            "Refreshing address point geometries.",
            None,
            None,
            {"dataset_id": dataset.id},
        )
        refresh_postgis_geometries(session, dataset_id=dataset.id, tables=["osm_addresses"], only_missing=False)
        session.commit()
        _emit(
            progress_callback,
            "address_index_indexes_started",
            "Rebuilding address lookup indexes.",
            None,
            None,
            {"dataset_id": dataset.id},
        )
        _create_address_indexes(session)
        session.commit()
        analyze_postgresql_tables(session, ["osm_addresses"])

    count_query = select(func.count()).select_from(OsmAddress).where(OsmAddress.dataset_id == dataset.id)
    address_count = int(session.scalar(count_query) or 0)

    # Persist the summary so later status checks do not need to recount.
    path_text = str(input_path)
    metadata = _metadata(dataset)
    metadata["address_index"] = {
        "version": ADDRESS_INDEX_VERSION,
        "addresses": address_count,
        "node_addresses": int(node_addresses),
        "way_addresses": int(way_addresses),
        "skipped": int(skipped),
        "input_path": path_text,
    }
    dataset.metadata_json = json.dumps(metadata, indent=2)
    session.commit()

    summary = AddressIndexResult(
        dataset_id=dataset.id,
        input_path=path_text,
        addresses=address_count,
        node_addresses=node_addresses,
        way_addresses=way_addresses,
        skipped=skipped,
    )
    result = summary.as_dict()
    _emit(
        progress_callback,
        "address_index_import_completed",
        "OSM address index import completed.",
        address_count,
        address_count,
        result,
    )
    return result
|
||||
|
||||
|
||||
def _clear_address_rows(session: Session, *, dataset_id: int) -> None:
    """Remove all stored address rows of one dataset.

    On PostgreSQL, when no other dataset has rows in the table, the whole
    table is truncated (and its identity sequence restarted) instead of
    issuing a row-wise delete.
    """
    target_id = int(dataset_id)
    if settings.is_postgresql_database:
        foreign_datasets = session.scalar(
            select(func.count(func.distinct(OsmAddress.dataset_id))).where(OsmAddress.dataset_id != target_id)
        )
        if int(foreign_datasets or 0) == 0:
            session.execute(text("TRUNCATE TABLE osm_addresses RESTART IDENTITY"))
            return
    session.execute(delete(OsmAddress).where(OsmAddress.dataset_id == target_id))
|
||||
|
||||
|
||||
def address_index_status(session: Session) -> dict[str, object]:
    """Describe the address index of the active routing dataset.

    The address count is taken from the dataset metadata when it holds a
    usable non-zero number; otherwise the rows are counted in the database.
    The index is reported as stale when addresses exist but the recorded
    version differs from ``ADDRESS_INDEX_VERSION``.
    """
    dataset = active_routing_dataset(session)
    metadata: dict[str, object] = {}
    address_count = 0

    # NOTE(review): nesting was reconstructed from a flattened diff view;
    # confirm the count fallback sits directly under the dataset check.
    if dataset is not None:
        metadata = _metadata(dataset).get("address_index") or {}
        if isinstance(metadata, dict):
            try:
                address_count = int(metadata.get("addresses") or 0)
            except (TypeError, ValueError):
                address_count = 0
        if not address_count:
            count_query = select(func.count()).select_from(OsmAddress).where(OsmAddress.dataset_id == dataset.id)
            address_count = int(session.scalar(count_query) or 0)

    has_metadata = isinstance(metadata, dict)
    installed_version = metadata.get("version") if has_metadata else None
    return {
        "dataset_id": None if dataset is None else int(dataset.id),
        "addresses": address_count,
        "available": address_count > 0,
        "version": installed_version,
        "current_version": ADDRESS_INDEX_VERSION,
        "stale": bool(address_count and installed_version != ADDRESS_INDEX_VERSION),
        "input_path": metadata.get("input_path") if has_metadata else None,
    }
|
||||
|
||||
|
||||
class _AddressHandler(osmium.SimpleHandler):
    """Turn addressed OSM nodes and ways into ``OsmAddress`` rows.

    Rows are buffered in ``rows`` and written with a bulk insert followed by
    a commit whenever the buffer reaches ``batch_size`` (at least 1,000).
    """

    def __init__(
        self,
        *,
        session: Session,
        dataset_id: int,
        batch_size: int,
        progress_callback: ProgressCallback | None,
    ) -> None:
        super().__init__()
        self.session = session
        self.dataset_id = int(dataset_id)
        self.batch_size = max(1_000, int(batch_size))
        self.progress_callback = progress_callback
        self.rows: list[dict[str, object]] = []
        # Counters reported through progress events and the final summary.
        self.address_count = 0
        self.node_address_count = 0
        self.way_address_count = 0
        self.skipped_count = 0
        self.processed_count = 0

    def node(self, node) -> None:
        """Handler callback for nodes; delegates to ``process_node``."""
        self.process_node(node)

    def way(self, way) -> None:
        """Handler callback for ways; delegates to ``process_way``."""
        self.process_way(way)

    def process_object(self, obj) -> None:
        """Dispatch an object by shape: ``nodes`` means way, ``location`` means node."""
        if hasattr(obj, "nodes"):
            self.process_way(obj)
            return
        if hasattr(obj, "location"):
            self.process_node(obj)

    def process_node(self, node) -> None:
        """Queue an address row for a node that carries usable address tags."""
        self.processed_count += 1
        tags = {tag.k: tag.v for tag in node.tags}
        if not _has_address(tags):
            return
        location = node.location
        if not location.valid():
            self.skipped_count += 1
            return
        lon = float(location.lon)
        lat = float(location.lat)
        row = _address_row(
            dataset_id=self.dataset_id,
            osm_type="node",
            osm_id=str(node.id),
            tags=tags,
            lon=lon,
            lat=lat,
            bounds=(lon, lat, lon, lat),
            geometry_geojson=None,
        )
        if self._queue_row(row):
            self.node_address_count += 1
            self._after_address()

    def process_way(self, way) -> None:
        """Queue an address row for a way, located at the centroid of its valid nodes."""
        self.processed_count += 1
        tags = {tag.k: tag.v for tag in way.tags}
        if not _has_address(tags):
            return
        coords = [
            (float(member.location.lon), float(member.location.lat))
            for member in way.nodes
            if member.location.valid()
        ]
        if not coords:
            self.skipped_count += 1
            return
        center_lon, center_lat = _centroid(coords)
        lons, lats = zip(*coords)
        row = _address_row(
            dataset_id=self.dataset_id,
            osm_type="way",
            osm_id=str(way.id),
            tags=tags,
            lon=center_lon,
            lat=center_lat,
            bounds=(min(lons), min(lats), max(lons), max(lats)),
            geometry_geojson=_address_area_geometry_geojson(coords, closed=_way_is_closed(way)),
        )
        if self._queue_row(row):
            self.way_address_count += 1
            self._after_address()

    def _queue_row(self, row: dict[str, object] | None) -> bool:
        """Buffer ``row``; count it as skipped and return False when it is None."""
        if row is None:
            self.skipped_count += 1
            return False
        self.rows.append(row)
        return True

    def _after_address(self) -> None:
        """Update the running total, flush full batches and report progress."""
        self.address_count += 1
        if len(self.rows) >= self.batch_size:
            self.flush()
        if self.address_count % 50_000 != 0:
            return
        _emit(
            self.progress_callback,
            "address_index_import_batch",
            f"Imported {self.address_count:,} OSM addresses.",
            self.address_count,
            None,
            {"processed": self.processed_count, "skipped": self.skipped_count},
        )

    def flush(self) -> None:
        """Bulk-insert the buffered rows, commit, and start a new buffer."""
        if self.rows:
            self.session.bulk_insert_mappings(OsmAddress, self.rows)
            self.session.commit()
            self.rows = []
|
||||
|
||||
|
||||
def _apply_address_file_processor(handler: _AddressHandler, path: Path) -> None:
    """Stream nodes and ways tagged with a house number or house name into ``handler``."""
    entity_mask = osmium.osm.NODE | osmium.osm.WAY
    address_filter = osmium.filter.KeyFilter("addr:housenumber", "addr:housename")
    processor = osmium.FileProcessor(str(path), entity_mask).with_locations().with_filter(address_filter)
    for osm_object in processor:
        handler.process_object(osm_object)
|
||||
|
||||
|
||||
def _has_address(tags: dict[str, str]) -> bool:
    """Return True when ``tags`` hold a house number/name plus a street, place, city or postcode."""
    if not _clean(tags.get("addr:housenumber") or tags.get("addr:housename")):
        return False
    for key in ("addr:street", "addr:place", "addr:city", "addr:postcode"):
        if _clean(tags.get(key)):
            return True
    return False
|
||||
|
||||
|
||||
def _address_row(
    *,
    dataset_id: int,
    osm_type: str,
    osm_id: str,
    tags: dict[str, str],
    lon: float,
    lat: float,
    bounds: tuple[float, float, float, float],
    geometry_geojson: str | None = None,
) -> dict[str, object] | None:
    """Build the insert mapping for one address, or None without a display name.

    ``bounds`` is unpacked as ``(min_lon, min_lat, max_lon, max_lat)``. Only
    tags listed in ``ADDRESS_TAGS`` are kept in ``tags_json``.
    """
    fields: dict[str, str | None] = {
        "housenumber": _clean(tags.get("addr:housenumber") or tags.get("addr:housename")),
        "street": _clean(tags.get("addr:street")),
        "place": _clean(tags.get("addr:place")),
        "postcode": _clean(tags.get("addr:postcode")),
        "city": _clean(tags.get("addr:city") or tags.get("addr:municipality")),
        "country": _clean(tags.get("addr:country")),
        "unit": _clean(tags.get("addr:unit")),
        "name": _clean(tags.get("name")),
    }
    display_name = _display_name(
        housenumber=fields["housenumber"],
        street=fields["street"],
        place=fields["place"],
        postcode=fields["postcode"],
        city=fields["city"],
        name=fields["name"],
    )
    if not display_name:
        return None

    search_text = _search_text(
        display_name,
        fields["housenumber"],
        fields["street"],
        fields["place"],
        fields["postcode"],
        fields["city"],
        fields["country"],
        fields["unit"],
        fields["name"],
    )
    selected_tags = {key: tags[key] for key in sorted(ADDRESS_TAGS) if key in tags}
    min_lon, min_lat, max_lon, max_lat = bounds

    row: dict[str, object] = {"dataset_id": dataset_id, "osm_type": osm_type, "osm_id": osm_id}
    row.update(fields)
    row.update(
        {
            "display_name": display_name,
            "search_text": search_text,
            "lon": lon,
            "lat": lat,
            "min_lon": min_lon,
            "min_lat": min_lat,
            "max_lon": max_lon,
            "max_lat": max_lat,
            "geometry_geojson": geometry_geojson,
            "tags_json": json.dumps(selected_tags, separators=(",", ":")) if selected_tags else None,
        }
    )
    return row
|
||||
|
||||
|
||||
def _address_area_geometry_geojson(coords: list[tuple[float, float]], *, closed: bool | None = None) -> str | None:
    """Serialize a coordinate ring as a compact GeoJSON Polygon string.

    Args:
        coords: ``(lon, lat)`` pairs in ring order.
        closed: ``False`` rejects the input outright; ``True`` closes an open
            ring by repeating its first position; ``None`` accepts only
            input whose first and last positions already coincide.

    Returns:
        The GeoJSON text, or ``None`` when the input cannot form a polygon
        (fewer than three positions, an open ring that may not be closed, or
        fewer than three distinct positions).
    """
    if closed is False or len(coords) < 3:
        return None

    ring_coords = list(coords)
    first = ring_coords[0]
    last = ring_coords[-1]
    already_closed = abs(first[0] - last[0]) <= 1e-12 and abs(first[1] - last[1]) <= 1e-12
    if already_closed:
        # A GeoJSON linear ring must end on a position identical to its first
        # one, so snap a closing point that only matches within tolerance.
        ring_coords[-1] = first
    elif closed is True:
        ring_coords.append(first)
    else:
        return None

    if len(ring_coords) < 4:
        return None
    # Reject degenerate rings that collapse to a point or a line segment.
    distinct = {(round(lon, 12), round(lat, 12)) for lon, lat in ring_coords[:-1]}
    if len(distinct) < 3:
        return None

    ring = [[float(lon), float(lat)] for lon, lat in ring_coords]
    return json.dumps({"type": "Polygon", "coordinates": [ring]}, separators=(",", ":"))
|
||||
|
||||
|
||||
def _way_is_closed(way) -> bool:
    """Return True when ``way`` has at least three node refs and ends where it starts.

    Objects without a usable ``nodes`` sequence are treated as not closed.
    """
    try:
        members = way.nodes
        if len(members) < 3:
            return False
        return members[0].ref == members[-1].ref
    except (AttributeError, IndexError, TypeError):
        return False
|
||||
|
||||
|
||||
def _display_name(
    *,
    housenumber: str | None,
    street: str | None,
    place: str | None,
    postcode: str | None,
    city: str | None,
    name: str | None,
) -> str | None:
    """Compose a ``"<road> <housenumber>, <postcode> <city>"`` style label.

    The road part falls back from street to place to name. Missing parts are
    left out; the result is falsy when nothing usable was supplied.
    """
    road = street or place or name
    headline = f"{road} {housenumber}" if (road and housenumber) else (road or housenumber)
    locality = " ".join(filter(None, (postcode, city)))
    if headline and locality:
        return f"{headline}, {locality}"
    return headline or locality
|
||||
|
||||
|
||||
def _search_text(*parts: str | None) -> str:
    """Join the non-empty parts, case-folded, with whitespace runs collapsed to one space."""
    folded = [part.casefold() for part in parts if part]
    return re.sub(r"\s+", " ", " ".join(folded)).strip()
|
||||
|
||||
|
||||
def _clean(value: object) -> str | None:
    """Stringify ``value``, collapse whitespace runs and trim; falsy or blank input gives None."""
    collapsed = re.sub(r"\s+", " ", str(value or "")).strip()
    if collapsed:
        return collapsed
    return None
|
||||
|
||||
|
||||
def _centroid(coords: list[tuple[float, float]]) -> tuple[float, float]:
    """Return a representative ``(lon, lat)`` point for a coordinate sequence.

    A closed ring (at least four positions, first equal to last) yields the
    area-weighted polygon centroid. Any other input, and rings with
    near-zero area, yield the arithmetic mean of all positions.

    Raises:
        ZeroDivisionError: if ``coords`` is empty.
    """
    if len(coords) >= 4 and coords[0] == coords[-1]:
        # Work relative to the first vertex. Applying the shoelace sums to raw
        # lon/lat values subtracts nearly equal large products, which loses
        # most significant digits for building-sized rings.
        origin_x, origin_y = coords[0]
        local = [(x - origin_x, y - origin_y) for x, y in coords]
        twice_area = 0.0
        moment_x = 0.0
        moment_y = 0.0
        for (x1, y1), (x2, y2) in zip(local, local[1:]):
            cross = x1 * y2 - x2 * y1
            twice_area += cross
            moment_x += (x1 + x2) * cross
            moment_y += (y1 + y2) * cross
        if abs(twice_area) > 1e-18:
            factor = 1 / (3 * twice_area)
            return origin_x + moment_x * factor, origin_y + moment_y * factor
    count = len(coords)
    return (
        math.fsum(point[0] for point in coords) / count,
        math.fsum(point[1] for point in coords) / count,
    )
|
||||
|
||||
|
||||
def _drop_address_indexes(session: Session) -> None:
    """Drop every address lookup index that exists, so a bulk load runs without index upkeep."""
    index_names = (
        "ix_osm_addresses_dataset_city_street",
        "ix_osm_addresses_dataset_postcode",
        "ix_osm_addresses_bbox",
        "ix_osm_addresses_geom_gist",
        "ix_osm_addresses_area_geom_gist",
        "ix_osm_addresses_search_trgm",
        "ix_osm_addresses_display_trgm",
        "ix_osm_addresses_street_key_house",
        "ix_osm_addresses_street_key_trgm",
    )
    # The names are fixed literals, so interpolating them into SQL is safe.
    for index_name in index_names:
        session.execute(text(f"DROP INDEX IF EXISTS {index_name}"))
|
||||
|
||||
|
||||
def _create_address_indexes(session: Session) -> None:
    """Create the address lookup indexes that do not exist yet.

    The plain column indexes are always issued; the GiST and trigram
    indexes are added only on PostgreSQL.
    """
    portable = [
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_dataset_city_street ON osm_addresses (dataset_id, city, street, housenumber)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_dataset_postcode ON osm_addresses (dataset_id, postcode)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_bbox ON osm_addresses (dataset_id, min_lon, max_lon, min_lat, max_lat)",
    ]
    postgres_only = [
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_geom_gist ON osm_addresses USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_area_geom_gist ON osm_addresses USING GIST (area_geom)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_search_trgm ON osm_addresses USING GIN (LOWER(COALESCE(search_text, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_display_trgm ON osm_addresses USING GIN (LOWER(COALESCE(display_name, '')) gin_trgm_ops)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_street_key_house ON osm_addresses (dataset_id, REPLACE(LOWER(COALESCE(NULLIF(street, ''), NULLIF(place, ''), '')), 'ß', 'ss'), housenumber)",
        "CREATE INDEX IF NOT EXISTS ix_osm_addresses_street_key_trgm ON osm_addresses USING GIN (REPLACE(LOWER(COALESCE(NULLIF(street, ''), NULLIF(place, ''), '')), 'ß', 'ss') gin_trgm_ops)",
    ]
    statements = list(portable)
    if settings.is_postgresql_database:
        statements += postgres_only
    for ddl in statements:
        session.execute(text(ddl))
|
||||
|
||||
|
||||
def _metadata(dataset: Dataset) -> dict[str, object]:
    """Decode ``dataset.metadata_json``; invalid JSON or a non-object value gives ``{}``."""
    raw = dataset.metadata_json or "{}"
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
|
||||
|
||||
|
||||
def _emit(
    progress_callback: ProgressCallback | None,
    event_type: str,
    message: str,
    progress_current: int | None,
    progress_total: int | None,
    metadata: dict[str, object] | None = None,
) -> None:
    """Forward one progress event to ``progress_callback``; do nothing when it is None."""
    if progress_callback is None:
        return
    progress_callback(event_type, message, progress_current, progress_total, metadata)
|
||||
100
app/pipeline/osm_diff.py
Normal file
100
app/pipeline/osm_diff.py
Normal file
@@ -0,0 +1,100 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, Source
|
||||
from app.pipeline.download import materialize_source
|
||||
from app.pipeline.osm_pbf import _raw_format
|
||||
from app.pipeline.osm_replication import fetch_replication_state
|
||||
from app.pipeline.utils import sha256_file
|
||||
|
||||
|
||||
def run_osm_diff_source(session: Session, source: Source) -> Dataset:
    """Record an OSM change file as an inactive raw update artifact.

    The file is only registered here; applying the diff to an authoritative
    OSM base extract is a separate step, and the artifact is deliberately
    not treated as a complete visual route layer. Sources whose URL looks
    like a replication directory are recorded by their state file instead.
    A dataset already stored for the same source and file hash is reused.
    """
    if _looks_like_update_directory(source.url):
        return _commit_update_directory_state(session, source)

    raw_path = materialize_source(source)
    raw_hash = sha256_file(raw_path)

    duplicate_query = (
        select(Dataset)
        .where(Dataset.source_id == source.id, Dataset.kind == "osm_diff_raw", Dataset.sha256 == raw_hash)
        .order_by(Dataset.id.desc())
    )
    existing = session.scalar(duplicate_query)
    if existing is not None:
        return existing

    metadata = {
        "stage": "raw_osm_diff",
        "raw_format": _raw_format(raw_path),
        "source_url": source.url,
    }
    dataset = Dataset(
        source_id=source.id,
        kind="osm_diff_raw",
        local_path=str(raw_path),
        sha256=raw_hash,
        is_active=False,
        status="committed",
        metadata_json=json.dumps(metadata, indent=2),
    )
    session.add(dataset)
    session.flush()
    return dataset
|
||||
|
||||
|
||||
def _commit_update_directory_state(session: Session, source: Source) -> Dataset:
    """Fetch the replication state of an update directory and record it as a dataset.

    The state is written as sorted ``key=value`` lines to
    ``<data_dir>/sources/source_<id>/state_<sequence>.txt``. A dataset
    already stored for the same source and state-file hash is reused.
    """
    state = fetch_replication_state(source.url, timeout=settings.osm_diff_state_timeout_seconds)

    source_dir = settings.data_dir / "sources" / f"source_{source.id}"
    source_dir.mkdir(parents=True, exist_ok=True)
    state_path = source_dir / f"state_{state.sequence_number}.txt"
    state_lines = "\n".join(f"{key}={value}" for key, value in sorted(state.raw.items()))
    state_path.write_text(state_lines + "\n", encoding="utf-8")
    state_hash = sha256_file(state_path)

    duplicate_query = (
        select(Dataset)
        .where(Dataset.source_id == source.id, Dataset.kind == "osm_diff_state", Dataset.sha256 == state_hash)
        .order_by(Dataset.id.desc())
    )
    existing = session.scalar(duplicate_query)
    if existing is not None:
        return existing

    metadata = {
        "stage": "osm_diff_state",
        "updates_url": source.url,
        "sequence_number": state.sequence_number,
        "timestamp": state.timestamp,
        "state": state.raw,
    }
    dataset = Dataset(
        source_id=source.id,
        kind="osm_diff_state",
        local_path=str(state_path),
        sha256=state_hash,
        is_active=False,
        status="committed",
        metadata_json=json.dumps(metadata, indent=2),
    )
    session.add(dataset)
    session.flush()
    return dataset
|
||||
|
||||
|
||||
def _looks_like_update_directory(url: str) -> bool:
    """Return True when the URL path ends in ``-updates``, with or without one trailing slash."""
    url_path = urlparse(url).path.lower()
    return url_path.endswith(("-updates", "-updates/"))
|
||||
248
app/pipeline/osm_geojson.py
Normal file
248
app/pipeline/osm_geojson.py
Normal file
@@ -0,0 +1,248 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, OsmFeature, Source
|
||||
from app.osm_classification import infer_osm_route_scope
|
||||
from app.osm_storage import (
|
||||
OSM_STORAGE_METADATA_KEY,
|
||||
OSM_STORAGE_MAIN,
|
||||
OSM_STORAGE_SIDECAR_FEATURES,
|
||||
create_osm_sidecar,
|
||||
dedupe_osm_feature_rows,
|
||||
effective_osm_feature_storage,
|
||||
)
|
||||
from app.pipeline.download import materialize_source
|
||||
from app.pipeline.utils import first_nonempty, geometry_json_and_bbox, norm_ref, norm_text, sha256_file
|
||||
from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries
|
||||
|
||||
# Tag values accepted as a transport mode when read from the "mode", "route"
# or "route_master" properties of a feature.
ROUTE_MODES = {
    # rail-bound
    "train",
    "railway",
    "light_rail",
    "subway",
    "tram",
    "monorail",
    "funicular",
    # road
    "bus",
    "trolleybus",
    "coach",
    # water and cable
    "ferry",
    "aerialway",
}
|
||||
|
||||
|
||||
def run_osm_geojson_source(session: Session, source: Source) -> Dataset:
    """Materialize a GeoJSON source and import it unless the same file is already active.

    An active, fully imported ``osm_geojson`` dataset with a matching file
    hash is returned as-is; otherwise a fresh import is run.
    """
    local_path = materialize_source(source)
    source_hash = sha256_file(local_path)

    unchanged_query = (
        select(Dataset)
        .where(
            Dataset.source_id == source.id,
            Dataset.kind == "osm_geojson",
            Dataset.sha256 == source_hash,
            Dataset.is_active.is_(True),
            Dataset.status == "imported",
        )
        .order_by(Dataset.id.desc())
    )
    existing = session.scalar(unchanged_query)
    if existing is None:
        return import_osm_geojson(session=session, source=source, path=local_path, source_hash=source_hash)
    return existing
|
||||
|
||||
|
||||
def import_osm_geojson(
    session: Session,
    source: Source,
    path: Path,
    source_hash: str | None = None,
    *,
    storage_mode: str | None = None,
) -> Dataset:
    """Import a GeoJSON file as the new active ``osm_geojson`` dataset of ``source``.

    All datasets currently attached to the source are deactivated, a new
    dataset row is created in ``importing`` state, the features are stored
    through ``prepare_osm_geojson_storage`` and the dataset is finally
    marked ``imported``.

    Args:
        session: Open database session; flushed but not committed here.
        source: Source that owns the file.
        path: Local GeoJSON file.
        source_hash: SHA-256 of ``path``; computed when omitted.
        storage_mode: Optional storage override passed to
            ``prepare_osm_geojson_storage``.

    Returns:
        The newly created dataset.
    """
    # Hash the file once up front; the digest is needed both for the dataset
    # row and for the storage step.
    source_hash = source_hash or sha256_file(path)

    for previous in source.datasets:
        previous.is_active = False

    dataset = Dataset(
        source_id=source.id,
        kind="osm_geojson",
        local_path=str(path),
        sha256=source_hash,
        is_active=True,
        status="importing",
    )
    session.add(dataset)
    # Flush so the dataset gets its primary key before feature rows refer to it.
    session.flush()

    storage_metadata = prepare_osm_geojson_storage(
        session=session,
        dataset=dataset,
        path=path,
        source_hash=source_hash,
        storage_mode=storage_mode,
    )
    dataset.metadata_json = json.dumps(storage_metadata, indent=2)

    dataset.status = "imported"
    source.status = "ok"
    source.last_error = None
    session.flush()
    return dataset
|
||||
|
||||
|
||||
def prepare_osm_geojson_storage(
    *,
    session: Session,
    dataset: Dataset,
    path: Path,
    source_hash: str | None = None,
    storage_mode: str | None = None,
) -> dict[str, object]:
    """Parse the GeoJSON file and store its features in the effective storage mode.

    Returns:
        Metadata with the feature count and the storage description.

    Raises:
        ValueError: if the file is not a supported GeoJSON shape or the
            effective storage mode is unknown.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    feature_rows = [
        _feature_row(dataset.id, position, feature)
        for position, feature in enumerate(_as_features(payload))
    ]

    storage = effective_osm_feature_storage(storage_mode)
    if storage == OSM_STORAGE_SIDECAR_FEATURES:
        return {
            "features": len(feature_rows),
            OSM_STORAGE_METADATA_KEY: create_osm_sidecar(dataset, feature_rows, source_hash=source_hash or dataset.sha256),
        }
    if storage != OSM_STORAGE_MAIN:
        raise ValueError(f"Unsupported OSM feature storage mode: {storage}")

    _insert_main_features(session, feature_rows)
    session.flush()
    refresh_postgis_geometries(session, dataset_id=dataset.id, tables=["osm_features"])
    analyze_postgresql_tables(session, ["osm_features"])
    return {"features": len(feature_rows), OSM_STORAGE_METADATA_KEY: {"mode": OSM_STORAGE_MAIN}}
|
||||
|
||||
|
||||
def _insert_main_features(session: Session, feature_rows: list[dict[str, object]]) -> None:
    """Save the de-duplicated feature rows as ``OsmFeature`` objects in batches of 5000."""
    columns = (
        "dataset_id",
        "osm_type",
        "osm_id",
        "kind",
        "mode",
        "route_scope",
        "name",
        "ref",
        "operator",
        "network",
        "geometry_geojson",
        "min_lon",
        "min_lat",
        "max_lon",
        "max_lat",
        "tags_json",
        "route_key",
        "operator_key",
    )
    pending: list[OsmFeature] = []
    unique_rows, _duplicates = dedupe_osm_feature_rows(feature_rows)
    for row in unique_rows:
        pending.append(OsmFeature(**{column: row[column] for column in columns}))
        if len(pending) >= 5000:
            session.bulk_save_objects(pending)
            pending.clear()
    if pending:
        session.bulk_save_objects(pending)
|
||||
|
||||
|
||||
def _feature_row(dataset_id: int, idx: int, feature: dict[str, Any]) -> dict[str, object]:
    """Flatten one GeoJSON feature into the column mapping used for OSM feature storage.

    ``idx`` only serves to synthesize an ``osm_id`` (``feature_<idx>``) when
    the properties carry no identifier.
    """
    props = feature.get("properties") or {}
    prop = props.get
    geometry_text, bbox = geometry_json_and_bbox(feature.get("geometry"))

    osm_type = str(first_nonempty(prop("osm_type"), prop("@type"), prop("type"), "feature"))
    osm_id = str(first_nonempty(prop("osm_id"), prop("@id"), prop("id"), f"feature_{idx}"))
    mode = _infer_mode(props)
    kind = _infer_kind(props, mode)

    name = first_nonempty(prop("name"), prop("official_name")) or None
    ref = first_nonempty(prop("ref"), prop("route_ref"), prop("line")) or None
    operator = first_nonempty(prop("operator"), prop("agency"), prop("brand")) or None
    network = first_nonempty(prop("network"), prop("network:short")) or None
    route_scope = infer_osm_route_scope(mode=mode, ref=ref, name=name, network=network, tags=props)

    # Matching keys: prefer the line reference, then the name, then the id.
    route_key = norm_ref(ref) or norm_text(name) or norm_ref(osm_id)
    operator_key = norm_text(operator or network or "")

    row: dict[str, object] = {
        "dataset_id": dataset_id,
        "osm_type": osm_type,
        "osm_id": osm_id,
        "kind": kind,
        "mode": mode,
        "route_scope": route_scope,
        "name": name,
        "ref": ref,
        "operator": operator,
        "network": network,
        "geometry_geojson": geometry_text,
        "min_lon": bbox[0],
        "min_lat": bbox[1],
        "max_lon": bbox[2],
        "max_lat": bbox[3],
        "tags_json": json.dumps(props, separators=(",", ":")),
        "route_key": route_key,
        "operator_key": operator_key,
    }
    return row
|
||||
|
||||
|
||||
def _as_features(data: Any) -> list[dict[str, Any]]:
    """Normalize parsed GeoJSON into a list of feature dictionaries.

    Accepts a FeatureCollection, a single Feature or a bare list; entries
    that are not dictionaries are dropped.

    Raises:
        ValueError: for any other input shape.
    """
    if isinstance(data, list):
        return [item for item in data if isinstance(item, dict)]
    if isinstance(data, dict):
        geojson_type = data.get("type")
        if geojson_type == "FeatureCollection":
            return [item for item in data.get("features", []) if isinstance(item, dict)]
        if geojson_type == "Feature":
            return [data]
    raise ValueError("OSM source must be GeoJSON FeatureCollection, Feature, or list of Features")
|
||||
|
||||
|
||||
def _infer_mode(props: dict[str, Any]) -> str | None:
    """Derive the transport mode of a feature from its properties.

    An explicit ``mode``/``route``/``route_master`` value listed in
    ``ROUTE_MODES`` wins (``railway`` is reported as ``train``); otherwise
    the mode is guessed from station and stop tags. Returns None when
    nothing matches.
    """
    for key in ("mode", "route", "route_master"):
        candidate = str(props.get(key) or "").strip()
        if candidate in ROUTE_MODES:
            return "train" if candidate == "railway" else candidate

    railway_modes = {
        "station": "train",
        "halt": "train",
        "tram_stop": "tram",
        "subway_entrance": "subway",
    }
    railway = str(props.get("railway") or "").strip()
    if railway in railway_modes:
        return railway_modes[railway]

    highway = str(props.get("highway") or "")
    amenity = str(props.get("amenity") or "")
    if highway == "bus_stop" or amenity == "bus_station":
        return "bus"
    if amenity == "ferry_terminal":
        return "ferry"
    if str(props.get("aerialway") or "") == "station":
        return "aerialway"
    return None
|
||||
|
||||
|
||||
def _infer_kind(props: dict[str, Any], mode: str | None) -> str:
    """Classify a feature as route, terminal, station, stop, infra or feature.

    A recognized explicit ``kind`` property wins. Otherwise the tags are
    checked in order of precedence; a feature with a known mode but no
    matching tag is ``infra`` and everything else is ``feature``.
    """

    def tag(key: str) -> str:
        return str(props.get(key) or "")

    explicit_kind = tag("kind").strip()
    if explicit_kind in {"route", "stop", "station", "terminal", "infra", "feature"}:
        return explicit_kind
    if tag("type") in {"route", "route_master"} or tag("route") in ROUTE_MODES:
        return "route"
    if tag("amenity") in {"ferry_terminal", "bus_station"}:
        return "terminal"
    if tag("railway") in {"station", "halt"} or tag("aerialway") == "station":
        return "station"
    if tag("public_transport") in {"platform", "stop_position", "station"} or tag("highway") == "bus_stop":
        return "stop"
    return "infra" if mode else "feature"
|
||||
456
app/pipeline/osm_labeling.py
Normal file
456
app/pipeline/osm_labeling.py
Normal file
@@ -0,0 +1,456 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
from typing import Callable
|
||||
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import Dataset, OsmFeature
|
||||
from app.osm_classification import OSM_ROUTE_SCOPE_CLASSIFIER_VERSION, infer_osm_route_scope_from_tags
|
||||
from app.osm_storage import (
|
||||
dataset_metadata,
|
||||
drop_osm_sidecar_route_scope_indexes,
|
||||
ensure_osm_sidecar_schema,
|
||||
features_are_sidecar,
|
||||
rebuild_osm_sidecar_indexes,
|
||||
sidecar_path,
|
||||
writable_sidecar_connection,
|
||||
)
|
||||
from app.pipeline.state import (
|
||||
STAGE_BUILD_INDEXES,
|
||||
STAGE_LABEL_FEATURES,
|
||||
dependency_hash,
|
||||
finish_pipeline_run,
|
||||
latest_completed_run,
|
||||
start_pipeline_run,
|
||||
)
|
||||
|
||||
|
||||
# The labeling stage is versioned with the route-scope classifier itself.
OSM_LABEL_FEATURES_VERSION = OSM_ROUTE_SCOPE_CLASSIFIER_VERSION

MAIN_ROUTE_SCOPE_INDEX = "ix_osm_features_scope_bbox"

# NOTE(review): presumably the number of changed rows above which indexes are
# rebuilt instead of maintained incrementally; confirm against the callers.
MAIN_INDEX_REBUILD_THRESHOLD = 10_000
SIDECAR_INDEX_REBUILD_THRESHOLD = 10_000

# Signature: (event_type, message, progress_current, progress_total, metadata).
ProgressCallback = Callable[
    [str, str, int | None, int | None, dict[str, object] | None],
    None,
]
|
||||
|
||||
|
||||
def relabel_osm_features(
    session: Session,
    *,
    dataset_id: int | None = None,
    chunk_size: int = 5000,
    force: bool = False,
    rebuild_indexes: bool = True,
    progress_callback: ProgressCallback | None = None,
    job_id: int | None = None,
) -> dict[str, object]:
    """Relabel every targeted OSM dataset and return the aggregated outcome.

    Each dataset is processed by ``relabel_osm_dataset``; the per-dataset
    results are summed into totals and also kept under ``dataset_results``.
    Progress events are emitted at the start, after every dataset and at
    the end.
    """
    datasets = _target_datasets(session, dataset_id)
    total = len(datasets)
    dataset_results: list[dict[str, object]] = []
    result: dict[str, object] = {
        "version": OSM_LABEL_FEATURES_VERSION,
        "datasets": total,
        "processed": 0,
        "changed": 0,
        "skipped": 0,
        "missing": 0,
        "index_rebuilds": 0,
        "dataset_results": dataset_results,
    }
    _emit_progress(
        progress_callback,
        "osm_labeling_started",
        f"Relabeling {total} OSM dataset(s).",
        0,
        total,
        {"dataset_id": dataset_id, "force": force, "version": OSM_LABEL_FEATURES_VERSION},
    )

    for position, dataset in enumerate(datasets, start=1):
        dataset_result = relabel_osm_dataset(
            session,
            dataset,
            chunk_size=chunk_size,
            force=force,
            rebuild_indexes=rebuild_indexes,
            progress_callback=progress_callback,
            job_id=job_id,
        )
        # Numeric counters are summed; statuses are tallied per dataset.
        for counter in ("processed", "changed", "index_rebuilds"):
            result[counter] = int(result[counter]) + int(dataset_result.get(counter, 0) or 0)
        status = dataset_result.get("status")
        result["skipped"] = int(result["skipped"]) + (1 if status == "skipped" else 0)
        result["missing"] = int(result["missing"]) + (1 if status == "missing_sidecar" else 0)
        dataset_results.append(dataset_result)
        _emit_progress(
            progress_callback,
            "osm_labeling_dataset_completed",
            f"Relabeled {position}/{total} OSM dataset(s).",
            position,
            total,
            dataset_result,
        )

    _emit_progress(progress_callback, "osm_labeling_completed", "OSM feature relabeling completed.", total, total, result)
    return result
|
||||
|
||||
|
||||
def relabel_osm_dataset(
    session: Session,
    dataset: Dataset,
    *,
    chunk_size: int = 5000,
    force: bool = False,
    rebuild_indexes: bool = True,
    progress_callback: ProgressCallback | None = None,
    job_id: int | None = None,
) -> dict[str, object]:
    """Relabel the route scope of one OSM dataset and track it as a pipeline run.

    The dataset is skipped (no pipeline run is started) when ``force`` is
    false and ``_dataset_label_is_current`` reports that the stored label
    matches the current dependency hash.  Otherwise a pipeline run is started
    and committed, the features are relabeled through the sidecar or the
    main-table path, the dataset metadata is stamped and the run is finished.

    Returns:
        A result dict whose ``status`` is ``"skipped"``, ``"completed"`` or
        ``"missing_sidecar"``; it always carries ``processed``, ``changed``
        and ``index_rebuilds`` counters.

    Raises:
        Exception: any error other than ``FileNotFoundError`` is re-raised
            after the run has been marked as failed.
    """
    dependency = _label_dependency(dataset)
    dependency_hash_value = dependency_hash(dependency)
    if not force and _dataset_label_is_current(session, dataset, dependency_hash_value):
        return {
            "dataset_id": dataset.id,
            "source_id": dataset.source_id,
            "status": "skipped",
            "reason": "label_features dependency is current",
            "dependency_hash": dependency_hash_value,
            "version": OSM_LABEL_FEATURES_VERSION,
            "processed": 0,
            "changed": 0,
            "index_rebuilds": 0,
        }

    run = start_pipeline_run(
        session,
        stage=STAGE_LABEL_FEATURES,
        version=OSM_LABEL_FEATURES_VERSION,
        dependency_hash_value=dependency_hash_value,
        source_id=dataset.source_id,
        dataset_id=dataset.id,
        job_id=job_id,
        inputs=dependency,
    )
    # Commit right away so the run row survives a rollback in the handlers below.
    session.commit()
    try:
        if features_are_sidecar(dataset):
            counts = _relabel_sidecar_dataset(
                dataset,
                chunk_size=chunk_size,
                rebuild_indexes=rebuild_indexes,
                progress_callback=progress_callback,
            )
        else:
            counts = _relabel_main_dataset(
                session,
                dataset,
                chunk_size=chunk_size,
                rebuild_indexes=rebuild_indexes,
                progress_callback=progress_callback,
            )
        output = {
            "dataset_id": dataset.id,
            "source_id": dataset.source_id,
            "status": "completed",
            "dependency_hash": dependency_hash_value,
            "version": OSM_LABEL_FEATURES_VERSION,
            **counts,
        }
        _stamp_dataset_metadata(session, dataset, dependency_hash_value, output)
        finish_pipeline_run(session, run, outputs=output)
        session.commit()
        return output
    except FileNotFoundError as exc:
        # Discard anything half-applied before the session is reused to
        # record the failure.
        session.rollback()
        output = {
            "dataset_id": dataset.id,
            "source_id": dataset.source_id,
            "status": "missing_sidecar",
            "dependency_hash": dependency_hash_value,
            "version": OSM_LABEL_FEATURES_VERSION,
            "processed": 0,
            "changed": 0,
            "index_rebuilds": 0,
            "error": str(exc),
        }
        finish_pipeline_run(session, run, status="failed", outputs=output, error=str(exc))
        session.commit()
        return output
    except Exception as exc:
        # A failed statement leaves the transaction unusable until it is
        # rolled back; without this the bookkeeping below would raise a
        # second error and hide the original one.
        session.rollback()
        finish_pipeline_run(session, run, status="failed", error=str(exc))
        session.commit()
        raise
|
||||
|
||||
|
||||
def _target_datasets(session: Session, dataset_id: int | None) -> list[Dataset]:
    """Return the imported ``osm_geojson`` datasets that should be relabeled.

    With ``dataset_id`` set only that dataset is selected; otherwise every
    active dataset is.  Rows are ordered by ``source_id`` and then ``id``.
    """
    if dataset_id is None:
        selector = Dataset.is_active.is_(True)
    else:
        selector = Dataset.id == dataset_id
    query = (
        select(Dataset)
        .where(Dataset.kind == "osm_geojson", Dataset.status == "imported")
        .where(selector)
        .order_by(Dataset.source_id, Dataset.id)
    )
    return session.scalars(query).all()
|
||||
|
||||
|
||||
def _dataset_label_is_current(session: Session, dataset: Dataset, dependency_hash_value: str) -> bool:
    """Tell whether the dataset is already labeled for this dependency hash.

    Both conditions must hold: the dataset metadata carries a
    ``label_features`` entry with the current version and hash, and a
    completed pipeline run exists for the same stage, version and hash.
    """
    label_info = dataset_metadata(dataset).get("label_features")
    if not isinstance(label_info, dict):
        return False
    if label_info.get("version") != OSM_LABEL_FEATURES_VERSION:
        return False
    if label_info.get("dependency_hash") != dependency_hash_value:
        return False
    # The metadata stamp alone is not trusted; a completed run must back it.
    completed_run = latest_completed_run(
        session,
        stage=STAGE_LABEL_FEATURES,
        version=OSM_LABEL_FEATURES_VERSION,
        dependency_hash_value=dependency_hash_value,
        source_id=dataset.source_id,
        dataset_id=dataset.id,
    )
    return completed_run is not None
|
||||
|
||||
|
||||
def _relabel_sidecar_dataset(
    dataset: Dataset,
    *,
    chunk_size: int,
    rebuild_indexes: bool,
    progress_callback: ProgressCallback | None,
) -> dict[str, int | str]:
    """Recompute ``route_scope`` for every feature stored in the dataset sidecar.

    Features are read in ascending ``id`` order in chunks of ``chunk_size``
    (at least 1); only rows whose normalized scope differs are updated and
    each chunk is committed separately.  When ``rebuild_indexes`` is set and
    the table holds at least ``SIDECAR_INDEX_REBUILD_THRESHOLD`` rows, the
    route-scope indexes are dropped first and rebuilt afterwards, even if
    the loop fails.

    Raises:
        FileNotFoundError: the sidecar path is unknown or does not exist.
    """
    path = sidecar_path(dataset)
    if path is None or not path.exists():
        raise FileNotFoundError(f"OSM sidecar does not exist: {path}")

    select_batch_sql = """
        SELECT id, mode, ref, name, network, tags_json, route_scope
        FROM osm_features
        WHERE id > ?
        ORDER BY id
        LIMIT ?
    """
    with writable_sidecar_connection(dataset) as connection:
        ensure_osm_sidecar_schema(connection)
        count_row = connection.execute("SELECT COUNT(*) FROM osm_features").fetchone()
        total = int(count_row[0] or 0)
        should_rebuild_index = rebuild_indexes and total >= SIDECAR_INDEX_REBUILD_THRESHOLD
        if should_rebuild_index:
            drop_osm_sidecar_route_scope_indexes(connection)
            connection.commit()

        processed = 0
        changed = 0
        last_id = 0
        index_rebuilds = 0
        try:
            while True:
                batch = connection.execute(
                    select_batch_sql,
                    (last_id, max(1, int(chunk_size))),
                ).fetchall()
                if not batch:
                    break
                pending: list[tuple[str | None, int]] = []
                for record in batch:
                    # Keyset pagination: remember the highest id seen so far.
                    last_id = int(record["id"])
                    scope = _classified_scope(
                        record["mode"],
                        record["ref"],
                        record["name"],
                        record["network"],
                        record["tags_json"],
                    )
                    if _normalize_scope(record["route_scope"]) != scope:
                        pending.append((scope, last_id))
                if pending:
                    connection.executemany("UPDATE osm_features SET route_scope = ? WHERE id = ?", pending)
                processed += len(batch)
                changed += len(pending)
                connection.commit()
                _emit_progress(
                    progress_callback,
                    "osm_labeling_batch",
                    f"Relabeled {processed}/{total} OSM sidecar features.",
                    processed,
                    total,
                    {"dataset_id": dataset.id, "changed": changed, "storage": "sidecar"},
                )
        finally:
            # Restore the dropped indexes whether or not the loop succeeded.
            if should_rebuild_index:
                rebuild_osm_sidecar_indexes(connection)
                connection.commit()
                index_rebuilds = 1
                _record_sidecar_index_build(connection, dataset, path)
        _record_sidecar_label(connection, dataset, processed=processed, changed=changed)
        connection.commit()
    return {"storage": "sidecar", "processed": processed, "changed": changed, "index_rebuilds": index_rebuilds}
|
||||
|
||||
|
||||
def _relabel_main_dataset(
    session: Session,
    dataset: Dataset,
    *,
    chunk_size: int,
    rebuild_indexes: bool,
    progress_callback: ProgressCallback | None,
) -> dict[str, int | str]:
    """Recompute ``route_scope`` for the dataset's rows in the main ``OsmFeature`` table.

    Rows are read in ascending ``id`` order in chunks of ``chunk_size`` (at
    least 1); only rows whose normalized scope differs are updated and each
    chunk is committed separately.  When ``rebuild_indexes`` is set and the
    dataset has at least ``MAIN_INDEX_REBUILD_THRESHOLD`` rows, the
    route-scope index is dropped first and re-created afterwards, even if
    the loop fails.

    Returns:
        ``{"storage": "main", "processed", "changed", "index_rebuilds"}``.
    """
    # Validate the chunk size before any index is dropped, so a bad value
    # cannot leave the table without its index.
    batch_limit = max(1, int(chunk_size))
    total = int(
        session.scalar(select(func.count()).select_from(OsmFeature).where(OsmFeature.dataset_id == dataset.id)) or 0
    )
    should_rebuild_index = rebuild_indexes and total >= MAIN_INDEX_REBUILD_THRESHOLD
    index_rebuilds = 0
    if should_rebuild_index:
        session.execute(text(f"DROP INDEX IF EXISTS {MAIN_ROUTE_SCOPE_INDEX}"))
        session.commit()
    processed = 0
    changed = 0
    last_id = 0
    try:
        while True:
            rows = session.scalars(
                select(OsmFeature)
                .where(OsmFeature.dataset_id == dataset.id, OsmFeature.id > last_id)
                .order_by(OsmFeature.id)
                .limit(batch_limit)
            ).all()
            if not rows:
                break
            updates: list[dict[str, object]] = []
            for feature in rows:
                # Keyset pagination: remember the highest id seen so far.
                last_id = int(feature.id)
                new_scope = _classified_scope(
                    feature.mode, feature.ref, feature.name, feature.network, feature.tags_json
                )
                if _normalize_scope(feature.route_scope) != new_scope:
                    updates.append({"id": feature.id, "route_scope": new_scope})
            if updates:
                session.bulk_update_mappings(OsmFeature, updates)
            processed += len(rows)
            changed += len(updates)
            session.commit()
            _emit_progress(
                progress_callback,
                "osm_labeling_batch",
                f"Relabeled {processed}/{total} main-table OSM features.",
                processed,
                total,
                {"dataset_id": dataset.id, "changed": changed, "storage": "main"},
            )
    except Exception:
        # Drop the half-applied batch and make the session usable again;
        # otherwise the index re-creation below would fail on the broken
        # transaction and hide the original error.
        session.rollback()
        raise
    finally:
        if should_rebuild_index:
            # NOTE(review): the index name is spelled out here while the DROP
            # above uses MAIN_ROUTE_SCOPE_INDEX - confirm both name the same index.
            session.execute(
                text(
                    "CREATE INDEX IF NOT EXISTS ix_osm_features_scope_bbox "
                    "ON osm_features (dataset_id, kind, mode, route_scope, min_lon, max_lon, min_lat, max_lat)"
                )
            )
            session.commit()
            index_rebuilds = 1
            _record_main_index_build(session, dataset)
    return {"storage": "main", "processed": processed, "changed": changed, "index_rebuilds": index_rebuilds}
|
||||
|
||||
|
||||
def _classified_scope(mode: object, ref: object, name: object, network: object, tags_json: object) -> str | None:
    """Classify a feature's route scope from its raw column values.

    Each non-``None`` argument is converted to ``str`` before it is passed
    to ``infer_osm_route_scope_from_tags``; the result is normalized so
    blank scopes come back as ``None``.
    """
    coerced = [None if part is None else str(part) for part in (mode, ref, name, network, tags_json)]
    return _normalize_scope(infer_osm_route_scope_from_tags(*coerced))
|
||||
|
||||
|
||||
def _normalize_scope(value: object) -> str | None:
|
||||
text_value = str(value or "").strip()
|
||||
return text_value or None
|
||||
|
||||
|
||||
def _label_dependency(dataset: Dataset) -> dict[str, object]:
    """Describe the inputs the label stage depends on for this dataset.

    The dict holds the dataset identity and hash, its ``osm_storage``
    metadata value, a sidecar fingerprint (path plus an ``exists`` or
    ``missing`` flag, or ``None`` without a sidecar path) and the
    classifier version.
    """
    metadata = dataset_metadata(dataset)
    storage = metadata.get("osm_storage") if isinstance(metadata, dict) else None

    sidecar = sidecar_path(dataset)
    fingerprint: dict[str, object] | None = None
    if sidecar is not None:
        resolved = Path(sidecar)
        state_key = "exists" if resolved.exists() else "missing"
        fingerprint = {"path": str(resolved), state_key: True}

    return {
        "dataset_id": dataset.id,
        "source_id": dataset.source_id,
        "kind": dataset.kind,
        "dataset_sha256": dataset.sha256,
        "storage": storage,
        "sidecar": fingerprint,
        "classifier_version": OSM_LABEL_FEATURES_VERSION,
    }
|
||||
|
||||
|
||||
def _stamp_dataset_metadata(session: Session, dataset: Dataset, dependency_hash_value: str, output: dict[str, object]) -> None:
    """Write a ``label_features`` stamp into the dataset's metadata JSON.

    The dataset is re-fetched by id; nothing happens when it no longer
    exists.  The change is flushed but not committed here.
    """
    refreshed = session.get(Dataset, dataset.id)
    if refreshed is None:
        return
    stamp = {
        "stage": STAGE_LABEL_FEATURES,
        "version": OSM_LABEL_FEATURES_VERSION,
        "dependency_hash": dependency_hash_value,
        "labeled_at": datetime.now(timezone.utc).isoformat(),
        "processed": output.get("processed", 0),
        "changed": output.get("changed", 0),
        "storage": output.get("storage"),
    }
    metadata = dataset_metadata(refreshed)
    metadata["label_features"] = stamp
    refreshed.metadata_json = json.dumps(metadata, indent=2)
    session.flush()
|
||||
|
||||
|
||||
def _record_sidecar_label(connection: sqlite3.Connection, dataset: Dataset, *, processed: int, changed: int) -> None:
    """Upsert the ``label_features`` entry in the sidecar's ``pipeline_metadata`` table.

    The table is created on demand.  The value is compact JSON with sorted
    keys; the caller is responsible for committing.
    """
    payload = {
        "stage": STAGE_LABEL_FEATURES,
        "version": OSM_LABEL_FEATURES_VERSION,
        "dataset_id": dataset.id,
        "processed": processed,
        "changed": changed,
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }
    encoded = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    connection.execute("CREATE TABLE IF NOT EXISTS pipeline_metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)")
    connection.execute(
        "INSERT OR REPLACE INTO pipeline_metadata (key, value) VALUES (?, ?)",
        ("label_features", encoded),
    )
|
||||
|
||||
|
||||
def _record_sidecar_index_build(connection: sqlite3.Connection, dataset: Dataset, path: Path) -> None:
    """Upsert the ``build_indexes:route_scope`` entry in the sidecar's ``pipeline_metadata`` table.

    The table is created on demand.  The value is compact JSON with sorted
    keys; the caller is responsible for committing.
    """
    payload = {
        "stage": STAGE_BUILD_INDEXES,
        "version": "osm_sidecar_indexes_v1",
        "dataset_id": dataset.id,
        "path": str(path),
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }
    encoded = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    connection.execute("CREATE TABLE IF NOT EXISTS pipeline_metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)")
    connection.execute(
        "INSERT OR REPLACE INTO pipeline_metadata (key, value) VALUES (?, ?)",
        ("build_indexes:route_scope", encoded),
    )
|
||||
|
||||
|
||||
def _record_main_index_build(session: Session, dataset: Dataset) -> None:
    """Record a finished ``STAGE_BUILD_INDEXES`` pipeline run for the main route-scope index.

    The run is started, finished immediately and committed.
    """
    index_version = "osm_main_indexes_v1"
    dependency = {
        "dataset_id": dataset.id,
        "index": MAIN_ROUTE_SCOPE_INDEX,
        "version": index_version,
    }
    run = start_pipeline_run(
        session,
        stage=STAGE_BUILD_INDEXES,
        version=index_version,
        dependency_hash_value=dependency_hash(dependency),
        source_id=dataset.source_id,
        dataset_id=dataset.id,
        inputs=dependency,
    )
    finish_pipeline_run(session, run, outputs={"index": MAIN_ROUTE_SCOPE_INDEX})
    session.commit()
|
||||
|
||||
|
||||
def _emit_progress(
|
||||
callback: ProgressCallback | None,
|
||||
event_type: str,
|
||||
message: str,
|
||||
current: int | None,
|
||||
total: int | None,
|
||||
metadata: dict[str, object] | None,
|
||||
) -> None:
|
||||
if callback is not None:
|
||||
callback(event_type, message, current, total, metadata)
|
||||
1581
app/pipeline/osm_pbf.py
Normal file
1581
app/pipeline/osm_pbf.py
Normal file
File diff suppressed because it is too large
Load Diff
105
app/pipeline/osm_replication.py
Normal file
105
app/pipeline/osm_replication.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ReplicationState:
    """Immutable parsed contents of a replication ``state.txt`` file."""

    # Value of the ``sequenceNumber`` key, parsed as an integer.
    sequence_number: int
    # Value of the ``timestamp`` key, or None when the key is absent.
    timestamp: str | None
    # Every key/value pair found in the file, after unescaping.
    raw: dict[str, str]
|
||||
|
||||
|
||||
def fetch_replication_state(updates_url: str, *, timeout: float = 30) -> ReplicationState:
    """Download ``state.txt`` below ``updates_url`` and parse it.

    Raises whatever ``requests`` raises for transport errors or non-success
    HTTP statuses, and ``ValueError`` when the state cannot be parsed.
    """
    response = requests.get(_state_url(updates_url), timeout=timeout)
    response.raise_for_status()
    body = response.text
    return parse_replication_state_text(body)
|
||||
|
||||
|
||||
def parse_replication_state_text(text: str) -> ReplicationState:
    """Parse the ``key=value`` lines of a replication state file.

    Blank lines, ``#`` comments and lines without ``=`` are ignored.  Keys
    and values are stripped and values are unescaped.

    Raises:
        ValueError: ``sequenceNumber`` is missing or is not an integer.
    """
    values: dict[str, str] = {}
    for raw_line in text.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        key, separator, value = entry.partition("=")
        if not separator:
            continue
        values[key.strip()] = _unescape_state_value(value.strip())

    if "sequenceNumber" not in values:
        raise ValueError("replication state is missing sequenceNumber")
    sequence = values["sequenceNumber"]
    try:
        sequence_number = int(sequence)
    except ValueError as exc:
        raise ValueError(f"invalid replication sequenceNumber: {sequence}") from exc
    return ReplicationState(
        sequence_number=sequence_number,
        timestamp=values.get("timestamp"),
        raw=values,
    )
|
||||
|
||||
|
||||
def diff_url_for_sequence(updates_url: str, sequence_number: int) -> str:
    """Build the ``.osc.gz`` URL for a replication sequence number.

    The number is zero-padded to at least nine digits (or the next multiple
    of three) and split into three-digit path segments, so ``1234`` becomes
    ``000/001/234.osc.gz`` below ``updates_url``.
    """
    digits = str(sequence_number)
    width = max(9, -(-len(digits) // 3) * 3)  # round up to a multiple of three
    padded = digits.zfill(width)
    segments = [padded[start : start + 3] for start in range(0, len(padded), 3)]
    relative = "/".join(segments) + ".osc.gz"
    return urljoin(_directory_url(updates_url), relative)
|
||||
|
||||
|
||||
def download_diff(updates_url: str, sequence_number: int, output_dir: Path, *, timeout: float = 120) -> Path:
    """Download the change file for ``sequence_number`` into ``output_dir``.

    A file already present under the remote file name, either directly in
    ``output_dir`` or in a sub-directory named after the remote parent
    directory, is returned without downloading.  Otherwise the body is
    streamed to ``<sequence_number>.download`` and renamed into place once
    it is complete.

    Raises:
        requests.RequestException: transport errors or a non-success HTTP
            status; the partial temp file is removed before re-raising.
    """
    url = diff_url_for_sequence(updates_url, sequence_number)
    remote_path = Path(urlparse(url).path)
    # NOTE(review): the cached name is only the last path segment (e.g.
    # "234.osc.gz"), so sequences 1000 apart map to the same file in a shared
    # output_dir - confirm callers use a fresh directory per run.
    output_path = output_dir / remote_path.name
    nested = output_dir / remote_path.parent.name / output_path.name
    for cached in (output_path, nested):
        if cached.exists():
            return cached

    output_dir.mkdir(parents=True, exist_ok=True)
    temp_path = output_dir / f"{sequence_number}.download"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with temp_path.open("wb") as handle:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        handle.write(chunk)
        temp_path.replace(output_path)
    except BaseException:
        # Do not leave a truncated temp file behind on failure or interruption.
        temp_path.unlink(missing_ok=True)
        raise
    return output_path
|
||||
|
||||
|
||||
def apply_osm_changes(base_path: Path, diff_paths: list[Path], output_path: Path, host_tool_path: Path) -> subprocess.CompletedProcess[str]:
    """Run ``osmium apply-changes`` through ``host_tool_path`` to merge diffs into a base file.

    The output's parent directory is created if needed and an existing
    output file is overwritten.

    Raises:
        ValueError: ``diff_paths`` is empty.
        subprocess.CalledProcessError: the command exits with a non-zero status.
    """
    if not diff_paths:
        raise ValueError("no OSM change files supplied")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    command = [
        str(host_tool_path),
        "osmium",
        "apply-changes",
        "--output",
        str(output_path),
        "--overwrite",
        str(base_path),
    ]
    command.extend(str(diff) for diff in diff_paths)
    return subprocess.run(command, check=True, capture_output=True, text=True)
|
||||
|
||||
|
||||
def _state_url(updates_url: str) -> str:
    """Return the URL of ``state.txt`` directly below ``updates_url``."""
    base = _directory_url(updates_url)
    return urljoin(base, "state.txt")
|
||||
|
||||
|
||||
def _directory_url(url: str) -> str:
|
||||
return url if url.endswith("/") else f"{url}/"
|
||||
|
||||
|
||||
def _unescape_state_value(value: str) -> str:
|
||||
return (
|
||||
value.replace("\\:", ":")
|
||||
.replace("\\=", "=")
|
||||
.replace("\\ ", " ")
|
||||
.replace("\\\\", "\\")
|
||||
)
|
||||
1903
app/pipeline/route_layer.py
Normal file
1903
app/pipeline/route_layer.py
Normal file
File diff suppressed because it is too large
Load Diff
473
app/pipeline/routing_layer.py
Normal file
473
app/pipeline/routing_layer.py
Normal file
@@ -0,0 +1,473 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
import osmium
|
||||
from sqlalchemy import delete, func, select, text
|
||||
from sqlalchemy.dialects.postgresql import insert as postgresql_insert
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, RoutingEdge, RoutingNode
|
||||
from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries
|
||||
|
||||
|
||||
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, object] | None], None]
|
||||
ROUTING_LAYER_VERSION = "routing_layer_v2_osm_highway_segments_service_tags"
|
||||
|
||||
DRIVE_HIGHWAYS = {
|
||||
"motorway",
|
||||
"motorway_link",
|
||||
"trunk",
|
||||
"trunk_link",
|
||||
"primary",
|
||||
"primary_link",
|
||||
"secondary",
|
||||
"secondary_link",
|
||||
"tertiary",
|
||||
"tertiary_link",
|
||||
"unclassified",
|
||||
"residential",
|
||||
"living_street",
|
||||
"service",
|
||||
"road",
|
||||
"track",
|
||||
}
|
||||
WALK_HIGHWAYS = {
|
||||
"pedestrian",
|
||||
"footway",
|
||||
"path",
|
||||
"steps",
|
||||
"cycleway",
|
||||
"bridleway",
|
||||
"living_street",
|
||||
"residential",
|
||||
"service",
|
||||
"track",
|
||||
"unclassified",
|
||||
"tertiary",
|
||||
"tertiary_link",
|
||||
"secondary",
|
||||
"secondary_link",
|
||||
"primary",
|
||||
"primary_link",
|
||||
"road",
|
||||
}
|
||||
EXCLUDED_HIGHWAYS = {"construction", "proposed", "abandoned", "platform", "raceway"}
|
||||
NO_VALUES = {"no", "private", "agricultural", "forestry", "delivery", "customers"}
|
||||
YES_VALUES = {"yes", "designated", "permissive", "destination"}
|
||||
ONEWAY_FORWARD = {"yes", "true", "1"}
|
||||
ONEWAY_REVERSE = {"-1", "reverse"}
|
||||
DEFAULT_DRIVE_SPEED_KMH = {
|
||||
"motorway": 110,
|
||||
"motorway_link": 50,
|
||||
"trunk": 90,
|
||||
"trunk_link": 45,
|
||||
"primary": 70,
|
||||
"primary_link": 40,
|
||||
"secondary": 60,
|
||||
"secondary_link": 35,
|
||||
"tertiary": 50,
|
||||
"tertiary_link": 30,
|
||||
"unclassified": 40,
|
||||
"residential": 30,
|
||||
"living_street": 10,
|
||||
"service": 15,
|
||||
"road": 30,
|
||||
"track": 15,
|
||||
}
|
||||
DEFAULT_WALK_SPEED_MPS = 1.35
|
||||
STEP_WALK_SPEED_MPS = 0.65
|
||||
|
||||
|
||||
@dataclass
class RoutingImportResult:
    """Summary counters of one routing-graph import."""

    dataset_id: int
    input_path: str
    nodes: int
    edges: int
    walk_edges: int
    drive_edges: int
    skipped_ways: int
    version: str = ROUTING_LAYER_VERSION

    def as_dict(self) -> dict[str, object]:
        """Return the fields as a plain dict, with ``version`` as the first key."""
        ordered_fields = (
            "version",
            "dataset_id",
            "input_path",
            "nodes",
            "edges",
            "walk_edges",
            "drive_edges",
            "skipped_ways",
        )
        return {field_name: getattr(self, field_name) for field_name in ordered_fields}
|
||||
|
||||
|
||||
def active_routing_dataset(session: Session) -> Dataset | None:
    """Pick the raw PBF dataset the routing layer should be built from.

    The newest active ``osm_geojson`` dataset is consulted first: if its
    metadata names a ``raw_dataset_id`` whose dataset exists and whose local
    file is present, that dataset wins.  Otherwise the first ``osm_pbf_raw``
    dataset is returned, active ones first and then highest id first.
    """
    active_query = (
        select(Dataset)
        .where(Dataset.kind == "osm_geojson", Dataset.is_active.is_(True))
        .order_by(Dataset.id.desc())
    )
    active_osm = session.scalar(active_query)
    if active_osm is not None:
        raw_dataset_id = _metadata(active_osm).get("raw_dataset_id")
        if raw_dataset_id is not None:
            raw = session.get(Dataset, int(raw_dataset_id))
            if raw is not None and Path(raw.local_path).exists():
                return raw

    fallback_query = (
        select(Dataset)
        .where(Dataset.kind == "osm_pbf_raw")
        .order_by(Dataset.is_active.desc(), Dataset.id.desc())
    )
    return session.scalar(fallback_query)
|
||||
|
||||
|
||||
def rebuild_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    reset: bool = True,
    batch_size: int = 5000,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Import the routable highway graph of an OSM PBF file and finalize it.

    The dataset is looked up by ``dataset_id`` or, without one, chosen by
    ``active_routing_dataset``.  With ``reset`` the dataset's existing edges
    and nodes are deleted first.  The file is then streamed through
    ``_RoutingGraphHandler`` and the result of ``finalize_routing_layer`` is
    returned.

    Raises:
        RuntimeError: the configured database is not PostgreSQL.
        ValueError: no dataset could be resolved.
        FileNotFoundError: the PBF path does not exist.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer importer requires PostgreSQL/PostGIS.")

    if dataset_id is not None:
        dataset = session.get(Dataset, dataset_id)
    else:
        dataset = active_routing_dataset(session)
    if dataset is None:
        raise ValueError("No OSM PBF dataset is available for routing import.")

    path = Path(input_path or dataset.local_path)
    if not path.exists():
        raise FileNotFoundError(f"Routing import PBF does not exist: {path}")

    if reset:
        _emit(
            progress_callback,
            "routing_layer_clear_started",
            "Clearing existing routing graph.",
            None,
            None,
            {"dataset_id": dataset.id},
        )
        # Edges are deleted before nodes.
        for model in (RoutingEdge, RoutingNode):
            session.execute(delete(model).where(model.dataset_id == dataset.id))
        session.commit()

    _emit(
        progress_callback,
        "routing_layer_import_started",
        "Importing routable OSM highway graph.",
        None,
        None,
        {"dataset_id": dataset.id, "path": str(path)},
    )
    handler = _RoutingGraphHandler(
        session=session,
        dataset_id=dataset.id,
        batch_size=batch_size,
        progress_callback=progress_callback,
    )
    handler.apply_file(str(path), locations=True)
    # Write whatever is still buffered after the last full batch.
    handler.flush()

    return finalize_routing_layer(
        session,
        dataset_id=dataset.id,
        input_path=str(path),
        skipped_way_count=handler.skipped_way_count,
        progress_callback=progress_callback,
    )
|
||||
|
||||
|
||||
def finalize_routing_layer(
    session: Session,
    *,
    dataset_id: int | None = None,
    input_path: str | Path | None = None,
    skipped_way_count: int = 0,
    progress_callback: ProgressCallback | None = None,
) -> dict[str, object]:
    """Refresh geometries and indexes of an imported routing graph and summarize it.

    Steps, each committed separately: drop the routing geometry indexes,
    refresh the ``routing_nodes`` PostGIS geometries, re-create the indexes,
    analyze both routing tables, count nodes and edges, and store the counts
    under ``routing_layer`` in the dataset metadata.

    Raises:
        RuntimeError: the configured database is not PostgreSQL.
        ValueError: no dataset could be resolved.
    """
    if not settings.is_postgresql_database:
        raise RuntimeError("The routing layer finalizer requires PostgreSQL/PostGIS.")

    if dataset_id is not None:
        dataset = session.get(Dataset, dataset_id)
    else:
        dataset = active_routing_dataset(session)
    if dataset is None:
        raise ValueError("No routing dataset is available to finalize.")
    path = Path(input_path or dataset.local_path)

    def announce(event_type: str, message: str) -> None:
        # Each event gets its own metadata dict.
        _emit(progress_callback, event_type, message, None, None, {"dataset_id": dataset.id})

    def count_rows(model, *conditions) -> int:
        statement = select(func.count()).select_from(model).where(model.dataset_id == dataset.id, *conditions)
        return int(session.scalar(statement) or 0)

    announce("routing_layer_geometry_indexes_dropped", "Dropping routing geometry indexes before bulk refresh.")
    _drop_routing_geometry_indexes(session)
    session.commit()

    announce("routing_layer_geometry_started", "Refreshing routing node PostGIS geometries.")
    refresh_postgis_geometries(session, dataset_id=dataset.id, tables=["routing_nodes"], only_missing=False)
    session.commit()

    announce("routing_layer_geometry_indexes_started", "Rebuilding routing geometry indexes.")
    _create_routing_geometry_indexes(session)
    session.commit()

    analyze_postgresql_tables(session, ["routing_nodes", "routing_edges"])

    node_count = count_rows(RoutingNode)
    edge_count = count_rows(RoutingEdge)
    walk_edge_count = count_rows(RoutingEdge, RoutingEdge.walk_cost_s.is_not(None))
    drive_edge_count = count_rows(RoutingEdge, RoutingEdge.drive_cost_s.is_not(None))

    metadata = _metadata(dataset)
    metadata["routing_layer"] = {
        "version": ROUTING_LAYER_VERSION,
        "nodes": node_count,
        "edges": edge_count,
        "walk_edges": walk_edge_count,
        "drive_edges": drive_edge_count,
        "input_path": str(path),
    }
    dataset.metadata_json = json.dumps(metadata, indent=2)
    session.commit()

    summary = RoutingImportResult(
        dataset_id=dataset.id,
        input_path=str(path),
        nodes=node_count,
        edges=edge_count,
        walk_edges=walk_edge_count,
        drive_edges=drive_edge_count,
        skipped_ways=skipped_way_count,
    )
    result = summary.as_dict()
    _emit(
        progress_callback,
        "routing_layer_import_completed",
        "Routing graph import completed.",
        edge_count,
        edge_count,
        result,
    )
    return result
|
||||
|
||||
|
||||
def _drop_routing_geometry_indexes(session: Session) -> None:
    """Drop the three routing GiST indexes if they exist; the caller commits."""
    index_names = (
        "ix_routing_nodes_geom_gist",
        "ix_routing_edges_geom_gist",
        "ix_routing_edges_bbox_box_gist",
    )
    for index_name in index_names:
        session.execute(text(f"DROP INDEX IF EXISTS {index_name}"))
|
||||
|
||||
|
||||
def _create_routing_geometry_indexes(session: Session) -> None:
    """Create the node geometry and edge bounding-box GiST indexes if missing; the caller commits."""
    statements = (
        "CREATE INDEX IF NOT EXISTS ix_routing_nodes_geom_gist ON routing_nodes USING GIST (geom)",
        "CREATE INDEX IF NOT EXISTS ix_routing_edges_bbox_box_gist ON routing_edges USING GIST (box(point(max_lon, max_lat), point(min_lon, min_lat)))",
    )
    for statement in statements:
        session.execute(text(statement))
|
||||
|
||||
|
||||
class _RoutingGraphHandler(osmium.SimpleHandler):
    """Osmium handler that turns highway ways into routing nodes and edges.

    Every pair of consecutive located nodes of an accepted way becomes one
    edge row.  Rows are buffered and written to the database by ``flush``
    once at least ``batch_size`` edges are pending.
    """

    def __init__(
        self,
        *,
        session: Session,
        dataset_id: int,
        batch_size: int,
        progress_callback: ProgressCallback | None,
    ) -> None:
        super().__init__()
        self.session = session
        self.dataset_id = dataset_id
        # Batches are never smaller than 500 edges.
        self.batch_size = max(500, int(batch_size))
        self.progress_callback = progress_callback
        self.nodes: dict[int, dict[str, object]] = {}
        self.edges: list[dict[str, object]] = []
        # Counters start from what is already stored for this dataset.
        existing_nodes = session.scalar(
            select(func.count()).select_from(RoutingNode).where(RoutingNode.dataset_id == dataset_id)
        )
        existing_edges = session.scalar(
            select(func.count()).select_from(RoutingEdge).where(RoutingEdge.dataset_id == dataset_id)
        )
        self.node_count = int(existing_nodes or 0)
        self.edge_count = int(existing_edges or 0)
        self.walk_edge_count = 0
        self.drive_edge_count = 0
        self.skipped_way_count = 0
        self.processed_way_count = 0

    def way(self, way) -> None:
        """Buffer one edge per segment of ``way`` when it is walkable or drivable."""
        tags = {tag.k: tag.v for tag in way.tags}
        highway = tags.get("highway")
        if not highway or highway in EXCLUDED_HIGHWAYS:
            self.skipped_way_count += 1
            return
        walkable = _walkable(tags, highway)
        drivable = _drivable(tags, highway)
        if not (walkable or drivable):
            self.skipped_way_count += 1
            return

        # Nodes without a valid location are dropped from the way.
        located = [
            (int(node.ref), float(node.location.lon), float(node.location.lat))
            for node in way.nodes
            if node.location.valid()
        ]
        if len(located) < 2:
            self.skipped_way_count += 1
            return

        oneway = _oneway_direction(tags, highway)
        drive_speed_mps = _drive_speed_mps(tags, highway)
        walk_speed_mps = STEP_WALK_SPEED_MPS if highway == "steps" else DEFAULT_WALK_SPEED_MPS
        is_reverse = oneway == "reverse"
        is_directed = oneway in {"forward", "reverse"}

        for left, right in zip(located, located[1:]):
            if left[0] == right[0]:
                continue
            length_m = _distance_m(left[2], left[1], right[2], right[1])
            if length_m <= 0:
                continue
            # A reverse one-way is stored with its endpoints swapped.
            tail, head = (right, left) if is_reverse else (left, right)
            source_id, source_lon, source_lat = tail
            target_id, target_lon, target_lat = head

            walk_cost = length_m / walk_speed_mps if walkable else None
            drive_cost = length_m / drive_speed_mps if drivable and drive_speed_mps > 0 else None
            reverse_walk_cost = walk_cost
            # Driving against a one-way is not allowed; walking is unaffected.
            reverse_drive_cost = None if is_directed else drive_cost

            self.nodes[source_id] = {
                "dataset_id": self.dataset_id,
                "osm_node_id": source_id,
                "lon": source_lon,
                "lat": source_lat,
            }
            self.nodes[target_id] = {
                "dataset_id": self.dataset_id,
                "osm_node_id": target_id,
                "lon": target_lon,
                "lat": target_lat,
            }
            geometry = {"type": "LineString", "coordinates": [[source_lon, source_lat], [target_lon, target_lat]]}
            self.edges.append(
                {
                    "dataset_id": self.dataset_id,
                    "osm_way_id": int(way.id),
                    "source_osm_node_id": source_id,
                    "target_osm_node_id": target_id,
                    "source_lon": source_lon,
                    "source_lat": source_lat,
                    "target_lon": target_lon,
                    "target_lat": target_lat,
                    "highway": highway,
                    "name": tags.get("name"),
                    "length_m": length_m,
                    "walk_cost_s": walk_cost,
                    "reverse_walk_cost_s": reverse_walk_cost,
                    "drive_cost_s": drive_cost,
                    "reverse_drive_cost_s": reverse_drive_cost,
                    "geometry_geojson": json.dumps(geometry, separators=(",", ":")),
                    "min_lon": min(source_lon, target_lon),
                    "min_lat": min(source_lat, target_lat),
                    "max_lon": max(source_lon, target_lon),
                    "max_lat": max(source_lat, target_lat),
                    "tags_json": _routing_tags_json(tags),
                }
            )
            self.edge_count += 1
            if walk_cost is not None:
                self.walk_edge_count += 1
            if drive_cost is not None:
                self.drive_edge_count += 1

        self.processed_way_count += 1
        if len(self.edges) >= self.batch_size:
            self.flush()
        if self.processed_way_count % 100_000 == 0:
            _emit(
                self.progress_callback,
                "routing_layer_import_batch",
                f"Imported {self.edge_count:,} routing edges.",
                self.edge_count,
                None,
                {
                    "processed_ways": self.processed_way_count,
                    "nodes_pending": len(self.nodes),
                    "edges": self.edge_count,
                },
            )

    def flush(self) -> None:
        """Write the buffered nodes and edges and commit; a no-op when both buffers are empty."""
        if not (self.nodes or self.edges):
            return
        node_rows = list(self.nodes.values())
        edge_rows = self.edges
        if node_rows:
            # Nodes already stored for this dataset are left untouched.
            insert_nodes = (
                postgresql_insert(RoutingNode)
                .values(node_rows)
                .on_conflict_do_nothing(index_elements=["dataset_id", "osm_node_id"])
            )
            self.session.execute(insert_nodes)
            self.node_count += len(node_rows)
            self.nodes.clear()
        if edge_rows:
            self.session.bulk_insert_mappings(RoutingEdge, edge_rows)
            self.edges = []
        self.session.commit()
|
||||
|
||||
|
||||
def _walkable(tags: dict[str, str], highway: str) -> bool:
    """Return whether a way of class ``highway`` with ``tags`` may be walked.

    The way must be in ``WALK_HIGHWAYS``. An explicit ``foot`` prohibition
    always wins; an explicit ``foot`` permission overrides both a general
    ``access`` prohibition and the default exclusion of motorway/trunk classes.
    """
    if highway not in WALK_HIGHWAYS:
        return False

    foot = _tag_value(tags, "foot")
    if foot in NO_VALUES:
        return False
    if foot in YES_VALUES:
        return True

    # No explicit foot permission: general access bans and high-speed road
    # classes both exclude pedestrians.
    access_denied = _tag_value(tags, "access") in NO_VALUES
    high_speed_class = highway in {"motorway", "motorway_link", "trunk", "trunk_link"}
    return not (access_denied or high_speed_class)
|
||||
|
||||
|
||||
def _drivable(tags: dict[str, str], highway: str) -> bool:
    """Return whether a way of class ``highway`` with ``tags`` may be driven.

    The way must be in ``DRIVE_HIGHWAYS``. Any prohibition on ``motorcar``,
    ``motor_vehicle`` or ``vehicle`` excludes it. An explicit ``motorcar`` or
    ``motor_vehicle`` permission overrides a general ``access`` prohibition and
    is required for the pedestrian/cycle classes listed below.
    """
    if highway not in DRIVE_HIGHWAYS:
        return False

    motorcar = _tag_value(tags, "motorcar")
    motor_vehicle = _tag_value(tags, "motor_vehicle")
    vehicle = _tag_value(tags, "vehicle")
    if any(mode in NO_VALUES for mode in (motorcar, motor_vehicle, vehicle)):
        return False

    if motorcar in YES_VALUES or motor_vehicle in YES_VALUES:
        return True

    if _tag_value(tags, "access") in NO_VALUES:
        return False

    # Without an explicit motor permission these classes are never drivable.
    return highway not in {"footway", "path", "pedestrian", "steps", "cycleway", "bridleway"}
|
||||
|
||||
|
||||
def _oneway_direction(tags: dict[str, str], highway: str) -> str:
    """Return the permitted travel direction of a way.

    Returns ``"reverse"`` when ``oneway`` is in ``ONEWAY_REVERSE``,
    ``"forward"`` when it is in ``ONEWAY_FORWARD``, and ``"both"`` otherwise,
    with one exception: motorways and roundabouts are treated as forward-only
    when no ``oneway`` value contradicts it.

    An explicit ``oneway=no`` (or ``false``/``0``) now overrides that implied
    one-way; previously such ways were still forced to ``"forward"``.
    """
    oneway = _tag_value(tags, "oneway")
    if oneway in ONEWAY_REVERSE:
        return "reverse"
    if oneway in ONEWAY_FORWARD:
        return "forward"
    if oneway in {"no", "false", "0"}:
        # The mapper explicitly declared two-way traffic; do not apply the
        # implied one-way of the road class or junction type.
        return "both"
    # Normalise the junction tag like every other tag lookup in this module.
    if highway == "motorway" or _tag_value(tags, "junction") == "roundabout":
        return "forward"
    return "both"
|
||||
|
||||
|
||||
def _drive_speed_mps(tags: dict[str, str], highway: str) -> float:
    """Return the assumed driving speed for a way in metres per second.

    Uses the parsed ``maxspeed`` tag when it yields a non-zero number, otherwise
    the per-class default from ``DEFAULT_DRIVE_SPEED_KMH`` (30 km/h when the
    class is unknown). The result is never below 5.0 m/s.
    """
    posted_kmh = _parse_maxspeed(tags.get("maxspeed"))
    if posted_kmh:
        speed_kmh = posted_kmh
    else:
        speed_kmh = DEFAULT_DRIVE_SPEED_KMH.get(highway, 30)
    speed_mps = float(speed_kmh) / 3.6
    return speed_mps if speed_mps > 5.0 else 5.0
|
||||
|
||||
|
||||
def _parse_maxspeed(value: str | None) -> float | None:
    """Parse a ``maxspeed`` tag value into km/h.

    Returns ``None`` for empty values, for the non-numeric keywords listed
    below, and for text without a number. Values ending in ``mph`` are
    converted to km/h; everything else is taken as km/h.
    """
    if not value:
        return None

    text = value.strip().lower()
    if text in {"signals", "none", "walk", "variable"}:
        return None

    if not text.endswith("mph"):
        return _leading_float(text)

    miles_per_hour = _leading_float(text[:-3])
    if miles_per_hour is None:
        return None
    return miles_per_hour * 1.60934
|
||||
|
||||
|
||||
def _leading_float(value: str) -> float | None:
|
||||
digits = []
|
||||
for char in value.strip():
|
||||
if char.isdigit() or char == ".":
|
||||
digits.append(char)
|
||||
elif digits:
|
||||
break
|
||||
if not digits:
|
||||
return None
|
||||
try:
|
||||
return float("".join(digits))
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _routing_tags_json(tags: dict[str, str]) -> str:
|
||||
selected = {
|
||||
key: value
|
||||
for key, value in tags.items()
|
||||
if key in {"access", "bicycle", "bridge", "foot", "highway", "junction", "maxspeed", "motor_vehicle", "motorcar", "name", "oneway", "service", "surface", "tunnel", "vehicle"}
|
||||
}
|
||||
return json.dumps(selected, separators=(",", ":"))
|
||||
|
||||
|
||||
def _tag_value(tags: dict[str, str], key: str) -> str:
|
||||
return str(tags.get(key) or "").strip().lower()
|
||||
|
||||
|
||||
def _distance_m(lat_a: float, lon_a: float, lat_b: float, lon_b: float) -> float:
|
||||
radius = 6_371_000.0
|
||||
phi_a = math.radians(lat_a)
|
||||
phi_b = math.radians(lat_b)
|
||||
delta_phi = math.radians(lat_b - lat_a)
|
||||
delta_lambda = math.radians(lon_b - lon_a)
|
||||
hav = math.sin(delta_phi / 2) ** 2 + math.cos(phi_a) * math.cos(phi_b) * math.sin(delta_lambda / 2) ** 2
|
||||
return radius * 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))
|
||||
|
||||
|
||||
def _metadata(dataset: Dataset) -> dict[str, object]:
|
||||
try:
|
||||
value = json.loads(dataset.metadata_json or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return value if isinstance(value, dict) else {}
|
||||
|
||||
|
||||
def _emit(
|
||||
progress_callback: ProgressCallback | None,
|
||||
event_type: str,
|
||||
message: str,
|
||||
progress_current: int | None,
|
||||
progress_total: int | None,
|
||||
metadata: dict[str, object] | None = None,
|
||||
) -> None:
|
||||
if progress_callback is not None:
|
||||
progress_callback(event_type, message, progress_current, progress_total, metadata)
|
||||
40
app/pipeline/run.py
Normal file
40
app/pipeline/run.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Callable, Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import Source
|
||||
from app.pipeline.gtfs import run_gtfs_source
|
||||
from app.pipeline.osm_diff import run_osm_diff_source
|
||||
from app.pipeline.osm_geojson import run_osm_geojson_source
|
||||
from app.pipeline.osm_pbf import run_osm_pbf_source
|
||||
|
||||
|
||||
ProgressCallback = Callable[[str, str, int | None, int | None, dict[str, Any] | None], None]
|
||||
|
||||
|
||||
def run_source(session: Session, source: Source, progress_callback: ProgressCallback | None = None):
    """Run the import pipeline matching ``source.kind`` and return its dataset.

    Marks the source as ``"running"`` and flushes before dispatching. On
    success the status becomes ``"ok"``; on any exception the status becomes
    ``"error"``, the message is stored in ``last_error``, and the exception is
    re-raised. Only the GTFS and OSM PBF runners receive ``progress_callback``.

    Raises:
        ValueError: if ``source.kind`` is not one of the supported kinds.
    """
    source.status = "running"
    source.last_run_at = datetime.now(timezone.utc)
    source.last_error = None
    session.flush()

    # Thunks keep the per-kind call signatures (with/without callback) intact.
    runners = {
        "gtfs": lambda: run_gtfs_source(session, source, progress_callback=progress_callback),
        "osm_geojson": lambda: run_osm_geojson_source(session, source),
        "osm_pbf": lambda: run_osm_pbf_source(session, source, progress_callback=progress_callback),
        "osm_diff": lambda: run_osm_diff_source(session, source),
    }
    try:
        runner = runners.get(source.kind)
        if runner is None:
            raise ValueError(f"Unsupported source kind: {source.kind}")
        dataset = runner()
        source.status = "ok"
        source.last_error = None
        return dataset
    except Exception as exc:  # noqa: BLE001 - persist pipeline error for UI
        source.status = "error"
        source.last_error = str(exc)
        raise
|
||||
294
app/pipeline/sample_data.py
Normal file
294
app/pipeline/sample_data.py
Normal file
@@ -0,0 +1,294 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.db import init_db
|
||||
from app.models import (
|
||||
Dataset,
|
||||
CanonicalStop,
|
||||
CanonicalStopLink,
|
||||
GtfsAgency,
|
||||
GtfsCalendar,
|
||||
GtfsCalendarDate,
|
||||
GtfsRoute,
|
||||
GtfsRoutePatternLink,
|
||||
GtfsShape,
|
||||
GtfsStop,
|
||||
GtfsStopTime,
|
||||
GtfsTripRoutePatternLink,
|
||||
GtfsTrip,
|
||||
Itinerary,
|
||||
ItineraryLeg,
|
||||
Job,
|
||||
JobEvent,
|
||||
MatchRule,
|
||||
OsmDiffState,
|
||||
OsmFeature,
|
||||
PipelineRun,
|
||||
RouteMatch,
|
||||
RoutePattern,
|
||||
RoutePatternStop,
|
||||
RoutingEdge,
|
||||
RoutingNode,
|
||||
Source,
|
||||
SourceCatalogEntry,
|
||||
SourceUpdateCheck,
|
||||
TravelRequest,
|
||||
)
|
||||
from app.pipeline.matcher import run_route_matching
|
||||
from app.pipeline.route_layer import rebuild_route_layer
|
||||
from app.pipeline.run import run_source
|
||||
|
||||
|
||||
def load_sample_project(session: Session, *, preserve_job_id: int | None = None) -> dict:
    """Reset the project and load the bundled Berlin-like sample end to end.

    Initialises the database, clears project data (keeping the source catalog
    and, when given, job ``preserve_job_id``), writes the sample GTFS zip and
    OSM GeoJSON under ``<data_dir>/sample``, registers and imports both
    sources, then runs route matching and rebuilds the route layer.

    Returns a summary dict with the two dataset ids and both step results.
    """
    init_db()
    clear_project_data(session, preserve_job_id=preserve_job_id, preserve_catalog=True)

    sample_dir = settings.data_dir / "sample"
    sample_dir.mkdir(parents=True, exist_ok=True)
    gtfs_path = sample_dir / "sample_berlin.gtfs.zip"
    osm_path = sample_dir / "sample_berlin_osm.geojson"
    create_sample_gtfs(gtfs_path)
    create_sample_osm_geojson(osm_path)

    shared = {"country": "DE", "license": "sample"}
    gtfs_source = Source(name="Sample Berlin GTFS", kind="gtfs", url=str(gtfs_path), **shared)
    osm_source = Source(name="Sample Berlin OSM transport", kind="osm_geojson", url=str(osm_path), **shared)
    session.add_all([gtfs_source, osm_source])
    session.flush()

    summary: dict = {"status": "ok"}
    summary["gtfs_dataset_id"] = run_source(session, gtfs_source).id
    summary["osm_dataset_id"] = run_source(session, osm_source).id
    summary["match_result"] = run_route_matching(session)
    summary["route_layer_result"] = rebuild_route_layer(session)
    return summary
|
||||
|
||||
|
||||
def clear_project_data(
    session: Session,
    *,
    preserve_job_id: int | None = None,
    preserve_catalog: bool = True,
) -> None:
    """Delete project data, optionally keeping one queue job and the catalog.

    Pipeline runs are always removed. With ``preserve_job_id`` set, jobs and
    their events are kept and other active jobs are cancelled; otherwise all
    jobs and job events are deleted. The source catalog is deleted only when
    ``preserve_catalog`` is false. Changes are flushed, not committed.
    """
    session.execute(delete(PipelineRun))

    if preserve_job_id is not None:
        _cancel_other_jobs_for_reset(session, preserve_job_id)
    else:
        session.execute(delete(JobEvent))
        session.execute(delete(Job))

    # NOTE(review): the order looks like dependants-before-parents so foreign
    # keys stay satisfied - confirm against the model definitions before reordering.
    deletion_order = (
        ItineraryLeg,
        Itinerary,
        TravelRequest,
        SourceUpdateCheck,
        OsmDiffState,
        MatchRule,
        RouteMatch,
        GtfsTripRoutePatternLink,
        GtfsRoutePatternLink,
        RoutePatternStop,
        RoutePattern,
        CanonicalStopLink,
        CanonicalStop,
        RoutingEdge,
        RoutingNode,
        GtfsStopTime,
        GtfsCalendarDate,
        GtfsCalendar,
        GtfsShape,
        GtfsTrip,
        GtfsRoute,
        GtfsStop,
        GtfsAgency,
        OsmFeature,
        Dataset,
        Source,
    )
    for table_model in deletion_order:
        session.execute(delete(table_model))

    if not preserve_catalog:
        session.execute(delete(SourceCatalogEntry))

    session.flush()
|
||||
|
||||
|
||||
def _cancel_other_jobs_for_reset(session: Session, preserve_job_id: int) -> None:
    """Cancel every queued, running, or paused job except ``preserve_job_id``.

    Each cancelled job has its lease, pause, requested-action, and error
    fields cleared, is stamped as finished now, and gets a
    ``cancelled_by_reset`` event carrying its current progress.
    """
    cancelled_at = datetime.now(timezone.utc)
    active_statuses = {"queued", "running", "paused"}
    other_active_jobs = select(Job).where(Job.id != preserve_job_id, Job.status.in_(active_statuses))

    for job in session.scalars(other_active_jobs).all():
        job.status = "cancelled"
        for cleared_field in ("requested_action", "lease_owner", "lease_expires_at", "paused_at", "error"):
            setattr(job, cleared_field, None)
        job.updated_at = cancelled_at
        job.finished_at = cancelled_at

        event = JobEvent(
            job_id=job.id,
            event_type="cancelled_by_reset",
            message=f"Job cancelled by reset job #{preserve_job_id}.",
            progress_current=job.progress_current,
            progress_total=job.progress_total,
        )
        session.add(event)
|
||||
|
||||
|
||||
def create_sample_gtfs(path: Path) -> None:
    """Write the hand-made sample GTFS feed to ``path`` as a deflated zip.

    The feed has three agencies, fourteen stops, six routes with one trip
    each, a single all-week ``daily`` service for 2026, and one shape per trip
    that simply connects the trip's stops in order. Stop ``idx`` (1-based) of
    every trip arrives at 08:(5*idx) and departs one minute later.
    """
    agency_fields = ["agency_id", "agency_name", "agency_url", "agency_timezone"]
    stop_fields = ["stop_id", "stop_name", "stop_lat", "stop_lon"]
    route_fields = ["route_id", "agency_id", "route_short_name", "route_long_name", "route_type"]
    trip_fields = ["route_id", "service_id", "trip_id", "shape_id"]
    stop_time_fields = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence"]
    weekdays = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
    calendar_fields = ["service_id", *weekdays, "start_date", "end_date"]
    shape_fields = ["shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]

    def as_rows(fields, records):
        # Expand positional records into the dict rows the CSV writer expects.
        return [dict(zip(fields, record)) for record in records]

    agencies = as_rows(
        agency_fields,
        [
            ("BVG", "BVG", "https://example.invalid/bvg", "Europe/Berlin"),
            ("DB", "DB Regio", "https://example.invalid/db", "Europe/Berlin"),
            ("XAIR", "Example Airport Shuttle", "https://example.invalid/xair", "Europe/Berlin"),
        ],
    )
    stops = as_rows(
        stop_fields,
        [
            ("hbf", "Berlin Hauptbahnhof", "52.5251", "13.3696"),
            ("friedrich", "Friedrichstraße", "52.5201", "13.3862"),
            ("alex", "Alexanderplatz", "52.5219", "13.4132"),
            ("ost", "Ostbahnhof", "52.5100", "13.4344"),
            ("zoo", "Zoologischer Garten", "52.5069", "13.3320"),
            ("wittenberg", "Wittenbergplatz", "52.5020", "13.3430"),
            ("potsdamer", "Potsdamer Platz", "52.5096", "13.3760"),
            ("stadtmitte", "Stadtmitte", "52.5113", "13.3907"),
            ("reichstag", "Reichstag", "52.5186", "13.3763"),
            ("hackescher", "Hackescher Markt", "52.5220", "13.4023"),
            ("naturkunde", "Naturkundemuseum", "52.5300", "13.3790"),
            ("wannsee", "Wannsee", "52.4210", "13.1797"),
            ("kladow", "Kladow", "52.4547", "13.1439"),
            ("airport", "Example Airport Terminal", "52.3650", "13.5100"),
        ],
    )
    routes = as_rows(
        route_fields,
        [
            ("u2", "BVG", "U2", "Pankow - Ruhleben", "1"),
            ("re1", "DB", "RE1", "Magdeburg - Frankfurt Oder", "2"),
            ("m5", "BVG", "M5", "Hauptbahnhof - Hohenschönhausen", "0"),
            ("bus100", "BVG", "100", "Zoologischer Garten - Alexanderplatz", "3"),
            ("f10", "BVG", "F10", "Wannsee - Kladow", "4"),
            ("x99", "XAIR", "X99", "Airport Express Sample", "3"),
        ],
    )
    # One trip and one shape per route, named after the route id.
    trips = as_rows(
        trip_fields,
        [
            (route["route_id"], "daily", f"{route['route_id']}_trip", f"{route['route_id']}_shape")
            for route in routes
        ],
    )
    stop_sequences = {
        "u2_trip": ["zoo", "wittenberg", "potsdamer", "stadtmitte", "alex"],
        "re1_trip": ["hbf", "friedrich", "alex", "ost"],
        "m5_trip": ["hbf", "naturkunde", "hackescher", "alex"],
        "bus100_trip": ["zoo", "reichstag", "alex"],
        "f10_trip": ["wannsee", "kladow"],
        "x99_trip": ["alex", "airport"],
    }
    lon_lat_by_stop = {stop["stop_id"]: (stop["stop_lon"], stop["stop_lat"]) for stop in stops}

    stop_times = []
    shapes = []
    for trip in trips:
        trip_id = trip["trip_id"]
        shape_id = trip["shape_id"]
        for position, stop_id in enumerate(stop_sequences[trip_id], start=1):
            minute = position * 5
            sequence = str(position)
            stop_times.append(
                dict(
                    zip(
                        stop_time_fields,
                        (trip_id, f"08:{minute:02d}:00", f"08:{minute + 1:02d}:00", stop_id, sequence),
                    )
                )
            )
            lon, lat = lon_lat_by_stop[stop_id]
            shapes.append(dict(zip(shape_fields, (shape_id, lat, lon, sequence))))

    calendar = [
        {
            "service_id": "daily",
            **{day: "1" for day in weekdays},
            "start_date": "20260101",
            "end_date": "20261231",
        }
    ]

    members = [
        ("agency.txt", agency_fields, agencies),
        ("stops.txt", stop_fields, stops),
        ("routes.txt", route_fields, routes),
        ("trips.txt", trip_fields, trips),
        ("stop_times.txt", stop_time_fields, stop_times),
        ("calendar.txt", calendar_fields, calendar),
        ("shapes.txt", shape_fields, shapes),
    ]
    with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for member_name, fields, rows in members:
            _write_csv(zf, member_name, fields, rows)
|
||||
|
||||
|
||||
def _write_csv(zf: zipfile.ZipFile, name: str, fields: list[str], rows: list[dict[str, str]]) -> None:
|
||||
buffer = io.StringIO(newline="")
|
||||
writer = csv.DictWriter(buffer, fieldnames=fields)
|
||||
writer.writeheader()
|
||||
writer.writerows(rows)
|
||||
zf.writestr(name, buffer.getvalue())
|
||||
|
||||
|
||||
def create_sample_osm_geojson(path: Path) -> None:
    """Write the sample OSM transport FeatureCollection to ``path`` as UTF-8 JSON.

    Contains six route relations (as LineStrings) and five station/terminal
    nodes (as Points) around the sample Berlin network, indented by two spaces.
    """

    def route_feature(fid, mode, ref, name, operator, coords):
        # Routes operated by BVG are labelled with the VBB network, all others DB.
        network = "VBB" if operator == "BVG" else "DB"
        properties = {
            "osm_type": "relation",
            "osm_id": str(fid),
            "type": "route",
            "route": mode,
            "ref": ref,
            "name": name,
            "operator": operator,
            "network": network,
        }
        geometry = {"type": "LineString", "coordinates": coords}
        return {"type": "Feature", "geometry": geometry, "properties": properties}

    def node_feature(fid, _kind, name, coords, props=None):
        # ``_kind`` is accepted for readability at the call site but is not stored.
        properties = {**(props or {}), "osm_type": "node", "osm_id": str(fid), "name": name}
        geometry = {"type": "Point", "coordinates": coords}
        return {"type": "Feature", "geometry": geometry, "properties": properties}

    route_features = [
        route_feature(1002, "subway", "U2", "U2 Ruhleben - Pankow", "BVG", [[13.3320, 52.5069], [13.3430, 52.5020], [13.3760, 52.5096], [13.3907, 52.5113], [13.4132, 52.5219]]),
        route_feature(2001, "train", "RE1", "RE1 Magdeburg - Frankfurt Oder", "DB Regio", [[13.3696, 52.5251], [13.3862, 52.5201], [13.4132, 52.5219], [13.4344, 52.5100]]),
        route_feature(5005, "tram", "M5", "M5 Hauptbahnhof - Hohenschönhausen", "BVG", [[13.3696, 52.5251], [13.3790, 52.5300], [13.4023, 52.5220], [13.4132, 52.5219]]),
        route_feature(6100, "bus", "100", "Bus 100 Zoologischer Garten - Alexanderplatz", "BVG", [[13.3320, 52.5069], [13.3763, 52.5186], [13.4132, 52.5219]]),
        route_feature(7010, "ferry", "F10", "F10 Wannsee - Kladow", "BVG", [[13.1797, 52.4210], [13.1439, 52.4547]]),
        route_feature(5010, "tram", "M10", "M10 Warschauer Straße - Hauptbahnhof", "BVG", [[13.4500, 52.5050], [13.4020, 52.5300], [13.3696, 52.5251]]),
    ]
    node_features = [
        node_feature(9001, "station", "Berlin Hauptbahnhof", [13.3696, 52.5251], {"railway": "station"}),
        node_feature(9002, "station", "Alexanderplatz", [13.4132, 52.5219], {"railway": "station"}),
        node_feature(9003, "stop", "Zoologischer Garten", [13.3320, 52.5069], {"public_transport": "station", "railway": "station"}),
        node_feature(9004, "terminal", "Wannsee Ferry Terminal", [13.1797, 52.4210], {"amenity": "ferry_terminal"}),
        node_feature(9005, "terminal", "Kladow Ferry Terminal", [13.1439, 52.4547], {"amenity": "ferry_terminal"}),
    ]

    collection = {"type": "FeatureCollection", "features": route_features + node_features}
    path.write_text(json.dumps(collection, indent=2), encoding="utf-8")
|
||||
135
app/pipeline/state.py
Normal file
135
app/pipeline/state.py
Normal file
@@ -0,0 +1,135 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import hashlib
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import PipelineRun
|
||||
|
||||
|
||||
STAGE_ACQUIRE_RAW = "acquire_raw"
|
||||
STAGE_FILTER_TRANSPORT = "filter_transport"
|
||||
STAGE_EXTRACT_GEOMETRY = "extract_geometry"
|
||||
STAGE_LABEL_FEATURES = "label_features"
|
||||
STAGE_BUILD_INDEXES = "build_indexes"
|
||||
STAGE_MATCH_ROUTES = "match_routes"
|
||||
STAGE_BUILD_ROUTE_LAYER = "build_route_layer"
|
||||
|
||||
|
||||
def stable_json(value: Any) -> str:
    """Serialise ``value`` as compact JSON with sorted keys.

    Objects that JSON cannot encode natively are rendered through ``str``.
    """
    encoder_options = {"sort_keys": True, "separators": (",", ":"), "default": str}
    return json.dumps(value, **encoder_options)
|
||||
|
||||
|
||||
def dependency_hash(value: Any) -> str:
    """Return the SHA-256 hex digest of ``value``'s ``stable_json`` form (UTF-8)."""
    canonical = stable_json(value)
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
def latest_completed_run(
    session: Session,
    *,
    stage: str,
    version: str,
    dependency_hash_value: str,
    source_id: int | None = None,
    dataset_id: int | None = None,
) -> PipelineRun | None:
    """Return the newest completed run matching the given key, or ``None``.

    A run matches on stage, version, dependency hash, and status
    ``"completed"``. ``source_id`` and ``dataset_id`` are matched exactly;
    ``None`` matches only rows where the column is NULL. Newest means latest
    ``finished_at``, ties broken by highest id.
    """
    criteria = [
        PipelineRun.stage == stage,
        PipelineRun.version == version,
        PipelineRun.dependency_hash == dependency_hash_value,
        PipelineRun.status == "completed",
    ]
    # None must be compared with IS NULL, not "= NULL".
    criteria.append(
        PipelineRun.source_id.is_(None) if source_id is None else PipelineRun.source_id == source_id
    )
    criteria.append(
        PipelineRun.dataset_id.is_(None) if dataset_id is None else PipelineRun.dataset_id == dataset_id
    )

    newest_first = (PipelineRun.finished_at.desc(), PipelineRun.id.desc())
    query = select(PipelineRun).where(*criteria).order_by(*newest_first).limit(1)
    return session.scalar(query)
|
||||
|
||||
|
||||
def start_pipeline_run(
    session: Session,
    *,
    stage: str,
    version: str,
    dependency_hash_value: str,
    source_id: int | None = None,
    dataset_id: int | None = None,
    job_id: int | None = None,
    inputs: dict[str, Any] | None = None,
) -> PipelineRun:
    """Create, add, and flush a ``PipelineRun`` in status ``"running"``.

    ``inputs`` is stored as stable JSON (``None`` stays ``None``);
    ``started_at`` and ``updated_at`` share the same UTC timestamp.
    """
    started = datetime.now(timezone.utc)
    serialized_inputs = stable_json(inputs) if inputs is not None else None
    fields = {
        "stage": stage,
        "version": version,
        "dependency_hash": dependency_hash_value,
        "status": "running",
        "source_id": source_id,
        "dataset_id": dataset_id,
        "job_id": job_id,
        "input_json": serialized_inputs,
        "started_at": started,
        "updated_at": started,
    }
    run = PipelineRun(**fields)
    session.add(run)
    session.flush()
    return run
|
||||
|
||||
|
||||
def finish_pipeline_run(
    session: Session,
    run: PipelineRun,
    *,
    status: str = "completed",
    outputs: dict[str, Any] | None = None,
    error: str | None = None,
) -> PipelineRun:
    """Record the final state of ``run`` and flush the session.

    Sets the status, stores ``outputs`` as stable JSON (``None`` stays
    ``None``), stores ``error``, and stamps ``updated_at`` and ``finished_at``
    with the same UTC time. Returns the same ``run`` object.
    """
    finished = datetime.now(timezone.utc)
    run.status = status
    if outputs is None:
        run.output_json = None
    else:
        run.output_json = stable_json(outputs)
    run.error = error
    run.updated_at = run.finished_at = finished
    session.flush()
    return run
|
||||
|
||||
|
||||
def pipeline_run_payload(run: PipelineRun) -> dict[str, Any]:
    """Convert ``run`` into a JSON-friendly dict.

    Stored input/output JSON is decoded to dicts (empty dict when absent or
    invalid) and timestamps are rendered as ISO-8601 strings or ``None``.
    """

    def iso_or_none(moment):
        return moment.isoformat() if moment else None

    payload: dict[str, Any] = {
        name: getattr(run, name)
        for name in ("id", "stage", "version", "dependency_hash", "status", "source_id", "dataset_id", "job_id")
    }
    payload["input"] = _json_object(run.input_json)
    payload["output"] = _json_object(run.output_json)
    payload["error"] = run.error
    payload["started_at"] = iso_or_none(run.started_at)
    payload["updated_at"] = iso_or_none(run.updated_at)
    payload["finished_at"] = iso_or_none(run.finished_at)
    return payload
|
||||
|
||||
|
||||
def _json_object(text: str | None) -> dict[str, Any]:
|
||||
if not text:
|
||||
return {}
|
||||
try:
|
||||
value = json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return value if isinstance(value, dict) else {}
|
||||
89
app/pipeline/utils.py
Normal file
89
app/pipeline/utils.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from shapely.geometry import shape
|
||||
|
||||
|
||||
def sha256_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at ``path``.

    The file is read in 1 MiB chunks so large files are not loaded whole.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        while block := handle.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def norm_text(value: object) -> str:
    """Normalise free text for comparison.

    Lower-cases and trims the text, expands ``ß`` to ``ss``, folds accented
    letters to their base letter (``ö`` -> ``o``, ``é`` -> ``e``), and turns
    every remaining run of characters outside ``a-z0-9`` into a single space.
    ``None`` yields ``""``.

    Accent folding keeps words intact: previously ``"Hohenschönhausen"``
    became ``"hohensch nhausen"`` because the umlaut was replaced by a space.
    Letters without a Unicode decomposition (e.g. ``ø``, ``ł``) are still
    replaced by a space.
    """
    if value is None:
        return ""
    import unicodedata  # local import: keeps this block self-contained

    text = str(value).lower().strip().replace("ß", "ss")
    # NFKD splits "ö" into "o" + combining diaeresis; dropping the combining
    # marks leaves the plain base letters.
    decomposed = unicodedata.normalize("NFKD", text)
    text = "".join(char for char in decomposed if not unicodedata.combining(char))
    # Each non-alphanumeric run collapses to exactly one space, so no further
    # whitespace squeezing is needed.
    return re.sub(r"[^a-z0-9]+", " ", text).strip()
|
||||
|
||||
|
||||
def norm_ref(value: object) -> str:
    """Normalise a route/line reference to lower-case ``a-z0-9`` only.

    Every other character is removed, so ``"S 41"`` becomes ``"s41"``.
    ``None`` yields ``""``.
    """
    if value is None:
        return ""
    lowered = str(value).lower()
    return re.sub(r"[^a-z0-9]+", "", lowered)
|
||||
|
||||
|
||||
def first_nonempty(*values: object) -> str:
    """Return the first value whose trimmed string form is non-empty.

    ``None`` values are skipped. Returns ``""`` when nothing qualifies.
    """
    trimmed = (str(candidate).strip() for candidate in values if candidate is not None)
    return next((text for text in trimmed if text), "")
|
||||
|
||||
|
||||
def geometry_json_and_bbox(geometry: object) -> tuple[Optional[str], tuple[Optional[float], Optional[float], Optional[float], Optional[float]]]:
    """Return a geometry as compact GeoJSON text together with its bounds.

    ``geometry`` may be a GeoJSON-like dict (converted with ``shape``) or an
    object exposing ``is_empty``, ``bounds``, and ``__geo_interface__``. The
    bounds are returned in the order ``geom.bounds`` provides them. ``None``,
    empty, or unprocessable geometry yields ``(None, (None, None, None, None))``.
    """
    nothing = (None, (None, None, None, None))
    if geometry is None:
        return nothing
    try:
        geom = geometry if not isinstance(geometry, dict) else shape(geometry)
        if geom.is_empty:
            return nothing
        min_lon, min_lat, max_lon, max_lat = geom.bounds
        encoded = json.dumps(geom.__geo_interface__, separators=(",", ":"))
        return encoded, (min_lon, min_lat, max_lon, max_lat)
    except Exception:
        # Best effort: any geometry that cannot be processed is treated as absent.
        return nothing
|
||||
|
||||
|
||||
def bbox_overlap(a: tuple[float | None, float | None, float | None, float | None], b: tuple[float | None, float | None, float | None, float | None]) -> bool:
    """Return whether two ``(minx, miny, maxx, maxy)`` boxes intersect.

    Boxes that merely touch count as overlapping. Returns ``False`` when any
    coordinate of either box is ``None``.
    """
    for coordinate in (*a, *b):
        if coordinate is None:
            return False
    a_min_x, a_min_y, a_max_x, a_max_y = a  # type: ignore[misc]
    b_min_x, b_min_y, b_max_x, b_max_y = b  # type: ignore[misc]
    apart_on_x = a_max_x < b_min_x or b_max_x < a_min_x
    apart_on_y = a_max_y < b_min_y or b_max_y < a_min_y
    return not (apart_on_x or apart_on_y)
|
||||
|
||||
|
||||
def bbox_center(b: tuple[float | None, float | None, float | None, float | None]) -> Optional[tuple[float, float]]:
    """Return the midpoint ``(x, y)`` of a ``(minx, miny, maxx, maxy)`` box.

    Returns ``None`` when any coordinate is ``None``.
    """
    for coordinate in b:
        if coordinate is None:
            return None
    min_x, min_y, max_x, max_y = b  # type: ignore[misc]
    mid_x = (min_x + max_x) / 2
    mid_y = (min_y + max_y) / 2
    return (mid_x, mid_y)
|
||||
|
||||
|
||||
def approx_bbox_center_distance_deg(a: tuple[float | None, float | None, float | None, float | None], b: tuple[float | None, float | None, float | None, float | None]) -> Optional[float]:
    """Return the straight-line distance between two box centres.

    The result is in the same units as the box coordinates and uses plain
    Euclidean geometry. Returns ``None`` when either box has no centre.
    """
    center_a = bbox_center(a)
    center_b = bbox_center(b)
    if center_a is None or center_b is None:
        return None
    delta_x = center_a[0] - center_b[0]
    delta_y = center_a[1] - center_b[1]
    return (delta_x ** 2 + delta_y ** 2) ** 0.5
|
||||
|
||||
|
||||
def batched(iterable: Iterable[dict], batch_size: int = 1000) -> Iterable[list[dict]]:
    """Yield consecutive lists of up to ``batch_size`` items from ``iterable``.

    The final list may be shorter; nothing is yielded for an empty iterable.
    """
    pending: list[dict] = []
    for item in iterable:
        pending.append(item)
        if len(pending) < batch_size:
            continue
        yield pending
        # Start a fresh list so the batch already handed out is not mutated.
        pending = []
    if pending:
        yield pending
|
||||
393
app/qa.py
Normal file
393
app/qa.py
Normal file
@@ -0,0 +1,393 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.gtfs_storage import missing_sidecar_paths as missing_gtfs_sidecar_paths
|
||||
from app.models import (
|
||||
CanonicalStop,
|
||||
CanonicalStopLink,
|
||||
Dataset,
|
||||
GtfsAgency,
|
||||
GtfsCalendar,
|
||||
GtfsCalendarDate,
|
||||
GtfsRoute,
|
||||
GtfsShape,
|
||||
GtfsStop,
|
||||
GtfsTrip,
|
||||
Job,
|
||||
OsmFeature,
|
||||
RouteMatch,
|
||||
RoutePattern,
|
||||
RoutePatternStop,
|
||||
Source,
|
||||
SourceCatalogEntry,
|
||||
)
|
||||
from app.osm_storage import missing_sidecar_paths as missing_osm_sidecar_paths
|
||||
from app.pipeline.osm_addresses import ADDRESS_INDEX_VERSION
|
||||
from app.pipeline.routing_layer import active_routing_dataset
|
||||
|
||||
|
||||
def qa_summary(session: Session) -> dict[str, Any]:
|
||||
active_gtfs_datasets = session.scalars(
|
||||
select(Dataset).where(Dataset.kind == "gtfs", Dataset.is_active.is_(True)).order_by(Dataset.id)
|
||||
).all()
|
||||
active_osm_datasets = session.scalars(
|
||||
select(Dataset).where(Dataset.kind == "osm_geojson", Dataset.is_active.is_(True)).order_by(Dataset.id)
|
||||
).all()
|
||||
active_gtfs_ids = [int(dataset.id) for dataset in active_gtfs_datasets]
|
||||
active_osm_ids = [int(dataset.id) for dataset in active_osm_datasets]
|
||||
|
||||
source_catalog_total = _count(session, SourceCatalogEntry)
|
||||
registered_sources = _count(session, Source)
|
||||
linked_catalog_entries = int(
|
||||
session.scalar(
|
||||
select(func.count(func.distinct(Source.catalog_entry_id))).where(Source.catalog_entry_id.is_not(None))
|
||||
)
|
||||
or 0
|
||||
)
|
||||
priority_backlog = _priority_catalog_backlog(session)
|
||||
failed_sources = int(
|
||||
session.scalar(
|
||||
select(func.count())
|
||||
.select_from(Source)
|
||||
.where((Source.last_error.is_not(None)) | Source.status.in_(["failed", "error"]))
|
||||
)
|
||||
or 0
|
||||
)
|
||||
|
||||
active_jobs = _job_status_counts(session)
|
||||
missing_gtfs_sidecars = sum(1 for dataset in active_gtfs_datasets if missing_gtfs_sidecar_paths(dataset))
|
||||
missing_osm_sidecars = sum(1 for dataset in active_osm_datasets if missing_osm_sidecar_paths(dataset))
|
||||
|
||||
gtfs_counts = _gtfs_validation_counts(session, active_gtfs_ids)
|
||||
link_counts = _link_quality_counts(session, active_gtfs_ids, active_osm_ids)
|
||||
route_counts = _route_quality_counts(session, active_gtfs_ids)
|
||||
address_status = _lightweight_address_index_status(session)
|
||||
license_unknown = int(
|
||||
session.scalar(
|
||||
select(func.count())
|
||||
.select_from(Source)
|
||||
.where(Source.kind == "gtfs", (Source.license.is_(None)) | (func.lower(Source.license).in_(["", "unknown"])))
|
||||
)
|
||||
or 0
|
||||
)
|
||||
|
||||
return {
|
||||
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||
"decision": {
|
||||
"deployment": "same_workbench_for_now",
|
||||
"database": "same_postgresql_database_for_now",
|
||||
"split_trigger": "Split when third-party API, accounts/billing, heavy export jobs, or independent scaling are needed.",
|
||||
"api_contract": "/api/qa/summary is intentionally display-ready but stable enough to become a harmonization-service summary endpoint.",
|
||||
},
|
||||
"sections": [
|
||||
{
|
||||
"id": "source_discovery",
|
||||
"title": "Source Discovery",
|
||||
"items": [
|
||||
_item("Identified sources", source_catalog_total, "info", "Rows in the source catalog."),
|
||||
_item("Registered sources", registered_sources, "info", "Sources known to the importer."),
|
||||
_item("Catalog entries linked", linked_catalog_entries, "good" if linked_catalog_entries else "warn", "Catalog rows connected to importer sources."),
|
||||
_item("Priority catalog backlog", priority_backlog, "warn" if priority_backlog else "good", "P0/P1 catalog rows without a registered source."),
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": "import_health",
|
||||
"title": "Import Health",
|
||||
"items": [
|
||||
_item("Active GTFS datasets", len(active_gtfs_ids), "good" if active_gtfs_ids else "warn", "Feeds currently participating in harmonization."),
|
||||
_item("Active OSM datasets", len(active_osm_ids), "good" if active_osm_ids else "warn", "Visual/spatial datasets currently active."),
|
||||
_item("Running jobs", active_jobs.get("running", 0), "warn" if active_jobs.get("running", 0) else "info", "Currently running queued work."),
|
||||
_item("Queued jobs", active_jobs.get("queued", 0), "info", "Outstanding queued work."),
|
||||
_item("Failed sources", failed_sources, "bad" if failed_sources else "good", "Sources with failed status or last_error."),
|
||||
_item("Missing GTFS sidecars", missing_gtfs_sidecars, "bad" if missing_gtfs_sidecars else "good", "Active GTFS datasets whose sidecar is unavailable."),
|
||||
_item("Missing OSM sidecars", missing_osm_sidecars, "bad" if missing_osm_sidecars else "good", "Active OSM datasets whose sidecar is unavailable."),
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": "gtfs_validation",
|
||||
"title": "GTFS Validation",
|
||||
"items": [
|
||||
_item("Agencies", gtfs_counts["agencies"], "info", "Imported agency.txt rows."),
|
||||
_item("Stops", gtfs_counts["stops"], "info", "Imported stops."),
|
||||
_item("Routes", gtfs_counts["routes"], "info", "Imported routes."),
|
||||
_item("Trips", gtfs_counts["trips"], "info", "Imported trips."),
|
||||
_item("Shapes", gtfs_counts["shapes"], "info", "Imported shape records."),
|
||||
_item("Stops without coordinates", gtfs_counts["stops_without_coordinates"], "bad" if gtfs_counts["stops_without_coordinates"] else "good", "Stops that cannot be spatially linked or routed."),
|
||||
_item("Routes without geometry", gtfs_counts["routes_without_geometry"], "warn" if gtfs_counts["routes_without_geometry"] else "good", "Routes with no stored GTFS shape geometry."),
|
||||
_item("Routes without agency", gtfs_counts["routes_without_agency"], "warn" if gtfs_counts["routes_without_agency"] else "good", "Routes missing agency/operator references."),
|
||||
_item("Calendar range", gtfs_counts["calendar_range"], "info", "Min/max imported service dates from calendars and exceptions."),
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": "deduplication",
|
||||
"title": "Deduplication and Stop Links",
|
||||
"items": [
|
||||
_item("Canonical stops", link_counts["canonical_stops"], "info", "Current normalized stop/station records."),
|
||||
_item("GTFS stop links", link_counts["gtfs_stop_links"], "good" if link_counts["gtfs_stop_links"] else "warn", "Timetable stops linked into canonical stops."),
|
||||
_item("GTFS stops without canonical link", link_counts["gtfs_stops_without_canonical"], "bad" if link_counts["gtfs_stops_without_canonical"] else "good", "Imported active stops that still need deduplication/linking."),
|
||||
_item("OSM visual stop links", link_counts["osm_stop_links"], "good" if link_counts["osm_stop_links"] else "warn", "OSM stop/station features linked to canonical stops."),
|
||||
_item("OSM stops without canonical link", link_counts["osm_stops_without_canonical"], "warn" if link_counts["osm_stops_without_canonical"] else "good", "Visual stops that are not yet linked to GTFS/canonical stops."),
|
||||
_item("Multi-source stop groups", link_counts["multi_source_stop_groups"], "info", "Canonical stops that merge GTFS stops from multiple datasets."),
|
||||
_item("Long-distance OSM links", link_counts["long_distance_osm_links"], "warn" if link_counts["long_distance_osm_links"] else "good", "OSM stop links over 150m from the canonical stop."),
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": "route_quality",
|
||||
"title": "Route Matching and Geometry",
|
||||
"items": [
|
||||
_item("Matched/accepted routes", route_counts["matched_or_accepted"], "good" if route_counts["matched_or_accepted"] else "warn", "GTFS routes with accepted or automatic OSM matches."),
|
||||
_item("Probable matches", route_counts["probable"], "warn" if route_counts["probable"] else "info", "Potential conflicts needing review."),
|
||||
_item("Weak matches", route_counts["weak"], "warn" if route_counts["weak"] else "good", "Low-confidence route links."),
|
||||
_item("Missing route matches", route_counts["missing"], "bad" if route_counts["missing"] else "good", "Routes with no visual match."),
|
||||
_item("Unreviewed GTFS routes", route_counts["routes_without_match"], "warn" if route_counts["routes_without_match"] else "good", "Active GTFS routes without a RouteMatch row."),
|
||||
_item("Route patterns", route_counts["route_patterns"], "info", "Published visual route-layer patterns."),
|
||||
_item("Route patterns without stops", route_counts["route_patterns_without_stops"], "warn" if route_counts["route_patterns_without_stops"] else "good", "Visual patterns missing canonical stop sequence evidence."),
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": "publication_readiness",
|
||||
"title": "Publication Readiness",
|
||||
"items": [
|
||||
_item("Address index stale", "yes" if address_status.get("stale") else "no", "warn" if address_status.get("stale") else "good", "Address polygons/search index version status."),
|
||||
_item("GTFS licenses unknown", license_unknown, "warn" if license_unknown else "good", "GTFS sources without explicit redistribution/license status."),
|
||||
_item("Canonical export", "draft", "warn", "Canonical Europe dataset export tables/API are not versioned yet."),
|
||||
_item("Third-party API", "later", "info", "Accounts, billing, quotas, and API backend are intentionally out of scope for this step."),
|
||||
],
|
||||
},
|
||||
],
|
||||
"next_actions": [
|
||||
"Add review queues for each non-zero bad/warn metric.",
|
||||
"Persist source authority and redistribution policy before publishing third-party exports.",
|
||||
"Create versioned canonical snapshots and export manifests.",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _item(label: str, value: object, tone: str, description: str) -> dict[str, object]:
    """Build one report metric entry.

    Packs the display label, the metric value, a tone string (call sites in
    this module pass "info", "good", "warn" or "bad") and a human-readable
    description into a plain dict, in that key order.
    """
    return dict(label=label, value=value, tone=tone, description=description)
|
||||
|
||||
|
||||
def _lightweight_address_index_status(session: Session) -> dict[str, object]:
    """Report whether the active routing dataset's address index is outdated.

    Reads only the dataset's ``metadata_json``. The result holds ``stale``,
    the recorded ``version`` and ``current_version`` (``ADDRESS_INDEX_VERSION``).
    A missing dataset, missing/invalid metadata or an absent/empty
    ``address_index`` entry is reported as not stale.
    """
    status: dict[str, object] = {
        "stale": False,
        "version": None,
        "current_version": ADDRESS_INDEX_VERSION,
    }
    dataset = active_routing_dataset(session)
    if dataset is None or not dataset.metadata_json:
        return status

    try:
        parsed = json.loads(dataset.metadata_json)
    except json.JSONDecodeError:
        parsed = {}

    index_info = parsed.get("address_index") if isinstance(parsed, dict) else {}
    if not isinstance(index_info, dict):
        index_info = {}

    recorded_version = index_info.get("version")
    status["version"] = recorded_version
    # Only a non-empty address_index entry with a different version counts as stale.
    status["stale"] = bool(index_info and recorded_version != ADDRESS_INDEX_VERSION)
    return status
|
||||
|
||||
|
||||
def _count(session: Session, model, *where) -> int:
    """Return ``COUNT(*)`` over ``model``, optionally filtered.

    Every positional argument after ``model`` is passed to ``.where()``.
    A ``None`` scalar result is reported as 0.
    """
    query = select(func.count()).select_from(model)
    filtered = query.where(*where) if where else query
    return int(session.scalar(filtered) or 0)
|
||||
|
||||
|
||||
def _priority_catalog_backlog(session: Session) -> int:
    """Count high-priority catalog entries that no ``Source`` row points at.

    "High priority" means a ``priority`` of "P0", "P0 fallback" or "P1".
    """
    backlog_priorities = ["P0", "P0 fallback", "P1"]
    # Correlated EXISTS: is there a Source whose catalog_entry_id is this entry?
    has_registered_source = (
        select(Source.id)
        .where(Source.catalog_entry_id == SourceCatalogEntry.id)
        .exists()
    )
    backlog_query = (
        select(func.count())
        .select_from(SourceCatalogEntry)
        .where(SourceCatalogEntry.priority.in_(backlog_priorities), ~has_registered_source)
    )
    return int(session.scalar(backlog_query) or 0)
|
||||
|
||||
|
||||
def _job_status_counts(session: Session) -> dict[str, int]:
    """Count non-dismissed jobs per status.

    Only the statuses "queued", "running", "paused" and "failed" are
    considered; a status with no rows is simply absent from the result.
    """
    tracked_statuses = ["queued", "running", "paused", "failed"]
    grouped_query = (
        select(Job.status, func.count())
        .where(Job.dismissed_at.is_(None), Job.status.in_(tracked_statuses))
        .group_by(Job.status)
    )
    totals: dict[str, int] = {}
    for status, total in session.execute(grouped_query).all():
        totals[str(status)] = int(total)
    return totals
|
||||
|
||||
|
||||
def _gtfs_validation_counts(session: Session, dataset_ids: list[int]) -> dict[str, object]:
    """Collect GTFS row counts and basic validation metrics for some datasets.

    The result holds row counts for agencies, stops, routes, trips and
    shapes, the number of stops lacking a coordinate, the number of routes
    lacking geometry or an agency reference, and a ``calendar_range`` string
    built from the calendar and calendar-exception dates. With no dataset
    ids, every count is 0, the range is "none" and no query is issued.
    """
    if not dataset_ids:
        blank: dict[str, object] = dict.fromkeys(
            (
                "agencies",
                "stops",
                "routes",
                "trips",
                "shapes",
                "stops_without_coordinates",
                "routes_without_geometry",
                "routes_without_agency",
            ),
            0,
        )
        blank["calendar_range"] = "none"
        return blank

    calendar_query = select(
        func.min(GtfsCalendar.start_date),
        func.max(GtfsCalendar.end_date),
    ).where(GtfsCalendar.dataset_id.in_(dataset_ids))
    exception_query = select(
        func.min(GtfsCalendarDate.date),
        func.max(GtfsCalendarDate.date),
    ).where(GtfsCalendarDate.dataset_id.in_(dataset_ids))
    calendar_min, calendar_max = session.execute(calendar_query).one()
    exception_min, exception_max = session.execute(exception_query).one()

    # A bound is only computed when at least one side is truthy; None sides
    # are ignored when picking the extreme.
    if calendar_min or exception_min:
        min_date = min(bound for bound in (calendar_min, exception_min) if bound is not None)
    else:
        min_date = None
    if calendar_max or exception_max:
        max_date = max(bound for bound in (calendar_max, exception_max) if bound is not None)
    else:
        max_date = None

    stops_in_scope = GtfsStop.dataset_id.in_(dataset_ids)
    routes_in_scope = GtfsRoute.dataset_id.in_(dataset_ids)
    missing_coordinate = (GtfsStop.lat.is_(None)) | (GtfsStop.lon.is_(None))
    missing_geometry = (GtfsRoute.geometry_geojson.is_(None)) | (GtfsRoute.geometry_geojson == "")
    missing_agency = (GtfsRoute.agency_id.is_(None)) | (GtfsRoute.agency_id == "")

    return {
        "agencies": _count(session, GtfsAgency, GtfsAgency.dataset_id.in_(dataset_ids)),
        "stops": _count(session, GtfsStop, stops_in_scope),
        "routes": _count(session, GtfsRoute, routes_in_scope),
        "trips": _count(session, GtfsTrip, GtfsTrip.dataset_id.in_(dataset_ids)),
        "shapes": _count(session, GtfsShape, GtfsShape.dataset_id.in_(dataset_ids)),
        "stops_without_coordinates": _count(session, GtfsStop, stops_in_scope, missing_coordinate),
        "routes_without_geometry": _count(session, GtfsRoute, routes_in_scope, missing_geometry),
        "routes_without_agency": _count(session, GtfsRoute, routes_in_scope, missing_agency),
        "calendar_range": f"{min_date or 'unknown'} -> {max_date or 'unknown'}",
    }
|
||||
|
||||
|
||||
def _link_quality_counts(session: Session, gtfs_dataset_ids: list[int], osm_dataset_ids: list[int]) -> dict[str, int]:
    """Summarise canonical-stop linking for the given GTFS and OSM datasets.

    GTFS metrics (links, unlinked stops, canonical stops fed by more than one
    dataset) are only queried when ``gtfs_dataset_ids`` is non-empty; OSM
    metrics (links, unlinked stop-like features, links farther than 150 m)
    only when ``osm_dataset_ids`` is non-empty. Skipped metrics are 0.
    ``canonical_stops`` is always the unfiltered CanonicalStop row count.
    """
    gtfs_stop_links = 0
    gtfs_stops_without_canonical = 0
    multi_source_stop_groups = 0
    if gtfs_dataset_ids:
        # Correlated EXISTS: a canonical link row for this exact GTFS stop.
        gtfs_stop_is_linked = (
            select(CanonicalStopLink.id)
            .where(
                CanonicalStopLink.object_type == "gtfs_stop",
                CanonicalStopLink.dataset_id == GtfsStop.dataset_id,
                CanonicalStopLink.object_id == GtfsStop.id,
            )
            .exists()
        )
        gtfs_stops_without_canonical = _count(
            session,
            GtfsStop,
            GtfsStop.dataset_id.in_(gtfs_dataset_ids),
            ~gtfs_stop_is_linked,
        )
        gtfs_stop_links = _count(
            session,
            CanonicalStopLink,
            CanonicalStopLink.object_type == "gtfs_stop",
            CanonicalStopLink.dataset_id.in_(gtfs_dataset_ids),
        )
        # Canonical stops whose GTFS links come from more than one dataset.
        merged_groups = (
            select(CanonicalStopLink.canonical_stop_id)
            .where(
                CanonicalStopLink.object_type == "gtfs_stop",
                CanonicalStopLink.dataset_id.in_(gtfs_dataset_ids),
            )
            .group_by(CanonicalStopLink.canonical_stop_id)
            .having(func.count(func.distinct(CanonicalStopLink.dataset_id)) > 1)
            .subquery()
        )
        multi_source_stop_groups = int(session.scalar(select(func.count()).select_from(merged_groups)) or 0)

    osm_stop_links = 0
    osm_stops_without_canonical = 0
    long_distance_osm_links = 0
    if osm_dataset_ids:
        osm_links_in_scope = (
            CanonicalStopLink.object_type == "osm_feature",
            CanonicalStopLink.dataset_id.in_(osm_dataset_ids),
        )
        # Correlated EXISTS: a canonical link row for this exact OSM feature.
        osm_feature_is_linked = (
            select(CanonicalStopLink.id)
            .where(
                CanonicalStopLink.object_type == "osm_feature",
                CanonicalStopLink.dataset_id == OsmFeature.dataset_id,
                CanonicalStopLink.object_id == OsmFeature.id,
            )
            .exists()
        )
        osm_stops_without_canonical = _count(
            session,
            OsmFeature,
            OsmFeature.dataset_id.in_(osm_dataset_ids),
            OsmFeature.kind.in_(["stop", "station", "terminal"]),
            ~osm_feature_is_linked,
        )
        osm_stop_links = _count(session, CanonicalStopLink, *osm_links_in_scope)
        long_distance_osm_links = _count(
            session,
            CanonicalStopLink,
            *osm_links_in_scope,
            CanonicalStopLink.distance_m > 150,
        )

    return {
        "canonical_stops": _count(session, CanonicalStop),
        "gtfs_stop_links": gtfs_stop_links,
        "gtfs_stops_without_canonical": gtfs_stops_without_canonical,
        "osm_stop_links": osm_stop_links,
        "osm_stops_without_canonical": osm_stops_without_canonical,
        "multi_source_stop_groups": multi_source_stop_groups,
        "long_distance_osm_links": long_distance_osm_links,
    }
|
||||
|
||||
|
||||
def _route_quality_counts(session: Session, gtfs_dataset_ids: list[int]) -> dict[str, int]:
    """Summarise route-match statuses and route-pattern coverage.

    Route-pattern totals are always computed over all RoutePattern rows.
    Match metrics are restricted to routes of ``gtfs_dataset_ids`` and are
    all 0 when that list is empty.
    """
    pattern_has_stops = (
        select(RoutePatternStop.id)
        .where(RoutePatternStop.route_pattern_id == RoutePattern.id)
        .exists()
    )
    result: dict[str, int] = {
        "matched_or_accepted": 0,
        "probable": 0,
        "weak": 0,
        "missing": 0,
        "routes_without_match": 0,
        "route_patterns": _count(session, RoutePattern),
        "route_patterns_without_stops": _count(session, RoutePattern, ~pattern_has_stops),
    }
    if not gtfs_dataset_ids:
        return result

    status_query = (
        select(RouteMatch.status, func.count())
        .join(GtfsRoute, GtfsRoute.id == RouteMatch.gtfs_route_id)
        .where(GtfsRoute.dataset_id.in_(gtfs_dataset_ids))
        .group_by(RouteMatch.status)
    )
    by_status: dict[str, int] = {}
    for status, total in session.execute(status_query).all():
        by_status[str(status)] = int(total)

    # Routes in scope that have no RouteMatch row at all.
    route_has_match = select(RouteMatch.id).where(RouteMatch.gtfs_route_id == GtfsRoute.id).exists()
    unmatched_routes = _count(session, GtfsRoute, GtfsRoute.dataset_id.in_(gtfs_dataset_ids), ~route_has_match)

    result["matched_or_accepted"] = by_status.get("matched", 0) + by_status.get("accepted", 0)
    result["probable"] = by_status.get("probable", 0)
    result["weak"] = by_status.get("weak", 0)
    result["missing"] = by_status.get("missing", 0)
    result["routes_without_match"] = unmatched_routes
    return result
|
||||
911
app/routing.py
Normal file
911
app/routing.py
Normal file
@@ -0,0 +1,911 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import heapq
|
||||
import json
|
||||
import math
|
||||
import threading
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, RoutingEdge, RoutingNode
|
||||
from app.pipeline.routing_layer import active_routing_dataset
|
||||
from app.serializers import feature_collection
|
||||
|
||||
|
||||
# Speeds (metres per second) that route_between_points divides the remaining
# straight-line distance by to obtain its search heuristic.
WALK_HEURISTIC_MPS = 1.6
DRIVE_HEURISTIC_MPS = 36.0
# Default for route_between_points(max_visited=...): cap on expanded graph nodes.
DEFAULT_MAX_VISITED = 160_000
# Bounding-box paddings (km) that _route_with_pgrouting tries in order until a
# route is found.
PGR_WALK_BBOX_PADDING_KM = [0.5, 1.5, 4, 10, 25]
PGR_DRIVE_BBOX_PADDING_KM = [2, 8, 25, 75, 200]
# statement_timeout (milliseconds) applied before each pgRouting attempt.
PGR_WALK_STATEMENT_TIMEOUT_MS = 2_500
PGR_DRIVE_STATEMENT_TIMEOUT_MS = 7_500
# In-process route cache: entry lifetime in seconds and maximum entry count.
ROUTE_CACHE_TTL_SECONDS = 15 * 60
ROUTE_CACHE_MAX_ENTRIES = 512
# Guards every read and write of _route_cache.
_route_cache_lock = threading.RLock()
# Maps a cache key to (expiry on the time.monotonic() clock, route payload);
# ordering is maintained so the least recently used entry is evicted first.
_route_cache: OrderedDict[tuple[object, ...], tuple[float, dict[str, object]]] = OrderedDict()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _GraphNode:
    """Immutable routing-graph node used as a route start or target."""

    # OSM node id; used as the vertex id when routing.
    osm_node_id: int
    # Node position.
    lon: float
    lat: float
    # NOTE(review): presumably the distance in metres from the requested point
    # to this node (snap_point_to_routing_graph compares it with
    # max_distance_m) - confirm against _nearest_node.
    distance_m: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _Traversal:
    """Immutable record of one routing edge traversed in a specific direction."""

    # Id of the routing edge that is traversed.
    edge_id: int
    # Node ids in travel direction.
    from_node: int
    to_node: int
    # Coordinates of the from/to nodes in travel direction.
    from_lon: float
    from_lat: float
    to_lon: float
    to_lat: float
    # Cost of this traversal in seconds (added to the running route cost).
    cost_s: float
    # Edge length in metres.
    length_m: float
    # Edge attributes; either may be None.
    highway: str | None
    name: str | None
    # Edge geometry as a GeoJSON string.
    geometry_geojson: str
    # True when the edge is travelled from its stored target to its stored source.
    reversed: bool
|
||||
|
||||
|
||||
def routing_status(db: Session) -> dict[str, object]:
    """Describe the active routing graph and the routing engine in use.

    Returns the active dataset id (or None), node and edge counts, whether
    any edges exist, and - on PostgreSQL only - whether the pgrouting
    extension is available and installed. The engine is "pgrouting" when the
    extension is installed, otherwise "python_astar".
    """
    dataset = active_routing_dataset(db)
    if dataset is None:
        dataset_id = None
        node_count, edge_count = 0, 0
    else:
        dataset_id = int(dataset.id)
        node_count, edge_count = _routing_status_counts(db, dataset, dataset_id)

    pgrouting_available = False
    pgrouting_installed = False
    if settings.is_postgresql_database:
        available_query = text("SELECT EXISTS (SELECT 1 FROM pg_available_extensions WHERE name = 'pgrouting')")
        installed_query = text("SELECT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pgrouting')")
        pgrouting_available = bool(db.execute(available_query).scalar())
        pgrouting_installed = bool(db.execute(installed_query).scalar())

    engine = "pgrouting" if pgrouting_installed else "python_astar"
    return {
        "dataset_id": dataset_id,
        "nodes": node_count,
        "edges": edge_count,
        "available": edge_count > 0,
        "engine": engine,
        "pgrouting_available": pgrouting_available,
        "pgrouting_installed": pgrouting_installed,
    }
|
||||
|
||||
|
||||
def _routing_status_counts(db: Session, dataset: Dataset, dataset_id: int) -> tuple[int, int]:
    """Return ``(node_count, edge_count)`` for the routing graph.

    Sources are tried from cheapest to most expensive:

    1. The ``routing_layer`` entry of the dataset metadata, when it carries
       a non-zero ``nodes`` or ``edges`` value.
    2. On PostgreSQL, the planner's ``pg_class.reltuples`` estimates for the
       ``routing_nodes`` / ``routing_edges`` tables. These are whole-table
       estimates, not restricted to ``dataset_id``.
    3. Exact ``COUNT(*)`` queries filtered by ``dataset_id``.
    """
    routing_layer = _metadata(dataset).get("routing_layer")
    if isinstance(routing_layer, dict):
        try:
            nodes = int(routing_layer.get("nodes") or 0)
            edges = int(routing_layer.get("edges") or 0)
        except (TypeError, ValueError):
            nodes = 0
            edges = 0
        if nodes or edges:
            return nodes, edges

    if settings.is_postgresql_database:
        rows = db.execute(
            text(
                """
                SELECT relname, COALESCE(reltuples, 0)::bigint AS estimate
                FROM pg_class
                WHERE oid IN ('routing_nodes'::regclass, 'routing_edges'::regclass)
                """
            )
        ).mappings()
        estimates = {str(row["relname"]): int(row["estimate"] or 0) for row in rows}
        node_estimate = estimates.get("routing_nodes", 0)
        edge_estimate = estimates.get("routing_edges", 0)
        # PostgreSQL 14+ stores reltuples = -1 for a table that has never been
        # vacuumed or analyzed ("row count unknown"). Reporting that as a count
        # would yield negative totals and mark a loaded graph as unavailable,
        # so fall through to the exact counts in that case.
        if node_estimate >= 0 and edge_estimate >= 0:
            return node_estimate, edge_estimate

    node_count = int(
        db.scalar(select(func.count()).select_from(RoutingNode).where(RoutingNode.dataset_id == dataset_id)) or 0
    )
    edge_count = int(
        db.scalar(select(func.count()).select_from(RoutingEdge).where(RoutingEdge.dataset_id == dataset_id)) or 0
    )
    return node_count, edge_count
|
||||
|
||||
|
||||
def _metadata(dataset: Dataset) -> dict[str, object]:
    """Decode ``dataset.metadata_json`` into a dict.

    Returns an empty dict when the field is empty, is not valid JSON, or
    decodes to something other than a JSON object.
    """
    raw = dataset.metadata_json
    if not raw:
        return {}
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    if isinstance(decoded, dict):
        return decoded
    return {}
|
||||
|
||||
|
||||
def route_between_points(
    db: Session,
    *,
    from_lon: float,
    from_lat: float,
    to_lon: float,
    to_lat: float,
    mode: str = "walk",
    dataset_id: int | None = None,
    max_visited: int = DEFAULT_MAX_VISITED,
) -> dict[str, object]:
    """Compute a walking or driving route between two coordinates.

    The result is served from the in-process route cache when possible.
    Otherwise both points are snapped to their nearest graph node and the
    route is computed with pgRouting (PostgreSQL with the extension
    installed) or, as a fallback, with an in-Python best-first search that
    orders nodes by accumulated cost plus a straight-line time estimate.

    Args:
        db: Database session.
        from_lon, from_lat: Origin coordinate.
        to_lon, to_lat: Destination coordinate.
        mode: "walk" or "drive".
        dataset_id: Routing dataset to use; the active routing dataset when None.
        max_visited: Upper bound on graph nodes the Python search may expand
            (values below 1 are treated as 1).

    Returns:
        The route payload dict produced by the payload helpers.

    Raises:
        ValueError: On an invalid mode, when no dataset or nearby node exists,
            or when the Python search ends without reaching the target.
    """
    if mode not in {"walk", "drive"}:
        raise ValueError("mode must be walk or drive")
    dataset = db.get(Dataset, dataset_id) if dataset_id is not None else active_routing_dataset(db)
    if dataset is None:
        raise ValueError("No routing dataset is available.")
    dataset_id = int(dataset.id)

    cache_key = _route_cache_key(dataset_id, mode, from_lon, from_lat, to_lon, to_lat)
    cached = _route_cache_get(cache_key)
    if cached is not None:
        return cached

    start = _nearest_node(db, dataset_id, from_lon, from_lat, mode)
    target = _nearest_node(db, dataset_id, to_lon, to_lat, mode)
    if start is None or target is None:
        raise ValueError("Routing graph has no nearby nodes for the requested mode.")

    if start.osm_node_id == target.osm_node_id:
        # Both points snap to the same node: there is no graph path to search.
        payload = _single_point_route(start, from_lon, from_lat, to_lon, to_lat, mode, dataset_id)
        _route_cache_put(cache_key, payload)
        return payload

    if settings.is_postgresql_database and _pgrouting_installed(db):
        try:
            payload = _route_with_pgrouting(
                db,
                dataset_id=dataset_id,
                mode=mode,
                start=start,
                target=target,
                from_lon=from_lon,
                from_lat=from_lat,
                to_lon=to_lon,
                to_lat=to_lat,
            )
        except ValueError:
            # Deliberate best effort: pgRouting found nothing in its bounded
            # search areas, so fall back to the Python search below.
            pass
        except SQLAlchemyError:
            # E.g. a statement timeout; clear the failed transaction state
            # before the Python fallback issues further queries.
            db.rollback()
        else:
            _route_cache_put(cache_key, payload)
            return payload

    heuristic_mps = WALK_HEURISTIC_MPS if mode == "walk" else DRIVE_HEURISTIC_MPS
    # Heap entries are (cost + heuristic, cost, node id).
    queue: list[tuple[float, float, int]] = [(0.0, 0.0, start.osm_node_id)]
    costs: dict[int, float] = {start.osm_node_id: 0.0}
    previous: dict[int, tuple[int, _Traversal]] = {}
    visited: set[int] = set()
    visit_limit = max(1, max_visited)

    while queue and len(visited) < visit_limit:
        _, cost, node_id = heapq.heappop(queue)
        if node_id in visited:
            # Stale heap entry for a node that was already expanded.
            continue
        visited.add(node_id)
        if node_id == target.osm_node_id:
            payload = _route_payload(
                dataset_id=dataset_id,
                mode=mode,
                start=start,
                target=target,
                from_lon=from_lon,
                from_lat=from_lat,
                to_lon=to_lon,
                to_lat=to_lat,
                previous=previous,
                total_cost_s=cost,
                visited=len(visited),
            )
            _route_cache_put(cache_key, payload)
            return payload
        # Each node is expanded at most once (see the visited check above),
        # so its outgoing edges are fetched exactly once.
        for edge in _outgoing_edges(db, dataset_id, node_id, mode):
            next_cost = cost + edge.cost_s
            if next_cost >= costs.get(edge.to_node, float("inf")):
                continue
            costs[edge.to_node] = next_cost
            previous[edge.to_node] = (node_id, edge)
            heuristic = _distance_m(edge.to_lat, edge.to_lon, target.lat, target.lon) / heuristic_mps
            heapq.heappush(queue, (next_cost + heuristic, next_cost, edge.to_node))

    raise ValueError(f"No {mode} route found within {max_visited:,} visited graph nodes.")
|
||||
|
||||
|
||||
def direct_route_between_points(
    db: Session,
    *,
    from_lon: float,
    from_lat: float,
    to_lon: float,
    to_lat: float,
    mode: str = "walk",
    dataset_id: int | None = None,
    reason: str | None = None,
) -> dict[str, object]:
    """Build a direct (non-graph) route payload between two coordinates.

    The dataset is looked up only to stamp its id on the payload; 0 is used
    when no dataset exists. A truthy ``reason`` is attached as the payload's
    "warning".

    Raises:
        ValueError: If ``mode`` is neither "walk" nor "drive".
    """
    if mode not in {"walk", "drive"}:
        raise ValueError("mode must be walk or drive")

    if dataset_id is not None:
        dataset = db.get(Dataset, dataset_id)
    else:
        dataset = active_routing_dataset(db)
    resolved_dataset_id = int(dataset.id) if dataset is not None else 0

    payload = _direct_route_payload(
        dataset_id=resolved_dataset_id,
        mode=mode,
        from_lon=float(from_lon),
        from_lat=float(from_lat),
        to_lon=float(to_lon),
        to_lat=float(to_lat),
    )
    if reason:
        payload["warning"] = reason
    return payload
|
||||
|
||||
|
||||
def snap_point_to_routing_graph(
    db: Session,
    *,
    lon: float,
    lat: float,
    mode: str = "walk",
    dataset_id: int | None = None,
    max_distance_m: float = 250,
) -> dict[str, object] | None:
    """Snap a coordinate onto the routing graph.

    On PostgreSQL the point is snapped onto the closest usable edge; on other
    databases the nearest node is used. Returns None when there is no
    dataset or nothing lies within ``max_distance_m``.

    Raises:
        ValueError: If ``mode`` is neither "walk" nor "drive".
    """
    if mode not in {"walk", "drive"}:
        raise ValueError("mode must be walk or drive")

    if dataset_id is not None:
        dataset = db.get(Dataset, dataset_id)
    else:
        dataset = active_routing_dataset(db)
    if dataset is None:
        return None
    dataset_id = int(dataset.id)

    if settings.is_postgresql_database:
        return _snap_point_to_routing_edge_postgresql(
            db,
            dataset_id=dataset_id,
            lon=float(lon),
            lat=float(lat),
            mode=mode,
            max_distance_m=float(max_distance_m),
        )

    nearest = _nearest_node(db, dataset_id, float(lon), float(lat), mode)
    if nearest is None:
        return None
    if nearest.distance_m > max_distance_m:
        return None
    return {
        "dataset_id": dataset_id,
        "lon": nearest.lon,
        "lat": nearest.lat,
        "distance_m": round(nearest.distance_m, 1),
        "source": "routing_node",
        "osm_node_id": nearest.osm_node_id,
    }
|
||||
|
||||
|
||||
def _snap_point_to_routing_edge_postgresql(
    db: Session,
    *,
    dataset_id: int,
    lon: float,
    lat: float,
    mode: str,
    max_distance_m: float,
) -> dict[str, object] | None:
    """Snap a point onto the closest usable routing edge using PostGIS.

    Edges are prefiltered by a bounding box around the point, then limited to
    those within ``max_distance_m`` (geography distance). Each edge is treated
    as the straight segment between its source and target node. In walk mode,
    driveway / parking-aisle / drive-through service roads are excluded and
    service and major roads receive a distance penalty when ranking.

    Returns:
        A dict describing the snapped position and chosen edge, or None when
        no edge qualifies.
    """
    cost_column = "walk_cost_s" if mode == "walk" else "drive_cost_s"
    reverse_cost_column = "reverse_walk_cost_s" if mode == "walk" else "reverse_drive_cost_s"
    # Roughly 111,320 m per degree of latitude.
    radius_deg = max_distance_m / 111_320
    # A degree of longitude shrinks with cos(latitude); without this scaling
    # the prefilter box is too narrow east-west away from the equator and
    # drops edges that ST_DWithin would accept. The clamp keeps the box finite
    # near the poles and for out-of-range latitudes.
    lon_radius_deg = radius_deg / max(math.cos(math.radians(lat)), 0.01)
    row = db.execute(
        text(
            f"""
            WITH point AS (
                SELECT ST_SetSRID(ST_MakePoint(:lon, :lat), 4326) AS geom
            ),
            edges AS MATERIALIZED (
                SELECT
                    edge.id,
                    edge.highway,
                    edge.name,
                    CASE
                        WHEN edge.tags_json IS NULL OR edge.tags_json = '' THEN NULL
                        ELSE edge.tags_json::jsonb ->> 'service'
                    END AS service,
                    edge.source_osm_node_id,
                    edge.target_osm_node_id,
                    ST_SetSRID(
                        ST_MakeLine(
                            ST_MakePoint(edge.source_lon, edge.source_lat),
                            ST_MakePoint(edge.target_lon, edge.target_lat)
                        ),
                        4326
                    ) AS edge_geom
                FROM routing_edges AS edge
                CROSS JOIN point
                WHERE edge.dataset_id = :dataset_id
                  AND (edge.{cost_column} IS NOT NULL OR edge.{reverse_cost_column} IS NOT NULL)
                  AND box(point(edge.max_lon, edge.max_lat), point(edge.min_lon, edge.min_lat))
                      && box(
                          point(:lon + :lon_radius_deg, :lat + :radius_deg),
                          point(:lon - :lon_radius_deg, :lat - :radius_deg)
                      )
            ),
            candidate AS (
                SELECT
                    edges.id,
                    edges.highway,
                    edges.name,
                    edges.service,
                    edges.source_osm_node_id,
                    edges.target_osm_node_id,
                    ST_ClosestPoint(edges.edge_geom, point.geom) AS snapped_geom,
                    ST_DistanceSphere(edges.edge_geom, point.geom) AS distance_m,
                    CASE
                        WHEN edges.highway IN ('footway', 'pedestrian', 'steps') THEN 0
                        WHEN edges.highway IN ('path', 'cycleway', 'bridleway') THEN 1
                        WHEN edges.highway IN ('living_street', 'residential') THEN 2
                        WHEN edges.highway = 'service' THEN 3
                        ELSE 4
                    END AS highway_rank,
                    CASE
                        WHEN :mode != 'walk' THEN 0
                        WHEN edges.highway = 'service' THEN 20
                        WHEN edges.highway IN ('primary', 'primary_link', 'secondary', 'secondary_link') THEN 10
                        WHEN edges.highway IN ('tertiary', 'tertiary_link', 'unclassified', 'road') THEN 5
                        ELSE 0
                    END AS snap_penalty_m
                FROM edges
                CROSS JOIN point
                WHERE ST_DWithin(edges.edge_geom::geography, point.geom::geography, :max_distance_m)
                  AND NOT (
                      :mode = 'walk'
                      AND edges.highway = 'service'
                      AND COALESCE(edges.service, '') IN ('driveway', 'parking_aisle', 'drive-through')
                  )
                ORDER BY
                    ST_DistanceSphere(edges.edge_geom, point.geom) + CASE
                        WHEN :mode != 'walk' THEN 0
                        WHEN edges.highway = 'service' THEN 20
                        WHEN edges.highway IN ('primary', 'primary_link', 'secondary', 'secondary_link') THEN 10
                        WHEN edges.highway IN ('tertiary', 'tertiary_link', 'unclassified', 'road') THEN 5
                        ELSE 0
                    END,
                    ST_DistanceSphere(edges.edge_geom, point.geom),
                    highway_rank,
                    edges.id
                LIMIT 1
            )
            SELECT
                id,
                highway,
                name,
                source_osm_node_id,
                target_osm_node_id,
                ST_X(snapped_geom) AS lon,
                ST_Y(snapped_geom) AS lat,
                distance_m
            FROM candidate
            """
        ),
        {
            "dataset_id": dataset_id,
            "lon": lon,
            "lat": lat,
            "radius_deg": radius_deg,
            "lon_radius_deg": lon_radius_deg,
            "max_distance_m": max_distance_m,
            "mode": mode,
        },
    ).mappings().first()
    if row is None:
        return None
    return {
        "dataset_id": dataset_id,
        "lon": float(row["lon"]),
        "lat": float(row["lat"]),
        "distance_m": round(float(row["distance_m"] or 0), 1),
        "source": "routing_edge",
        "edge_id": int(row["id"]),
        "highway": row["highway"],
        "name": row["name"],
        "source_osm_node_id": int(row["source_osm_node_id"]),
        "target_osm_node_id": int(row["target_osm_node_id"]),
    }
|
||||
|
||||
|
||||
def _route_cache_key(dataset_id: int, mode: str, from_lon: float, from_lat: float, to_lon: float, to_lat: float) -> tuple[object, ...]:
    """Build the route-cache key for a request.

    Coordinates are converted to float and rounded to six decimal places so
    that requests differing only beyond that precision share one entry.
    """
    rounded = tuple(round(float(value), 6) for value in (from_lon, from_lat, to_lon, to_lat))
    return (int(dataset_id), mode, *rounded)
|
||||
|
||||
|
||||
def _route_cache_get(key: tuple[object, ...]) -> dict[str, object] | None:
    """Return a copy of the cached payload for ``key``, or None.

    An expired entry is removed and reported as a miss. A hit marks the
    entry as most recently used. The payload is deep-copied so that callers
    cannot alter the cached value.
    """
    checked_at = time.monotonic()
    with _route_cache_lock:
        entry = _route_cache.get(key)
        if entry is None:
            return None
        deadline, stored_payload = entry
        if deadline > checked_at:
            _route_cache.move_to_end(key)
            return copy.deepcopy(stored_payload)
        _route_cache.pop(key, None)
        return None
|
||||
|
||||
|
||||
def _route_cache_put(key: tuple[object, ...], payload: dict[str, object]) -> None:
    """Store a deep copy of ``payload`` under ``key`` and trim the cache.

    The entry expires ``ROUTE_CACHE_TTL_SECONDS`` from now. When the cache
    exceeds ``ROUTE_CACHE_MAX_ENTRIES``, entries are dropped from the least
    recently used end.
    """
    with _route_cache_lock:
        expires_at = time.monotonic() + ROUTE_CACHE_TTL_SECONDS
        _route_cache[key] = (expires_at, copy.deepcopy(payload))
        _route_cache.move_to_end(key)
        overflow = len(_route_cache) - ROUTE_CACHE_MAX_ENTRIES
        for _ in range(max(0, overflow)):
            _route_cache.popitem(last=False)
|
||||
|
||||
|
||||
def _pgrouting_installed(db: Session) -> bool:
    """Return True when the pgrouting extension is listed in ``pg_extension``."""
    probe = text("SELECT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pgrouting')")
    installed = db.execute(probe).scalar()
    return bool(installed)
|
||||
|
||||
|
||||
def _route_with_pgrouting(
    db: Session,
    *,
    dataset_id: int,
    mode: str,
    start: _GraphNode,
    target: _GraphNode,
    from_lon: float,
    from_lat: float,
    to_lon: float,
    to_lat: float,
) -> dict[str, object]:
    """Route between two graph nodes with ``pgr_dijkstra``.

    The search runs on the edges inside a bounding box around the request,
    retried with each padding of the mode's padding list until a path is
    returned. A transaction-local statement timeout is applied before every
    attempt.

    Raises:
        ValueError: If no attempt yields a route.
    """
    is_walk = mode == "walk"
    cost_column = "walk_cost_s" if is_walk else "drive_cost_s"
    reverse_cost_column = "reverse_walk_cost_s" if is_walk else "reverse_drive_cost_s"
    routing_cost = _routing_cost_expression(cost_column, mode)
    reverse_routing_cost = _routing_cost_expression(reverse_cost_column, mode)
    paddings_km = PGR_WALK_BBOX_PADDING_KM if is_walk else PGR_DRIVE_BBOX_PADDING_KM
    timeout_ms = PGR_WALK_STATEMENT_TIMEOUT_MS if is_walk else PGR_DRIVE_STATEMENT_TIMEOUT_MS

    # Unpadded extent covering the requested points and their snapped nodes.
    west = min(from_lon, to_lon, start.lon, target.lon)
    south = min(from_lat, to_lat, start.lat, target.lat)
    east = max(from_lon, to_lon, start.lon, target.lon)
    north = max(from_lat, to_lat, start.lat, target.lat)

    for padding_km in paddings_km:
        _set_local_statement_timeout(db, timeout_ms)
        bbox = _expanded_bbox(west, south, east, north, padding_km)
        # Edge query handed to pgr_dijkstra as text; every interpolated value
        # is an int, a formatted float or a column name chosen above.
        edge_sql = f"""
            SELECT
                id,
                source_osm_node_id AS source,
                target_osm_node_id AS target,
                COALESCE({routing_cost}, -1)::float8 AS cost,
                COALESCE({reverse_routing_cost}, -1)::float8 AS reverse_cost
            FROM routing_edges
            WHERE dataset_id = {int(dataset_id)}
              AND ({cost_column} IS NOT NULL OR {reverse_cost_column} IS NOT NULL)
              AND box(point(max_lon, max_lat), point(min_lon, min_lat))
                  && box(point({bbox[2]:.8f}, {bbox[3]:.8f}), point({bbox[0]:.8f}, {bbox[1]:.8f}))
        """
        route_query = text(
            f"""
            WITH route AS (
                SELECT *
                FROM pgr_dijkstra(:edge_sql, :start_node, :target_node, directed := true)
            ),
            steps AS (
                SELECT
                    route.path_seq,
                    route.node AS from_node,
                    LEAD(route.node) OVER (ORDER BY route.path_seq) AS to_node,
                    route.edge,
                    route.cost
                FROM route
            )
            SELECT
                steps.path_seq,
                steps.from_node,
                steps.to_node,
                steps.cost,
                edge.id,
                edge.source_osm_node_id,
                edge.target_osm_node_id,
                edge.source_lon,
                edge.source_lat,
                edge.target_lon,
                edge.target_lat,
                edge.length_m,
                edge.highway,
                edge.name,
                edge.geometry_geojson,
                CASE
                    WHEN steps.from_node = edge.source_osm_node_id THEN edge.{cost_column}
                    ELSE edge.{reverse_cost_column}
                END AS actual_cost_s
            FROM steps
            JOIN routing_edges AS edge ON edge.id = steps.edge
            WHERE steps.edge <> -1
            ORDER BY steps.path_seq
            """
        )
        bind_values = {
            "edge_sql": edge_sql,
            "start_node": start.osm_node_id,
            "target_node": target.osm_node_id,
        }
        rows = db.execute(route_query, bind_values).all()
        if not rows:
            # Nothing found inside this box; retry with the next padding.
            continue
        return _pgrouting_payload(
            dataset_id=dataset_id,
            mode=mode,
            start=start,
            target=target,
            from_lon=from_lon,
            from_lat=from_lat,
            to_lon=to_lon,
            to_lat=to_lat,
            rows=rows,
            padding_km=padding_km,
        )
    raise ValueError("pgRouting did not find a route in the bounded search area.")
|
||||
|
||||
|
||||
def _set_local_statement_timeout(db: Session, timeout_ms: int) -> None:
    """Apply a PostgreSQL ``statement_timeout`` through ``set_config``.

    Args:
        db: Session on which the setting is issued.
        timeout_ms: Timeout in milliseconds; coerced to ``int`` before being
            formatted as ``"<n>ms"``.
    """
    # The third set_config argument is ``true`` (is_local), so per the
    # PostgreSQL docs the value only lasts for the current transaction.
    statement = text("SELECT set_config('statement_timeout', :timeout, true)")
    timeout_value = f"{int(timeout_ms)}ms"
    db.execute(statement, {"timeout": timeout_value})
|
||||
|
||||
|
||||
def _pgrouting_payload(
    *,
    dataset_id: int,
    mode: str,
    start: _GraphNode,
    target: _GraphNode,
    from_lon: float,
    from_lat: float,
    to_lon: float,
    to_lat: float,
    rows,
    padding_km: float,
) -> dict[str, object]:
    """Convert pgRouting path rows into the standard route payload.

    Each row describes one step (``from_node`` -> ``to_node`` over one edge).
    The rows are folded into the predecessor map that ``_route_payload``
    expects, and the resulting payload is tagged with the pgRouting engine
    name and the bounding-box padding that was used.

    Args:
        rows: Result rows exposing ``from_node``, ``to_node``, ``cost``,
            ``actual_cost_s`` and the joined edge columns.
        padding_km: Reported back as ``bbox_padding_km``.

    Returns:
        The dict built by ``_route_payload`` with ``engine`` set to
        ``"pgrouting"`` and ``bbox_padding_km`` added.
    """
    predecessors: dict[int, tuple[int, _Traversal]] = {}
    accumulated_cost = 0.0
    for step in rows:
        if step.to_node is None:
            # No following node, so there is nothing to traverse.
            continue
        origin = int(step.from_node)
        destination = int(step.to_node)
        edge_source = int(step.source_osm_node_id)
        edge_target = int(step.target_osm_node_id)
        # Prefer the unweighted edge cost; fall back to the routing cost.
        if step.actual_cost_s is not None:
            step_cost = float(step.actual_cost_s)
        else:
            step_cost = float(step.cost or 0)
        # The edge was walked target -> source, so swap its endpoints.
        against_edge_direction = origin == edge_target and destination == edge_source
        source_point = (float(step.source_lon), float(step.source_lat))
        target_point = (float(step.target_lon), float(step.target_lat))
        if against_edge_direction:
            entry_point, exit_point = target_point, source_point
        else:
            entry_point, exit_point = source_point, target_point
        accumulated_cost += step_cost
        traversal = _Traversal(
            edge_id=int(step.id),
            from_node=origin,
            to_node=destination,
            from_lon=entry_point[0],
            from_lat=entry_point[1],
            to_lon=exit_point[0],
            to_lat=exit_point[1],
            cost_s=step_cost,
            length_m=float(step.length_m),
            highway=step.highway,
            name=step.name,
            geometry_geojson=str(step.geometry_geojson),
            reversed=against_edge_direction,
        )
        predecessors[destination] = (origin, traversal)
    payload = _route_payload(
        dataset_id=dataset_id,
        mode=mode,
        start=start,
        target=target,
        from_lon=from_lon,
        from_lat=from_lat,
        to_lon=to_lon,
        to_lat=to_lat,
        previous=predecessors,
        total_cost_s=accumulated_cost,
        visited=len(rows),
    )
    # _route_payload labels itself "python_astar"; override for this engine.
    payload.update({"engine": "pgrouting", "bbox_padding_km": padding_km})
    return payload
|
||||
|
||||
|
||||
def _routing_cost_expression(column: str, mode: str) -> str:
|
||||
if mode != "walk":
|
||||
return column
|
||||
return f"""
|
||||
CASE
|
||||
WHEN {column} IS NULL THEN NULL
|
||||
ELSE {column} * CASE
|
||||
WHEN highway IN ('footway', 'pedestrian') THEN 0.70
|
||||
WHEN highway = 'path' THEN 0.78
|
||||
WHEN highway = 'steps' THEN 0.95
|
||||
WHEN highway = 'cycleway' THEN 1.05
|
||||
WHEN highway = 'bridleway' THEN 1.10
|
||||
WHEN highway IN ('living_street', 'track') THEN 1.15
|
||||
WHEN highway IN ('residential', 'service') THEN 1.35
|
||||
WHEN highway IN ('unclassified', 'road') THEN 1.55
|
||||
WHEN highway IN ('tertiary', 'tertiary_link') THEN 1.80
|
||||
WHEN highway IN ('secondary', 'secondary_link') THEN 2.15
|
||||
WHEN highway IN ('primary', 'primary_link') THEN 2.50
|
||||
ELSE 1.30
|
||||
END
|
||||
END
|
||||
"""
|
||||
|
||||
|
||||
def _nearest_node(db: Session, dataset_id: int, lon: float, lat: float, mode: str) -> _GraphNode | None:
    """Find the closest routing node that is usable in ``mode``.

    The query takes the N nearest nodes of the dataset and keeps the closest
    one that has at least one edge traversable away from it (forward cost set
    when the node is the edge source, reverse cost set when it is the target).
    N is widened through 64, 512 and 4096 until a node qualifies.

    Returns:
        The matching ``_GraphNode`` including its distance to the query
        point, or ``None`` when no candidate qualifies at the widest limit.
    """
    if mode == "walk":
        cost_column, reverse_cost_column = "walk_cost_s", "reverse_walk_cost_s"
    else:
        cost_column, reverse_cost_column = "drive_cost_s", "reverse_drive_cost_s"
    statement = text(
        f"""
        WITH nearest AS MATERIALIZED (
            SELECT node.osm_node_id, node.lon, node.lat, node.geom
            FROM routing_nodes AS node
            WHERE node.dataset_id = :dataset_id
              AND node.geom IS NOT NULL
            ORDER BY node.geom <-> ST_SetSRID(ST_MakePoint(:lon, :lat), 4326)
            LIMIT :candidate_limit
        ),
        candidate AS (
            SELECT nearest.osm_node_id, nearest.lon, nearest.lat, nearest.geom
            FROM nearest
            WHERE EXISTS (
                SELECT 1
                FROM routing_edges AS edge
                WHERE edge.dataset_id = :dataset_id
                  AND (
                    (edge.source_osm_node_id = nearest.osm_node_id AND edge.{cost_column} IS NOT NULL)
                    OR (edge.target_osm_node_id = nearest.osm_node_id AND edge.{reverse_cost_column} IS NOT NULL)
                  )
                LIMIT 1
            )
            ORDER BY nearest.geom <-> ST_SetSRID(ST_MakePoint(:lon, :lat), 4326)
            LIMIT 1
        )
        SELECT osm_node_id, lon, lat, ST_DistanceSphere(geom, ST_SetSRID(ST_MakePoint(:lon, :lat), 4326)) AS distance_m
        FROM candidate
        """
    )
    # Start with a small candidate set and widen only when nothing qualifies.
    for candidate_limit in (64, 512, 4096):
        match = db.execute(
            statement,
            {"dataset_id": dataset_id, "lon": lon, "lat": lat, "candidate_limit": candidate_limit},
        ).first()
        if match is not None:
            return _GraphNode(
                osm_node_id=int(match.osm_node_id),
                lon=float(match.lon),
                lat=float(match.lat),
                distance_m=float(match.distance_m or 0),
            )
    return None
|
||||
|
||||
|
||||
def _outgoing_edges(db: Session, dataset_id: int, node_id: int, mode: str) -> list[_Traversal]:
    """Return the edges that can be traversed away from ``node_id`` in ``mode``.

    An edge qualifies when ``node_id`` is its source and the forward cost is
    set, or when ``node_id`` is its target and the reverse cost is set. Each
    result is oriented so that it starts at ``node_id``; edges taken against
    their stored direction are flagged ``reversed`` with swapped endpoints.

    Args:
        db: Session used to query ``routing_edges``.
        dataset_id: Dataset whose edges are considered.
        node_id: OSM node id the traversals start from.
        mode: ``"walk"`` selects the walk cost columns; anything else selects
            the drive cost columns.

    Returns:
        One ``_Traversal`` per usable edge (possibly an empty list).
    """
    cost_column = "walk_cost_s" if mode == "walk" else "drive_cost_s"
    reverse_cost_column = "reverse_walk_cost_s" if mode == "walk" else "reverse_drive_cost_s"
    rows = db.execute(
        text(
            f"""
            SELECT
                id, source_osm_node_id, target_osm_node_id,
                source_lon, source_lat, target_lon, target_lat,
                length_m, highway, name, geometry_geojson,
                CASE
                    WHEN source_osm_node_id = :node_id THEN {cost_column}
                    ELSE {reverse_cost_column}
                END AS cost_s,
                target_osm_node_id != :node_id AS forward
            FROM routing_edges
            WHERE dataset_id = :dataset_id
              AND (
                (source_osm_node_id = :node_id AND {cost_column} IS NOT NULL)
                OR (target_osm_node_id = :node_id AND {reverse_cost_column} IS NOT NULL)
              )
            """
        ),
        {"dataset_id": dataset_id, "node_id": node_id},
    ).all()
    edges = []
    for row in rows:
        if row.cost_s is None:
            # Only reachable for a self-loop (source == target == node_id)
            # whose forward cost is NULL while its reverse cost is set: the
            # WHERE clause admits it via the reverse branch, but the CASE
            # picks the NULL forward cost. A self-loop cannot shorten a route,
            # so skip it instead of crashing on float(None).
            continue
        forward = bool(row.forward)
        if forward:
            to_node = int(row.target_osm_node_id)
            from_lon, from_lat = float(row.source_lon), float(row.source_lat)
            to_lon, to_lat = float(row.target_lon), float(row.target_lat)
        else:
            to_node = int(row.source_osm_node_id)
            from_lon, from_lat = float(row.target_lon), float(row.target_lat)
            to_lon, to_lat = float(row.source_lon), float(row.source_lat)
        edges.append(
            _Traversal(
                edge_id=int(row.id),
                from_node=node_id,
                to_node=to_node,
                from_lon=from_lon,
                from_lat=from_lat,
                to_lon=to_lon,
                to_lat=to_lat,
                cost_s=float(row.cost_s),
                length_m=float(row.length_m),
                highway=row.highway,
                name=row.name,
                geometry_geojson=str(row.geometry_geojson),
                reversed=not forward,
            )
        )
    return edges
|
||||
|
||||
|
||||
def _route_payload(
    *,
    dataset_id: int,
    mode: str,
    start: _GraphNode,
    target: _GraphNode,
    from_lon: float,
    from_lat: float,
    to_lon: float,
    to_lat: float,
    previous: dict[int, tuple[int, _Traversal]],
    total_cost_s: float,
    visited: int,
) -> dict[str, object]:
    """Build the route response from a predecessor map.

    ``previous`` maps a node id to ``(prior node id, traversal used to reach
    it)``. The path is recovered by walking back from the target node to the
    start node, then emitted as GeoJSON features: an optional access
    connector, one feature per edge in travel order, and an optional egress
    connector.

    Raises:
        KeyError: If ``previous`` has no entry for a node met while walking
            back from the target.
    """
    path: list[_Traversal] = []
    cursor = target.osm_node_id
    while cursor != start.osm_node_id:
        cursor, step = previous[cursor]
        path.append(step)
    path.reverse()

    network_distance = sum(step.length_m for step in path)
    access_distance = start.distance_m + target.distance_m

    def edge_feature(sequence: int, step: _Traversal) -> dict:
        geometry = json.loads(step.geometry_geojson)
        if step.reversed:
            # Stored geometry runs source -> target; flip it to travel order.
            geometry["coordinates"] = list(reversed(geometry.get("coordinates", [])))
        properties = {
            "feature_type": "routing_edge",
            "sequence": sequence,
            "mode": mode,
            "edge_id": step.edge_id,
            "highway": step.highway,
            "name": step.name,
            "length_m": step.length_m,
            "cost_s": step.cost_s,
        }
        return {"type": "Feature", "geometry": geometry, "properties": properties}

    features = []
    if start.distance_m:
        features.append(_connector_feature("access", mode, [[from_lon, from_lat], [start.lon, start.lat]], start.distance_m))
    features.extend(edge_feature(position, step) for position, step in enumerate(path, start=1))
    if target.distance_m:
        features.append(_connector_feature("egress", mode, [[target.lon, target.lat], [to_lon, to_lat]], target.distance_m))

    # Connector legs are not part of the network cost; add them here.
    duration_seconds = total_cost_s + _connector_seconds(access_distance, mode)
    start_summary = {"osm_node_id": start.osm_node_id, "distance_m": round(start.distance_m, 1)}
    target_summary = {"osm_node_id": target.osm_node_id, "distance_m": round(target.distance_m, 1)}
    return {
        "dataset_id": dataset_id,
        "mode": mode,
        "engine": "python_astar",
        "distance_m": round(network_distance + access_distance, 1),
        "network_distance_m": round(network_distance, 1),
        "access_distance_m": round(access_distance, 1),
        "duration_seconds": round(duration_seconds, 1),
        "duration_minutes": _duration_minutes_ceil(duration_seconds),
        "duration_label": _duration_label(duration_seconds),
        "visited_nodes": visited,
        "start_node": start_summary,
        "target_node": target_summary,
        "features": feature_collection(features),
    }
|
||||
|
||||
|
||||
def _single_point_route(start: _GraphNode, from_lon: float, from_lat: float, to_lon: float, to_lat: float, mode: str, dataset_id: int) -> dict[str, object]:
    """Build a direct route payload that reports ``start`` as both end nodes.

    Delegates to ``_direct_route_payload`` with the engine labelled
    ``"python_astar"`` and a visited-node count of 1.
    """
    node_summary = {"osm_node_id": start.osm_node_id, "distance_m": round(start.distance_m, 1)}
    return _direct_route_payload(
        dataset_id=dataset_id,
        mode=mode,
        from_lon=from_lon,
        from_lat=from_lat,
        to_lon=to_lon,
        to_lat=to_lat,
        engine="python_astar",
        start_node=node_summary,
        # Separate dict so the two payload entries do not share one object.
        target_node=dict(node_summary),
        visited_nodes=1,
    )
|
||||
|
||||
|
||||
def _direct_route_payload(
    *,
    dataset_id: int,
    mode: str,
    from_lon: float,
    from_lat: float,
    to_lon: float,
    to_lat: float,
    engine: str = "direct_fallback",
    start_node: dict[str, object] | None = None,
    target_node: dict[str, object] | None = None,
    visited_nodes: int = 0,
) -> dict[str, object]:
    """Build a route payload made of one straight connector line.

    The distance is the great-circle distance between the two coordinates and
    the duration is that distance at the mode's connector speed. No network
    edges are involved, so ``network_distance_m`` is 0 and the whole distance
    is reported as access distance.
    """
    straight_line_m = _distance_m(from_lat, from_lon, to_lat, to_lon)
    travel_seconds = _connector_seconds(straight_line_m, mode)
    rounded_distance = round(straight_line_m, 1)
    payload: dict[str, object] = {
        "dataset_id": dataset_id,
        "mode": mode,
        "engine": engine,
        "distance_m": rounded_distance,
        "network_distance_m": 0,
        "access_distance_m": rounded_distance,
        "duration_seconds": round(travel_seconds, 1),
        "duration_minutes": _duration_minutes_ceil(travel_seconds),
        "duration_label": _duration_label(travel_seconds),
        "visited_nodes": visited_nodes,
        "start_node": start_node,
        "target_node": target_node,
    }
    connector = _connector_feature("direct", mode, [[from_lon, from_lat], [to_lon, to_lat]], straight_line_m)
    payload["features"] = feature_collection([connector])
    return payload
|
||||
|
||||
|
||||
def _connector_feature(kind: str, mode: str, coordinates: list[list[float]], distance_m: float) -> dict:
    """Return a GeoJSON LineString feature for an off-network connector leg.

    Args:
        kind: Connector label stored under ``connector`` (callers in this
            module pass ``"access"``, ``"egress"`` or ``"direct"``).
        mode: Travel mode, also used to derive ``cost_s``.
        coordinates: Line coordinates, used as given.
        distance_m: Length of the connector, stored as ``length_m``.
    """
    properties = {
        "feature_type": "routing_connector",
        "connector": kind,
        "mode": mode,
        "length_m": distance_m,
        "cost_s": _connector_seconds(distance_m, mode),
    }
    line = {"type": "LineString", "coordinates": coordinates}
    return {"type": "Feature", "geometry": line, "properties": properties}
|
||||
|
||||
|
||||
def _connector_seconds(distance_m: float, mode: str) -> float:
|
||||
speed = 1.35 if mode == "walk" else 8.0
|
||||
return float(distance_m) / speed
|
||||
|
||||
|
||||
def _duration_minutes_ceil(seconds: int | float | None) -> int | None:
|
||||
if seconds is None:
|
||||
return None
|
||||
return max(0, int(math.ceil(float(seconds) / 60)))
|
||||
|
||||
|
||||
def _duration_label(seconds: int | float | None) -> str | None:
    """Format a duration as a short human-readable label.

    The duration is first rounded up to whole minutes. The result is
    ``"<d>d HH:MM"`` when it spans at least a day, ``"H:MM"`` when it spans at
    least an hour, and ``"<m> min"`` otherwise. ``None`` yields ``None``.
    """
    total_minutes = _duration_minutes_ceil(seconds)
    if total_minutes is None:
        return None
    days, within_day = divmod(total_minutes, 24 * 60)
    hours, minutes = divmod(within_day, 60)
    if days:
        return f"{days}d {hours:02d}:{minutes:02d}"
    if hours:
        return f"{hours}:{minutes:02d}"
    return f"{minutes} min"
|
||||
|
||||
|
||||
def _expanded_bbox(min_lon: float, min_lat: float, max_lon: float, max_lat: float, padding_km: float) -> tuple[float, float, float, float]:
|
||||
mid_lat = (min_lat + max_lat) / 2
|
||||
lat_delta = padding_km / 111.0
|
||||
lon_delta = padding_km / max(1.0, 111.0 * math.cos(math.radians(mid_lat)))
|
||||
return (min_lon - lon_delta, min_lat - lat_delta, max_lon + lon_delta, max_lat + lat_delta)
|
||||
|
||||
|
||||
def _distance_m(lat_a: float, lon_a: float, lat_b: float, lon_b: float) -> float:
|
||||
radius = 6_371_000.0
|
||||
phi_a = math.radians(lat_a)
|
||||
phi_b = math.radians(lat_b)
|
||||
delta_phi = math.radians(lat_b - lat_a)
|
||||
delta_lambda = math.radians(lon_b - lon_a)
|
||||
hav = math.sin(delta_phi / 2) ** 2 + math.cos(phi_a) * math.cos(phi_b) * math.sin(delta_lambda / 2) ** 2
|
||||
return radius * 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))
|
||||
130
app/serializers.py
Normal file
130
app/serializers.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Iterable
|
||||
|
||||
from app.models import GtfsRoute, GtfsStop, OsmFeature, RouteMatch, RoutePattern
|
||||
from app.osm_storage import osm_feature_public_id
|
||||
|
||||
|
||||
def feature_collection(features: Iterable[dict[str, Any]]) -> dict[str, Any]:
    """Wrap ``features`` in a GeoJSON ``FeatureCollection`` mapping.

    The iterable is materialised into a new list, so generators are accepted.
    """
    collected = [*features]
    return {"type": "FeatureCollection", "features": collected}
|
||||
|
||||
|
||||
def gtfs_route_feature(route: GtfsRoute, extra: dict[str, Any] | None = None) -> dict[str, Any] | None:
    """Serialise a GTFS route as a GeoJSON feature.

    Args:
        route: Route whose ``geometry_geojson`` holds the geometry as JSON text.
        extra: Optional properties merged over the defaults (they win on
            key clashes).

    Returns:
        The feature dict, or ``None`` when the route has no geometry.
    """
    raw_geometry = route.geometry_geojson
    if not raw_geometry:
        return None
    properties: dict[str, Any] = {
        "id": route.id,
        "dataset_id": route.dataset_id,
        "route_id": route.route_id,
        "mode": route.mode,
        "route_scope": route.route_scope,
        "ref": route.short_name,
        "name": route.long_name,
        "operator": route.operator_name,
        "source": "gtfs",
    }
    properties.update(extra or {})
    geometry = json.loads(raw_geometry)
    return {"type": "Feature", "geometry": geometry, "properties": properties}
|
||||
|
||||
|
||||
def osm_feature_feature(feature: OsmFeature, extra: dict[str, Any] | None = None) -> dict[str, Any] | None:
    """Serialise an OSM feature as a GeoJSON feature.

    The public ``id`` comes from ``osm_feature_public_id``; the database row
    id is exposed separately as ``row_id``.

    Args:
        feature: OSM feature whose ``geometry_geojson`` holds JSON text.
        extra: Optional properties merged over the defaults.

    Returns:
        The feature dict, or ``None`` when the feature has no geometry.
    """
    raw_geometry = feature.geometry_geojson
    if not raw_geometry:
        return None
    properties: dict[str, Any] = {
        "id": osm_feature_public_id(feature),
        "row_id": feature.id,
        "dataset_id": feature.dataset_id,
        "osm_type": feature.osm_type,
        "osm_id": feature.osm_id,
        "kind": feature.kind,
        "mode": feature.mode,
        "route_scope": feature.route_scope,
        "ref": feature.ref,
        "name": feature.name,
        "operator": feature.operator,
        "network": feature.network,
        "source": "osm",
    }
    properties.update(extra or {})
    geometry = json.loads(raw_geometry)
    return {"type": "Feature", "geometry": geometry, "properties": properties}
|
||||
|
||||
|
||||
def route_pattern_feature(pattern: RoutePattern, extra: dict[str, Any] | None = None) -> dict[str, Any] | None:
    """Serialise a route pattern as a GeoJSON feature.

    The pattern id is exposed as both ``id`` and ``route_pattern_id``, and its
    route reference as both ``route_ref`` and ``ref``.

    Args:
        pattern: Pattern whose ``geometry_geojson`` holds JSON text.
        extra: Optional properties merged over the defaults.

    Returns:
        The feature dict, or ``None`` when the pattern has no geometry.
    """
    raw_geometry = pattern.geometry_geojson
    if not raw_geometry:
        return None
    properties: dict[str, Any] = {
        "id": pattern.id,
        "route_pattern_id": pattern.id,
        "route_ref": pattern.route_ref,
        "ref": pattern.route_ref,
        "name": pattern.route_name,
        "mode": pattern.mode,
        "route_scope": pattern.route_scope,
        "operator": pattern.operator_name,
        "source": "route_layer",
        "source_kind": pattern.source_kind,
        "status": pattern.status,
        "confidence": pattern.confidence,
        "osm_feature_id": pattern.osm_feature_id,
        "gtfs_route_id": pattern.gtfs_route_id,
        "gtfs_shape_id": pattern.gtfs_shape_id,
    }
    properties.update(extra or {})
    geometry = json.loads(raw_geometry)
    return {"type": "Feature", "geometry": geometry, "properties": properties}
|
||||
|
||||
|
||||
def gtfs_stop_feature(stop: GtfsStop) -> dict[str, Any] | None:
    """Serialise a GTFS stop as a GeoJSON point feature.

    Returns:
        The feature dict, or ``None`` when either coordinate is ``None``.
        A coordinate of ``0`` is valid and still produces a feature.
    """
    lon, lat = stop.lon, stop.lat
    if lon is None or lat is None:
        return None
    properties = {
        "id": stop.id,
        "dataset_id": stop.dataset_id,
        "stop_id": stop.stop_id,
        "name": stop.name,
        "source": "gtfs",
    }
    point = {"type": "Point", "coordinates": [lon, lat]}
    return {"type": "Feature", "geometry": point, "properties": properties}
|
||||
|
||||
|
||||
def match_row(match: RouteMatch) -> dict[str, Any]:
    """Serialise a route match together with its GTFS and OSM sides.

    The ``osm`` entry is ``None`` when the match has no OSM feature. The
    ``reasons`` entry is the decoded ``reasons_json`` (an empty dict when that
    column is empty).
    """
    route = match.gtfs_route
    feature = match.osm_feature
    gtfs_side = {
        "id": route.id,
        "dataset_id": route.dataset_id,
        "route_id": route.route_id,
        "mode": route.mode,
        "route_scope": route.route_scope,
        "ref": route.short_name,
        "name": route.long_name,
        "operator": route.operator_name,
    }
    if feature is None:
        osm_side = None
    else:
        osm_side = {
            "id": feature.id,
            "dataset_id": feature.dataset_id,
            "osm_type": feature.osm_type,
            "osm_id": feature.osm_id,
            "mode": feature.mode,
            "route_scope": feature.route_scope,
            "ref": feature.ref,
            "name": feature.name,
            "operator": feature.operator,
            "network": feature.network,
        }
    reasons = json.loads(match.reasons_json or "{}")
    return {
        "id": match.id,
        "status": match.status,
        "confidence": match.confidence,
        "rule_source": match.rule_source,
        "gtfs": gtfs_side,
        "osm": osm_side,
        "reasons": reasons,
    }
|
||||
309
app/source_catalog.py
Normal file
309
app/source_catalog.py
Normal file
@@ -0,0 +1,309 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import func, or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import Source, SourceCatalogEntry
|
||||
|
||||
|
||||
# Source kinds that import_ingestable_sources accepts; seed rows whose
# lower-cased "kind" is not in this set are counted as skipped.
DIRECT_INGEST_KINDS = {"gtfs", "osm_geojson", "osm_pbf"}
|
||||
|
||||
|
||||
def default_source_catalog_path() -> Path:
    """Return the bundled catalog seed CSV: ``<project>/docs/source_catalog_seed.csv``.

    ``<project>`` is the directory two levels above this module file.
    """
    project_root = Path(__file__).resolve().parents[1]
    return project_root.joinpath("docs", "source_catalog_seed.csv")
|
||||
|
||||
|
||||
def default_ingestable_sources_path() -> Path:
    """Return the bundled ingestable-sources seed CSV under ``<project>/docs``.

    ``<project>`` is the directory two levels above this module file.
    """
    project_root = Path(__file__).resolve().parents[1]
    return project_root.joinpath("docs", "ingestable_sources_seed.csv")
|
||||
|
||||
|
||||
def import_source_catalog(session: Session, path: Path | str | None = None, *, update_existing: bool = True) -> dict[str, int]:
    """Load catalog entries from a CSV file into ``SourceCatalogEntry`` rows.

    Rows are matched to existing entries by their derived ``catalog_key``.
    Rows without a "Source name" are skipped. The session is flushed, not
    committed.

    Args:
        session: Session used for lookups and inserts.
        path: CSV location; ``None`` selects the bundled seed file and a
            relative path is resolved against the working directory.
        update_existing: When false, rows that already exist are counted as
            skipped instead of being overwritten.

    Returns:
        Counts under ``created``, ``updated`` and ``skipped``.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
    """
    # (model attribute, CSV header) pairs, in the order the payload is built.
    column_map = (
        ("geography", "Geography"),
        ("country_code", "Country code"),
        ("mode_scope", "Mode scope"),
        ("source_name", "Source name"),
        ("source_category", "Source category"),
        ("formats_apis", "Formats / APIs"),
        ("availability", "Availability"),
        ("coverage_notes", "Coverage notes"),
        ("geometry_notes", "Supersedes OSM for"),
        ("disruptions_closures", "Disruptions / closures"),
        ("operator_list_use", "Operator-list use"),
        ("access_license_notes", "Access / licence notes"),
        ("priority", "Priority"),
        ("source_url", "Source URL"),
        ("evidence_url", "Evidence URL"),
        ("next_pipeline_action", "Next pipeline action"),
    )
    csv_path = _resolve_path(path, default_source_catalog_path())
    created = updated = skipped = 0
    for row in _read_csv(csv_path):
        if not _value(row, "Source name"):
            skipped += 1
            continue
        payload = {"catalog_key": _catalog_key(row)}
        for attribute, header in column_map:
            payload[attribute] = _value(row, header)
        entry = session.scalar(
            select(SourceCatalogEntry).where(SourceCatalogEntry.catalog_key == payload["catalog_key"])
        )
        if entry is None:
            session.add(SourceCatalogEntry(**payload))
            created += 1
        elif update_existing:
            for attribute, value in payload.items():
                setattr(entry, attribute, value)
            entry.updated_at = datetime.now(timezone.utc)
            updated += 1
        else:
            skipped += 1
    session.flush()
    return {"created": created, "updated": updated, "skipped": skipped}
|
||||
|
||||
|
||||
def import_ingestable_sources(
    session: Session,
    path: Path | str | None = None,
    *,
    update_existing: bool = True,
) -> dict[str, int]:
    """Load directly ingestable sources from a CSV file into ``Source`` rows.

    A row is used only when it has a name, a URL and a kind listed in
    ``DIRECT_INGEST_KINDS``. Existing sources are matched on ``(kind, url)``
    first and on ``(name, url)`` second. Updated sources are re-enabled. The
    session is flushed, not committed.

    Args:
        session: Session used for lookups and inserts.
        path: CSV location; ``None`` selects the bundled seed file.
        update_existing: When false, rows that already exist are counted as
            skipped instead of being overwritten.

    Returns:
        Counts under ``created``, ``updated``, ``skipped`` and
        ``linked_catalog`` (created or updated rows with a catalog entry).

    Raises:
        FileNotFoundError: If the CSV file does not exist.
    """
    passthrough_columns = ("country", "license", "priority", "mode_scope", "source_basis", "notes")
    csv_path = _resolve_path(path, default_ingestable_sources_path())
    created = updated = skipped = linked_catalog = 0
    for row in _read_csv(csv_path):
        name = _value(row, "name")
        kind = (_value(row, "kind") or "").lower()
        url = _value(row, "url")
        if not (name and url and kind in DIRECT_INGEST_KINDS):
            skipped += 1
            continue
        catalog_entry = _catalog_entry_for_ingestable_row(session, row)
        payload = {"name": name, "kind": kind, "url": url}
        for column in passthrough_columns:
            payload[column] = _value(row, column)
        payload["catalog_entry_id"] = catalog_entry.id if catalog_entry is not None else None

        existing = session.scalar(
            select(Source)
            .where(Source.kind == kind, Source.url == url)
            .order_by(Source.id)
            .limit(1)
        )
        if existing is None:
            # Fall back to name + URL when no source of this kind has the URL.
            existing = session.scalar(
                select(Source).where(Source.name == name, Source.url == url).order_by(Source.id).limit(1)
            )

        if existing is None:
            session.add(Source(**payload))
            created += 1
        elif update_existing:
            for attribute, value in payload.items():
                setattr(existing, attribute, value)
            existing.enabled = True
            updated += 1
        else:
            skipped += 1
            continue
        if catalog_entry is not None:
            linked_catalog += 1
    session.flush()
    return {"created": created, "updated": updated, "skipped": skipped, "linked_catalog": linked_catalog}
|
||||
|
||||
|
||||
def source_catalog_summary(session: Session) -> dict[str, object]:
    """Return headline counts for the source catalog.

    Returns:
        A dict with the total number of catalog entries, entry counts grouped
        by priority and by status (missing values are reported under
        ``"unknown"``), and the number of sources that have a ``source_basis``
        or a ``priority`` set.
    """

    def grouped_counts(column) -> dict[str, int]:
        grouped = session.execute(select(column, func.count()).group_by(column)).all()
        return {label or "unknown": total for label, total in grouped}

    by_priority = grouped_counts(SourceCatalogEntry.priority)
    by_status = grouped_counts(SourceCatalogEntry.status)
    seeded_filter = Source.source_basis.is_not(None) | Source.priority.is_not(None)
    seeded_sources = session.scalar(select(func.count()).select_from(Source).where(seeded_filter)) or 0
    entry_total = session.scalar(select(func.count()).select_from(SourceCatalogEntry)) or 0
    return {
        "catalog_entries": entry_total,
        "catalog_by_priority": by_priority,
        "catalog_by_status": by_status,
        "seeded_ingestable_sources": seeded_sources,
    }
|
||||
|
||||
|
||||
def source_catalog_rows(
    session: Session,
    *,
    q: str | None = None,
    country: str | None = None,
    priority: str | None = None,
    status: str | None = None,
    limit: int = 100,
) -> list[SourceCatalogEntry]:
    """Return catalog entries that match the given filters.

    Results are ordered by priority, country code, source name and id.

    Args:
        session: Session used for the query.
        q: Free text, matched case-insensitively as a substring of the source
            name, category, formats/APIs, coverage notes or next action.
        country: Case-insensitive substring of the country code.
        priority: Exact priority value (surrounding whitespace is stripped).
        status: Exact status value (surrounding whitespace is stripped).
        limit: Maximum number of rows; clamped to the range 1..500.
    """
    entry = SourceCatalogEntry
    stmt = select(entry).order_by(entry.priority, entry.country_code, entry.source_name, entry.id)
    if q:
        pattern = f"%{q.strip()}%"
        searchable = (
            entry.source_name,
            entry.source_category,
            entry.formats_apis,
            entry.coverage_notes,
            entry.next_pipeline_action,
        )
        stmt = stmt.where(or_(*[column.ilike(pattern) for column in searchable]))
    if country:
        stmt = stmt.where(entry.country_code.ilike(f"%{country.strip()}%"))
    if priority:
        stmt = stmt.where(entry.priority == priority.strip())
    if status:
        stmt = stmt.where(entry.status == status.strip())
    row_cap = max(1, min(limit, 500))
    return session.scalars(stmt.limit(row_cap)).all()
|
||||
|
||||
|
||||
def catalog_entry_payload(entry: SourceCatalogEntry, *, linked_source_count: int = 0) -> dict[str, object]:
    """Serialise a catalog entry to a plain dict.

    Args:
        entry: The catalog entry to serialise.
        linked_source_count: Number of sources linked to the entry, passed in
            by the caller and copied into the payload.

    Returns:
        The entry's columns plus ``linked_source_count``; ``created_at`` and
        ``updated_at`` are ISO-8601 strings, or ``None`` when unset.
    """
    # Attributes copied as-is, in payload order.
    copied_attributes = (
        "id",
        "geography",
        "country_code",
        "mode_scope",
        "source_name",
        "source_category",
        "formats_apis",
        "availability",
        "coverage_notes",
        "geometry_notes",
        "disruptions_closures",
        "operator_list_use",
        "access_license_notes",
        "priority",
        "source_url",
        "evidence_url",
        "next_pipeline_action",
        "status",
    )
    payload: dict[str, object] = {attribute: getattr(entry, attribute) for attribute in copied_attributes}
    payload["linked_source_count"] = linked_source_count
    payload["created_at"] = entry.created_at.isoformat() if entry.created_at else None
    payload["updated_at"] = entry.updated_at.isoformat() if entry.updated_at else None
    return payload
|
||||
|
||||
|
||||
def linked_source_counts(session: Session, entries: Iterable[SourceCatalogEntry]) -> dict[int, int]:
    """Count the sources linked to each of the given catalog entries.

    Returns:
        A mapping of catalog entry id to source count. Entries with no linked
        source are absent. No query is issued when ``entries`` is empty.
    """
    wanted_ids = [item.id for item in entries]
    if not wanted_ids:
        return {}
    query = (
        select(Source.catalog_entry_id, func.count())
        .where(Source.catalog_entry_id.in_(wanted_ids))
        .group_by(Source.catalog_entry_id)
    )
    counts: dict[int, int] = {}
    for catalog_id, total in session.execute(query).all():
        if catalog_id is not None:
            counts[catalog_id] = total
    return counts
|
||||
|
||||
|
||||
def _catalog_entry_for_ingestable_row(session: Session, row: dict[str, str]) -> SourceCatalogEntry | None:
    """Find the catalog entry that best corresponds to an ingestable-source row.

    An entry whose source name equals the row's name (case-insensitively) is
    preferred. Otherwise the first entry, ordered by priority then id, is
    returned that matches ANY of: the row's country as a substring of the
    country code, a ``source_basis`` token in the source name or coverage
    notes, or the first word of the name (if longer than two characters) in
    the source name.

    Returns:
        The matching entry, or ``None`` when the row offers nothing to match
        on or nothing matches.
    """
    country = _value(row, "country")
    source_basis = _value(row, "source_basis")
    name = _value(row, "name")
    if not (country or source_basis or name):
        return None

    if name:
        exact_match = session.scalar(
            select(SourceCatalogEntry)
            .where(func.lower(SourceCatalogEntry.source_name) == name.lower())
            .order_by(SourceCatalogEntry.id)
            .limit(1)
        )
        if exact_match is not None:
            return exact_match

    # No exact name match: collect loose substring filters, OR-ed together.
    fuzzy_filters = []
    if country:
        fuzzy_filters.append(SourceCatalogEntry.country_code.ilike(f"%{country}%"))
    if source_basis:
        for token in _basis_tokens(source_basis):
            fuzzy_filters.append(SourceCatalogEntry.source_name.ilike(f"%{token}%"))
            fuzzy_filters.append(SourceCatalogEntry.coverage_notes.ilike(f"%{token}%"))
    if name:
        leading_word = name.split()[0]
        if len(leading_word) > 2:
            fuzzy_filters.append(SourceCatalogEntry.source_name.ilike(f"%{leading_word}%"))
    if not fuzzy_filters:
        return None
    best_guess = (
        select(SourceCatalogEntry)
        .where(or_(*fuzzy_filters))
        .order_by(SourceCatalogEntry.priority, SourceCatalogEntry.id)
        .limit(1)
    )
    return session.scalar(best_guess)
|
||||
|
||||
|
||||
def _basis_tokens(value: str) -> list[str]:
|
||||
tokens = []
|
||||
for raw in value.replace("/", " ").replace("-", " ").split():
|
||||
token = raw.strip(" ,.;()")
|
||||
if len(token) >= 5 and token.lower() not in {"official", "mirror", "feeds", "transport"}:
|
||||
tokens.append(token)
|
||||
return tokens[:4]
|
||||
|
||||
|
||||
def _catalog_key(row: dict[str, str]) -> str:
    """Derive the stable identity key of a catalog CSV row.

    The key is the SHA-256 hex digest of the lower-cased, ``|``-joined values
    of the country code, source name, source URL and formats/APIs columns
    (empty values are left out). When all four are empty, the digest is taken
    over the ``repr`` of the row's sorted items instead.
    """
    identifying_columns = ("Country code", "Source name", "Source URL", "Formats / APIs")
    values = [_value(row, column) for column in identifying_columns]
    fingerprint = "|".join(value.lower() for value in values if value)
    # Fall back to the whole row so entirely unlabelled rows still get a key.
    fingerprint = fingerprint or repr(sorted(row.items()))
    return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _read_csv(path: Path) -> list[dict[str, str]]:
|
||||
if not path.exists():
|
||||
raise FileNotFoundError(path)
|
||||
with path.open("r", encoding="utf-8-sig", newline="") as handle:
|
||||
reader = csv.DictReader(handle)
|
||||
return [dict(row) for row in reader]
|
||||
|
||||
|
||||
def _resolve_path(path: Path | str | None, default_path: Path) -> Path:
|
||||
if path is None:
|
||||
return default_path
|
||||
candidate = Path(path)
|
||||
if candidate.is_absolute():
|
||||
return candidate
|
||||
return Path.cwd() / candidate
|
||||
|
||||
|
||||
def _value(row: dict[str, str], key: str) -> str | None:
|
||||
value = row.get(key)
|
||||
if value is None:
|
||||
return None
|
||||
stripped = value.strip()
|
||||
return stripped or None
|
||||
256
app/source_updates.py
Normal file
256
app/source_updates.py
Normal file
@@ -0,0 +1,256 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Dataset, Source, SourceUpdateCheck
|
||||
from app.pipeline.utils import norm_text, sha256_file
|
||||
|
||||
|
||||
def check_source_for_update(session: Session, source: Source) -> SourceUpdateCheck:
    """Probe a source for changes and record the outcome.

    Looks up the source's active dataset, gathers the current remote/local
    metadata, decides whether an update is available and stores the result as
    a new ``SourceUpdateCheck``. The source's ``status`` and ``last_error``
    are updated to match. The session is flushed, not committed.

    Returns:
        The newly added ``SourceUpdateCheck``.
    """
    active_dataset = session.scalar(
        select(Dataset)
        .where(Dataset.source_id == source.id, Dataset.is_active.is_(True))
        .order_by(Dataset.created_at.desc(), Dataset.id.desc())
    )
    # Runs before the metadata probe, which reads source.url.
    recovery = _recover_missing_managed_cache_url(source)
    remote = _source_remote_metadata(source)
    if recovery is not None:
        remote["recovered_source_url"] = recovery["url"]
        remote["previous_source_url"] = recovery["previous_url"]
    update_available, reason = _update_decision(active_dataset, remote)

    if active_dataset is None:
        dataset_id, dataset_sha256 = None, None
    else:
        dataset_id, dataset_sha256 = active_dataset.id, active_dataset.sha256
    check = SourceUpdateCheck(
        source_id=source.id,
        status=remote["status"],
        update_available=update_available,
        reason=reason,
        remote_url=source.url,
        etag=remote.get("etag"),
        last_modified=remote.get("last_modified"),
        content_length=remote.get("content_length"),
        content_type=remote.get("content_type"),
        local_mtime=remote.get("local_mtime"),
        local_size=remote.get("local_size"),
        local_sha256=remote.get("local_sha256"),
        active_dataset_id=dataset_id,
        active_dataset_sha256=dataset_sha256,
        metadata_json=json.dumps(remote, separators=(",", ":"), default=_json_default),
    )
    session.add(check)

    probe_succeeded = remote["status"] == "checked"
    if not probe_succeeded:
        source.status = "update_check_error"
    elif update_available:
        source.status = "update_available"
    else:
        source.status = "up_to_date"
    source.last_error = None if probe_succeeded else reason
    session.flush()
    return check
|
||||
|
||||
|
||||
def latest_source_update_check(session: Session, source_id: int) -> SourceUpdateCheck | None:
    """Return the most recent update check recorded for a source.

    Checks are ordered by ``checked_at`` and then ``id``, newest first.

    Args:
        session: Session used for the query.
        source_id: Id of the source whose history is inspected.

    Returns:
        The newest ``SourceUpdateCheck``, or ``None`` if the source has none.
    """
    return session.scalar(
        select(SourceUpdateCheck)
        .where(SourceUpdateCheck.source_id == source_id)
        .order_by(SourceUpdateCheck.checked_at.desc(), SourceUpdateCheck.id.desc())
        # Session.scalar() reads only the first row but adds no LIMIT itself;
        # without this the database returns the source's whole check history.
        .limit(1)
    )
|
||||
|
||||
|
||||
def update_check_payload(check: SourceUpdateCheck | None) -> dict | None:
    """Serialize an update check into a JSON-friendly dict.

    ``None`` passes through unchanged. Datetime fields become ISO-8601 strings
    (or ``None``), and ``metadata_json`` is decoded into ``"metadata"``,
    falling back to ``{}`` when it is empty or not valid JSON.
    """
    if check is None:
        return None

    def _iso(moment):
        return moment.isoformat() if moment else None

    try:
        metadata = json.loads(check.metadata_json or "{}")
    except json.JSONDecodeError:
        metadata = {}

    # Key order is part of the payload shape, so fields are added in sequence.
    payload = {
        "id": check.id,
        "source_id": check.source_id,
        "checked_at": _iso(check.checked_at),
    }
    for field in (
        "status",
        "update_available",
        "reason",
        "etag",
        "last_modified",
        "content_length",
        "content_type",
    ):
        payload[field] = getattr(check, field)
    payload["local_mtime"] = _iso(check.local_mtime)
    for field in ("local_size", "local_sha256", "active_dataset_id", "active_dataset_sha256"):
        payload[field] = getattr(check, field)
    payload["metadata"] = metadata
    return payload
|
||||
|
||||
|
||||
def record_dataset_update_metadata(dataset: Dataset, check: SourceUpdateCheck | None) -> None:
    """Copy the fingerprint of ``check`` into ``dataset.metadata_json``.

    The snapshot is stored under the ``"source_update_check"`` key. Nothing
    happens when ``check`` is ``None``. Existing metadata that is not valid
    JSON is replaced by a fresh object.
    """
    if check is None:
        return

    try:
        metadata = json.loads(dataset.metadata_json or "{}")
    except json.JSONDecodeError:
        metadata = {}

    checked_at = check.checked_at.isoformat() if check.checked_at else None
    local_mtime = check.local_mtime.isoformat() if check.local_mtime else None
    snapshot = {
        "id": check.id,
        "checked_at": checked_at,
        "etag": check.etag,
        "last_modified": check.last_modified,
        "content_length": check.content_length,
        "content_type": check.content_type,
        "local_mtime": local_mtime,
        "local_size": check.local_size,
        "local_sha256": check.local_sha256,
        "metadata": update_check_payload(check).get("metadata", {}),
    }
    metadata["source_update_check"] = snapshot
    dataset.metadata_json = json.dumps(metadata, indent=2, default=_json_default)
|
||||
|
||||
|
||||
def _source_remote_metadata(source: Source) -> dict:
    """Collect update metadata for ``source.url``.

    ``http``/``https`` URLs are probed over the network. Anything else is
    treated as a local path: the path component of a ``file:`` URL, otherwise
    the raw URL string.
    """
    location = urlparse(source.url)
    if location.scheme not in {"http", "https"}:
        local_path = Path(location.path) if location.scheme == "file" else Path(source.url)
        return _local_metadata(local_path)
    return _http_metadata(source.url)
|
||||
|
||||
|
||||
def _recover_missing_managed_cache_url(source: Source) -> dict | None:
    """Repoint a source whose managed cache file has vanished to its seed URL.

    This applies only to non-HTTP sources whose path is missing and lies inside
    the managed cache directory of this source. On success ``source.url`` is
    overwritten.

    Returns:
        ``{"previous_url": ..., "url": ...}`` when the URL was replaced,
        otherwise ``None``.
    """
    location = urlparse(source.url)
    if location.scheme in {"http", "https"}:
        return None

    cache_path = Path(location.path) if location.scheme == "file" else Path(source.url)
    vanished_from_cache = not cache_path.exists() and _is_managed_source_cache_path(cache_path, source.id)
    if not vanished_from_cache:
        return None

    replacement = _seed_source_url_for(source)
    if replacement is None:
        return None

    outcome = {"previous_url": source.url, "url": replacement}
    source.url = replacement
    return outcome
|
||||
|
||||
|
||||
def _is_managed_source_cache_path(path: Path, source_id: int) -> bool:
    """Tell whether ``path`` lies in the managed cache folder of ``source_id``.

    The path counts as managed when its resolved form is inside
    ``<data_dir>/sources/source_<id>``. Failing that, it still counts when a
    ``sources`` component is directly followed by ``source_<id>`` in the
    unresolved path.
    """
    source_dir = f"source_{source_id}"
    try:
        managed_root = (settings.data_dir / "sources" / source_dir).resolve()
        path.resolve().relative_to(managed_root)
    except ValueError:
        # Not below the configured data dir; fall back to a structural match.
        components = path.parts
        return any(
            parent == "sources" and child == source_dir
            for parent, child in zip(components, components[1:])
        )
    return True
|
||||
|
||||
|
||||
def _seed_source_url_for(source: Source) -> str | None:
    """Find the seed HTTP(S) URL matching ``source`` in ``scripts/example_sources.json``.

    A seed row matches when it has an http/https URL, the same ``kind``, a
    compatible country (only compared when both sides declare one), and a
    normalized name whose token set contains, or is contained in, the source's
    name tokens.

    Returns:
        The URL of the first matching row, or ``None`` when the seed file is
        missing or unreadable, is not a JSON list, or has no matching row.
    """
    seed_path = Path(__file__).resolve().parents[1] / "scripts" / "example_sources.json"
    try:
        rows = json.loads(seed_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers both
        # invalid JSON (JSONDecodeError) and undecodable bytes.
        return None
    if not isinstance(rows, list):
        return None

    source_tokens = set(norm_text(source.name).split())
    if not source_tokens:
        # An empty set is a subset of every row's tokens, so a source without
        # a usable name would match the first seed of the same kind.
        return None

    for row in rows:
        if not isinstance(row, dict):
            continue
        url = str(row.get("url") or "")
        if urlparse(url).scheme not in {"http", "https"}:
            continue
        if row.get("kind") != source.kind:
            continue
        if source.country and row.get("country") and str(row.get("country")) != source.country:
            continue
        row_tokens = set(norm_text(row.get("name")).split())
        if row_tokens and (row_tokens <= source_tokens or source_tokens <= row_tokens):
            return url
    return None
|
||||
|
||||
|
||||
def _http_metadata(url: str) -> dict:
    """Fetch update-relevant response headers for ``url``.

    A ``HEAD`` request is tried first. If the server answers 405 or 501, a
    streamed ``GET`` is issued instead (the body is never read). Any failure
    is reported as ``{"status": "error", "error": ...}`` rather than raised.
    """
    response = None
    try:
        response = requests.head(url, allow_redirects=True, timeout=30)
        head_unsupported = response.status_code in {405, 501}
        if head_unsupported:
            response.close()
            response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()
    except Exception as exc:  # noqa: BLE001 - persisted as update-check status
        return {"status": "error", "error": str(exc)}
    finally:
        if response is not None:
            response.close()

    # Headers and the final URL are still read from the closed response object.
    headers = response.headers
    content_type = headers.get("Content-Type")
    raw_length = headers.get("Content-Length")
    if raw_length and raw_length.isdigit():
        content_length = int(raw_length)
    else:
        content_length = None

    return {
        "status": "checked",
        "etag": headers.get("ETag"),
        "last_modified": headers.get("Last-Modified"),
        "content_length": content_length,
        "content_type": content_type,
        "final_url": response.url,
        "update_artifact": _update_artifact(url, content_type),
    }
|
||||
|
||||
|
||||
def _local_metadata(path: Path) -> dict:
|
||||
if not path.exists():
|
||||
return {"status": "error", "error": f"Source file does not exist: {path}"}
|
||||
stat = path.stat()
|
||||
return {
|
||||
"status": "checked",
|
||||
"local_mtime": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc),
|
||||
"local_size": stat.st_size,
|
||||
"local_sha256": sha256_file(path),
|
||||
"update_artifact": _update_artifact(str(path), None),
|
||||
}
|
||||
|
||||
|
||||
def _update_decision(active_dataset: Dataset | None, remote: dict) -> tuple[bool, str]:
|
||||
if remote["status"] != "checked":
|
||||
return False, remote.get("error") or "update check failed"
|
||||
if active_dataset is None:
|
||||
return True, "no active dataset imported"
|
||||
if remote.get("local_sha256"):
|
||||
if remote["local_sha256"] == active_dataset.sha256:
|
||||
return False, "local file hash matches active dataset"
|
||||
return True, "local file hash differs from active dataset"
|
||||
|
||||
previous = _dataset_update_metadata(active_dataset)
|
||||
comparable = []
|
||||
for key in ("etag", "last_modified", "content_length"):
|
||||
current = remote.get(key)
|
||||
old = previous.get(key)
|
||||
if current is not None and old is not None:
|
||||
comparable.append(key)
|
||||
if str(current) != str(old):
|
||||
return True, f"remote {key} changed"
|
||||
if comparable:
|
||||
return False, "remote metadata matches active dataset"
|
||||
return True, "no previous remote metadata recorded"
|
||||
|
||||
|
||||
def _dataset_update_metadata(dataset: Dataset) -> dict:
|
||||
try:
|
||||
metadata = json.loads(dataset.metadata_json or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return metadata.get("source_update_check") or {}
|
||||
|
||||
|
||||
def _json_default(value):
|
||||
if isinstance(value, datetime):
|
||||
return value.isoformat()
|
||||
raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
|
||||
|
||||
|
||||
def _update_artifact(url_or_path: str, content_type: str | None) -> dict:
|
||||
lower = url_or_path.lower()
|
||||
is_osm_diff = lower.endswith(".osc") or lower.endswith(".osc.gz")
|
||||
is_gtfs_zip = lower.endswith(".zip") or (content_type or "").lower() in {"application/zip", "application/x-zip-compressed"}
|
||||
return {
|
||||
"kind": "osm_diff" if is_osm_diff else "gtfs_or_archive" if is_gtfs_zip else "full_snapshot",
|
||||
"is_diff": is_osm_diff,
|
||||
"content_type": content_type,
|
||||
}
|
||||
158
app/spatial.py
Normal file
158
app/spatial.py
Normal file
@@ -0,0 +1,158 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
# Tables that refresh_postgis_geometries() is allowed to touch.
POSTGIS_GEOMETRY_TABLES = {
    "canonical_stops",
    "gtfs_routes",
    "gtfs_shapes",
    "gtfs_stops",
    "osm_addresses",
    "osm_features",
    "route_patterns",
    "routing_edges",
    "routing_nodes",
}
|
||||
|
||||
|
||||
def using_postgresql() -> bool:
    """Report whether the configured database is PostgreSQL, per ``settings``."""
    postgres_configured = settings.is_postgresql_database
    return postgres_configured
|
||||
|
||||
|
||||
def refresh_postgis_geometries(
    session: Session,
    *,
    dataset_id: int | None = None,
    tables: Iterable[str] | None = None,
    only_missing: bool = True,
) -> None:
    """Populate PostGIS geometry columns from the stored GeoJSON / lon-lat data.

    Does nothing unless the configured database is PostgreSQL.

    Args:
        session: Session the UPDATE statements are executed on.
        dataset_id: Restrict dataset-scoped tables to this dataset.
            ``route_patterns`` and ``canonical_stops`` are always refreshed
            without a dataset filter.
        tables: Subset of ``POSTGIS_GEOMETRY_TABLES``; all tables when falsy.
        only_missing: Only fill rows whose geometry column is still NULL.

    Raises:
        ValueError: if ``tables`` names a table outside the supported set.
    """
    if not using_postgresql():
        return

    selected = set(tables or POSTGIS_GEOMETRY_TABLES)
    unknown = sorted(selected.difference(POSTGIS_GEOMETRY_TABLES))
    if unknown:
        raise ValueError(f"Unsupported PostGIS geometry table(s): {', '.join(unknown)}")

    scoped = {"dataset_id": dataset_id, "only_missing": only_missing}
    unscoped = {"dataset_id": None, "only_missing": only_missing}
    # Ordered plan: (table, refresher, extra positional args, keyword args).
    plan = (
        ("osm_features", _refresh_geojson_geometry, ("osm_features",), scoped),
        ("gtfs_routes", _refresh_geojson_geometry, ("gtfs_routes",), scoped),
        ("gtfs_shapes", _refresh_geojson_geometry, ("gtfs_shapes",), scoped),
        ("route_patterns", _refresh_geojson_geometry, ("route_patterns",), unscoped),
        ("osm_addresses", _refresh_address_geometry, (), scoped),
        ("gtfs_stops", _refresh_point_geometry, ("gtfs_stops",), scoped),
        ("canonical_stops", _refresh_point_geometry, ("canonical_stops",), unscoped),
        ("routing_nodes", _refresh_point_geometry, ("routing_nodes",), scoped),
        ("routing_edges", _refresh_routing_edge_geometry, (), scoped),
    )
    for table, refresh, positional, keywords in plan:
        if table in selected:
            refresh(session, *positional, **keywords)
|
||||
|
||||
|
||||
def analyze_postgresql_tables(session: Session, tables: Iterable[str]) -> None:
    """Run ``ANALYZE`` on each of ``tables``; a no-op unless on PostgreSQL.

    Table names are interpolated into the statement, because identifiers
    cannot be bound as parameters. Every name must therefore be a plain,
    optionally schema-qualified, identifier.

    Raises:
        ValueError: if a name is not a plain identifier. Nothing is executed
            in that case.
    """
    if not using_postgresql():
        return
    names = [str(table) for table in tables]
    invalid = [
        name
        for name in names
        if not all(part.isidentifier() for part in name.split("."))
    ]
    if invalid:
        raise ValueError(f"Unsupported table name(s) for ANALYZE: {', '.join(sorted(invalid))}")
    for name in names:
        session.execute(text(f"ANALYZE {name}"))
|
||||
|
||||
|
||||
def _refresh_geojson_geometry(session: Session, table: str, *, dataset_id: int | None, only_missing: bool) -> None:
    """Set ``geom`` of ``table`` from its ``geometry_geojson`` column (SRID 4326).

    Rows with empty GeoJSON are skipped. The update can be narrowed to one
    dataset and/or to rows whose ``geom`` is still NULL.
    """
    conditions = [
        "geometry_geojson IS NOT NULL",
        "geometry_geojson <> ''",
        *(["dataset_id = :dataset_id"] if dataset_id is not None else []),
        *(["geom IS NULL"] if only_missing else []),
    ]
    bind: dict[str, object] = {} if dataset_id is None else {"dataset_id": int(dataset_id)}
    where_clause = " AND ".join(conditions)
    statement = text(
        f"""
        UPDATE {table}
        SET geom = ST_SetSRID(ST_GeomFromGeoJSON(geometry_geojson), 4326)
        WHERE {where_clause}
        """
    )
    session.execute(statement, bind)
|
||||
|
||||
|
||||
def _refresh_point_geometry(session: Session, table: str, *, dataset_id: int | None, only_missing: bool) -> None:
    """Set ``geom`` of ``table`` to a point built from ``lon``/``lat`` (SRID 4326).

    Rows lacking either coordinate are skipped. The update can be narrowed to
    one dataset and/or to rows whose ``geom`` is still NULL.
    """
    conditions = [
        "lon IS NOT NULL",
        "lat IS NOT NULL",
        *(["dataset_id = :dataset_id"] if dataset_id is not None else []),
        *(["geom IS NULL"] if only_missing else []),
    ]
    bind: dict[str, object] = {} if dataset_id is None else {"dataset_id": int(dataset_id)}
    where_clause = " AND ".join(conditions)
    statement = text(
        f"""
        UPDATE {table}
        SET geom = ST_SetSRID(ST_MakePoint(lon, lat), 4326)
        WHERE {where_clause}
        """
    )
    session.execute(statement, bind)
|
||||
|
||||
|
||||
def _refresh_address_geometry(session: Session, *, dataset_id: int | None, only_missing: bool) -> None:
    """Refresh both geometry columns of ``osm_addresses``.

    ``geom`` is filled from ``lon``/``lat`` first, then ``area_geom`` from
    ``geometry_geojson``. Both honor the same dataset / only-missing filters.
    """
    _refresh_point_geometry(session, "osm_addresses", dataset_id=dataset_id, only_missing=only_missing)

    conditions = [
        "geometry_geojson IS NOT NULL",
        "geometry_geojson <> ''",
        *(["dataset_id = :dataset_id"] if dataset_id is not None else []),
        *(["area_geom IS NULL"] if only_missing else []),
    ]
    bind: dict[str, object] = {} if dataset_id is None else {"dataset_id": int(dataset_id)}
    where_clause = " AND ".join(conditions)
    statement = text(
        f"""
        UPDATE osm_addresses
        SET area_geom = ST_SetSRID(ST_GeomFromGeoJSON(geometry_geojson), 4326)
        WHERE {where_clause}
        """
    )
    session.execute(statement, bind)
|
||||
|
||||
|
||||
def _refresh_routing_edge_geometry(session: Session, *, dataset_id: int | None, only_missing: bool) -> None:
    """Set ``routing_edges.geom`` to the straight source-to-target line (SRID 4326).

    Edges missing any of the four endpoint coordinates are skipped. The update
    can be narrowed to one dataset and/or to rows whose ``geom`` is still NULL.
    """
    conditions = [
        "source_lon IS NOT NULL",
        "source_lat IS NOT NULL",
        "target_lon IS NOT NULL",
        "target_lat IS NOT NULL",
        *(["dataset_id = :dataset_id"] if dataset_id is not None else []),
        *(["geom IS NULL"] if only_missing else []),
    ]
    bind: dict[str, object] = {} if dataset_id is None else {"dataset_id": int(dataset_id)}
    where_clause = " AND ".join(conditions)
    statement = text(
        f"""
        UPDATE routing_edges
        SET geom = ST_SetSRID(
            ST_MakeLine(
                ST_MakePoint(source_lon, source_lat),
                ST_MakePoint(target_lon, target_lat)
            ),
            4326
        )
        WHERE {where_clause}
        """
    )
    session.execute(statement, bind)
|
||||
4090
app/static/app.js
Normal file
4090
app/static/app.js
Normal file
File diff suppressed because it is too large
Load Diff
1498
app/static/style.css
Normal file
1498
app/static/style.css
Normal file
File diff suppressed because it is too large
Load Diff
329
app/templates/index.html
Normal file
329
app/templates/index.html
Normal file
@@ -0,0 +1,329 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>Mobility Workbench</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" crossorigin="" />
|
||||
<link rel="stylesheet" href="/static/style.css?v=20260701-harmonizer-module" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<div>
|
||||
<h1>Mobility Workbench</h1>
|
||||
<p>Harmonized transit, mapping data, route layer, map review, and journey tests.</p>
|
||||
</div>
|
||||
<div class="actions">
|
||||
<button id="refreshBtn">Refresh</button>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<aside>
|
||||
<div class="sidebar-content">
|
||||
<details class="card sidebar-section" data-sidebar-section="stats" open>
|
||||
<summary><h2>Stats</h2></summary>
|
||||
<div class="sidebar-section-body">
|
||||
<div id="stats" class="stats"></div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="card sidebar-section" data-sidebar-section="qa" open>
|
||||
<summary><h2>QA</h2></summary>
|
||||
<div class="sidebar-section-body">
|
||||
<div class="qa-toolbar">
|
||||
<button type="button" id="refreshQaBtn">Refresh QA</button>
|
||||
</div>
|
||||
<div id="qaDashboard" class="qa-dashboard muted">No QA loaded.</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="card sidebar-section" data-sidebar-section="jobs" open>
|
||||
<summary><h2>Jobs</h2></summary>
|
||||
<div class="sidebar-section-body">
|
||||
<div id="jobs" class="jobs muted">No jobs loaded.</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="card sidebar-section" data-sidebar-section="harmonization" open>
|
||||
<summary><h2>GTFS Harmonization</h2></summary>
|
||||
<div class="sidebar-section-body">
|
||||
<details class="nested-section" data-sidebar-section="add-gtfs-source">
|
||||
<summary><h3>Add GTFS source</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<form id="sourceForm">
|
||||
<input name="catalog_entry_id" type="hidden" />
|
||||
<input name="kind" type="hidden" value="gtfs" />
|
||||
<label>Name <input name="name" required placeholder="DELFI / national GTFS" /></label>
|
||||
<label>URL or path <input name="url" required placeholder="https://.../feed.zip or ./data/feed.zip" /></label>
|
||||
<label>Country <input name="country" placeholder="DE" maxlength="8" /></label>
|
||||
<label>License <input name="license" placeholder="ODbL / CC-BY / unknown" /></label>
|
||||
<button type="submit">Add GTFS source</button>
|
||||
</form>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section source-catalog-card" data-sidebar-section="source-catalog">
|
||||
<summary><h3>Transit source catalog</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div id="sourceCatalogSummary" class="muted"></div>
|
||||
<div class="filter-row source-catalog-filter">
|
||||
<input id="sourceCatalogSearch" placeholder="Search catalog" />
|
||||
<input id="sourceCatalogCountry" placeholder="Country" />
|
||||
<select id="sourceCatalogPriority">
|
||||
<option value="">all priorities</option>
|
||||
<option value="P0">P0</option>
|
||||
<option value="P0 fallback">P0 fallback</option>
|
||||
<option value="P1">P1</option>
|
||||
<option value="P2">P2</option>
|
||||
<option value="P3">P3</option>
|
||||
<option value="P4">P4</option>
|
||||
<option value="P5">P5</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="source-catalog-actions">
|
||||
<button type="button" id="importSourceCatalogBtn">Import catalog</button>
|
||||
<button type="button" id="importIngestableSourcesBtn">Import ingestable seeds</button>
|
||||
</div>
|
||||
<div id="sourceCatalog"></div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section" data-sidebar-section="gtfs-feed-qa" open>
|
||||
<summary><h3>Feed QA</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div class="qa-toolbar">
|
||||
<button type="button" id="refreshGtfsHarmonizationBtn">Refresh feeds</button>
|
||||
</div>
|
||||
<div id="gtfsHarmonizationInventory" class="harmonization-inventory muted">No GTFS feed QA loaded.</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section" data-sidebar-section="gtfs-source-management" open>
|
||||
<summary><h3>GTFS source library</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div class="filter-row">
|
||||
<input id="sourceSearch" placeholder="Filter GTFS sources" />
|
||||
</div>
|
||||
<div id="sources"></div>
|
||||
</div>
|
||||
</details>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="card sidebar-section" data-sidebar-section="mapping" open>
|
||||
<summary><h2>Mapping Data</h2></summary>
|
||||
<div class="sidebar-section-body">
|
||||
<details class="nested-section" data-sidebar-section="add-map-source">
|
||||
<summary><h3>Add map source</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<form id="mappingSourceForm">
|
||||
<input name="catalog_entry_id" type="hidden" />
|
||||
<label>Name <input name="name" required placeholder="Germany OSM PBF" /></label>
|
||||
<label>Kind
|
||||
<select name="kind">
|
||||
<option value="osm_pbf">OSM PBF extract</option>
|
||||
<option value="osm_geojson">OSM transport GeoJSON</option>
|
||||
<option value="osm_diff">OSM change diff</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>URL or path <input name="url" required placeholder="https://.../latest.osm.pbf or ./data/routes.geojson" /></label>
|
||||
<label>Country <input name="country" placeholder="DE" maxlength="8" /></label>
|
||||
<label>License <input name="license" placeholder="ODbL / CC-BY / unknown" /></label>
|
||||
<button type="submit">Add map source</button>
|
||||
</form>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section source-catalog-card" data-sidebar-section="geofabrik">
|
||||
<summary><h3>Geofabrik OSM</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div class="filter-row geofabrik-filter">
|
||||
<input id="geofabrikSearch" placeholder="Berlin, Germany, Hamburg" />
|
||||
<button type="button" id="geofabrikSearchBtn">Search</button>
|
||||
</div>
|
||||
<label class="inline-check"><input id="geofabrikDiffSource" type="checkbox" checked /> add diff source metadata</label>
|
||||
<div id="geofabrikResults" class="dataset-search-results muted">Search Geofabrik extracts, then add or import one as an OSM PBF source.</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section" data-sidebar-section="mapping-source-management" open>
|
||||
<summary><h3>Map source library</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div class="filter-row">
|
||||
<input id="mappingSourceSearch" placeholder="Filter map sources" />
|
||||
<select id="mappingSourceKindFilter">
|
||||
<option value="">all map kinds</option>
|
||||
<option value="osm_geojson">OSM GeoJSON</option>
|
||||
<option value="osm_pbf">OSM PBF</option>
|
||||
<option value="osm_diff">OSM diff</option>
|
||||
</select>
|
||||
</div>
|
||||
<div id="mappingSources"></div>
|
||||
</div>
|
||||
</details>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="card sidebar-section" data-sidebar-section="datasets" open>
|
||||
<summary><h2>Datasets</h2></summary>
|
||||
<div class="sidebar-section-body">
|
||||
<details class="nested-section" data-sidebar-section="dataset-pipeline" open>
|
||||
<summary><h3>Derivation pipeline</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div class="workflow-actions">
|
||||
<button id="runMatchBtn" type="button">Run matcher</button>
|
||||
<button id="buildRouteLayerBtn" type="button">Build route layer</button>
|
||||
<button id="loadSampleBtn" type="button">Reset sample</button>
|
||||
</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section" data-sidebar-section="dataset-search">
|
||||
<summary><h3>Dataset search</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<form id="datasetSearchForm" class="dataset-search-form">
|
||||
<input id="datasetSearchQuery" placeholder="Route, line, stop, shape ID" autocomplete="off" />
|
||||
<div class="filter-row">
|
||||
<label class="inline-check"><input id="datasetSearchActiveOnly" type="checkbox" checked /> active only</label>
|
||||
<button type="submit">Search</button>
|
||||
</div>
|
||||
</form>
|
||||
<div id="datasetSearchResults" class="dataset-search-results muted">Search all imported datasets by label, route ID, and route-layer reference.</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section matches-card" data-sidebar-section="route-matches">
|
||||
<summary><h3>Route matches</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div class="filter-row">
|
||||
<select id="matchStatusFilter">
|
||||
<option value="">all</option>
|
||||
<option value="matched">matched</option>
|
||||
<option value="probable">probable</option>
|
||||
<option value="weak">weak</option>
|
||||
<option value="missing">missing</option>
|
||||
<option value="accepted">accepted</option>
|
||||
<option value="rejected">rejected</option>
|
||||
</select>
|
||||
<button id="reloadMatchesBtn">Reload</button>
|
||||
</div>
|
||||
<div id="matches"></div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="nested-section" data-sidebar-section="maintenance">
|
||||
<summary><h3>Maintenance</h3></summary>
|
||||
<div class="nested-section-body">
|
||||
<div class="maintenance-grid">
|
||||
<button type="button" data-admin-action="init-db">Init DB</button>
|
||||
<button type="button" data-admin-action="backfill-gtfs-shapes">Backfill GTFS shapes</button>
|
||||
<button type="button" data-admin-action="prune-cache-dry">Check cache</button>
|
||||
<button type="button" data-admin-action="prune-cache">Prune cache</button>
|
||||
<button type="button" data-admin-action="prune-inactive-dry">Check inactive</button>
|
||||
<button type="button" data-admin-action="prune-inactive">Prune inactive</button>
|
||||
<button type="button" data-admin-action="vacuum-db">Vacuum DB</button>
|
||||
<button type="button" class="danger" data-admin-action="reset-db">Reset DB</button>
|
||||
</div>
|
||||
<div id="adminStatus" class="admin-status muted"></div>
|
||||
</div>
|
||||
</details>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<details class="card sidebar-section" data-sidebar-section="layers" open>
|
||||
<summary><h2>Layers</h2></summary>
|
||||
<div class="sidebar-section-body">
|
||||
<div class="preset-row">
|
||||
<button type="button" data-layer-preset="network">Network</button>
|
||||
<button type="button" data-layer-preset="review">Matched/unmatched</button>
|
||||
<button type="button" data-layer-preset="unmatched">Unmatched</button>
|
||||
<button type="button" data-layer-preset="all">All</button>
|
||||
</div>
|
||||
<div id="layerControls" class="layer-controls"></div>
|
||||
<div id="mapStatus" class="map-status muted"></div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
</div>
|
||||
<button id="sidebarCollapseBtn" class="sidebar-collapse-handle" type="button" aria-label="Collapse left panel" title="Collapse left panel" aria-expanded="true">‹</button>
|
||||
</aside>
|
||||
|
||||
<section class="map-panel">
|
||||
<div id="map"></div>
|
||||
<div id="mapLoading" class="map-loading" hidden>
|
||||
<span class="spinner" aria-hidden="true"></span>
|
||||
<span id="mapLoadingText">Loading map layers...</span>
|
||||
</div>
|
||||
<section class="map-floating journey-card">
|
||||
<h2>Journey</h2>
|
||||
<form id="journeyForm">
|
||||
<div id="journeyTransitSnapshot" class="journey-snapshot muted">Transit snapshot loading...</div>
|
||||
<label>From <input id="journeyFromQuery" placeholder="Hauptbahnhof" autocomplete="off" /></label>
|
||||
<input id="journeyFromStop" type="hidden" />
|
||||
<div id="journeyFromSuggestions" class="stop-suggestions"></div>
|
||||
<button type="button" id="journeySwapBtn" class="journey-swap" title="Switch start and destination">Swap</button>
|
||||
<label>To <input id="journeyToQuery" placeholder="Alexanderplatz" autocomplete="off" /></label>
|
||||
<input id="journeyToStop" type="hidden" />
|
||||
<div id="journeyToSuggestions" class="stop-suggestions"></div>
|
||||
<label>Via <input id="journeyViaQuery" placeholder="optional stop" autocomplete="off" /></label>
|
||||
<input id="journeyViaStop" type="hidden" />
|
||||
<div id="journeyViaSuggestions" class="stop-suggestions"></div>
|
||||
<div class="journey-mode" role="radiogroup" aria-label="Route mode">
|
||||
<label><input type="radio" name="journeyMode" value="transit" checked /> Public transport</label>
|
||||
<label><input type="radio" name="journeyMode" value="walk" /> Walk</label>
|
||||
<label><input type="radio" name="journeyMode" value="drive" /> Car</label>
|
||||
</div>
|
||||
<div class="journey-options">
|
||||
<label>Date <input id="journeyServiceDate" type="date" /></label>
|
||||
<label>Departure <input id="journeyDeparture" type="time" value="08:00" /></label>
|
||||
<label>Transfer buffer <input id="journeyTransferMinutes" type="number" min="0" max="60" step="1" value="2" /></label>
|
||||
<label>Rank by
|
||||
<select id="journeyRanking">
|
||||
<option value="recommended">Recommended</option>
|
||||
<option value="earliest_arrival">Earliest arrival</option>
|
||||
<option value="duration">Shortest duration</option>
|
||||
<option value="fewest_transfers">Fewest transfers</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
<label class="journey-direct"><input id="journeyDirectOnly" type="checkbox" /> Direct public transport only</label>
|
||||
<div class="journey-actions">
|
||||
<button type="button" id="journeyEarlierBtn">Earlier</button>
|
||||
<button type="submit" class="primary">Search</button>
|
||||
<button type="button" id="journeyLaterBtn">Later</button>
|
||||
</div>
|
||||
<button type="button" id="generateItinerariesBtn">Generate travel options</button>
|
||||
</form>
|
||||
<div id="journeyResults" class="journey-results"></div>
|
||||
<section class="itinerary-panel">
|
||||
<div class="journey-title">
|
||||
<span>Comparison</span>
|
||||
<button type="button" id="reloadItinerariesBtn">Reload</button>
|
||||
</div>
|
||||
<div id="itineraryResults" class="itinerary-results muted">Generate travel options to compare route families.</div>
|
||||
</section>
|
||||
</section>
|
||||
<div class="legend">
|
||||
<span><b class="line osm"></b>OSM existing routes</span>
|
||||
<span><b class="line gtfs"></b>GTFS covered routes</span>
|
||||
<span><b class="line missing"></b>GTFS missing OSM match</span>
|
||||
<span><b class="dot stops"></b>Stops / stations / terminals</span>
|
||||
</div>
|
||||
</section>
|
||||
</main>
|
||||
|
||||
<div id="overlay" class="overlay" hidden>
|
||||
<section class="overlay-panel">
|
||||
<div class="overlay-title">
|
||||
<h2 id="overlayTitle">Candidates</h2>
|
||||
<button id="overlayCloseBtn">Close</button>
|
||||
</div>
|
||||
<div id="overlayContent"></div>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js" crossorigin=""></script>
|
||||
<script src="/static/app.js?v=20260701-harmonizer-module"></script>
|
||||
</body>
|
||||
</html>
|
||||
155
app/worker_supervisor.py
Normal file
155
app/worker_supervisor.py
Normal file
@@ -0,0 +1,155 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
@dataclass
class WorkerHandle:
    """Bookkeeping record for one supervised queue-worker process."""

    # Zero-based position of the worker among the configured workers.
    index: int
    # Stable identifier, passed to the worker CLI as --worker-id.
    worker_id: str
    # OS process id of the worker.
    pid: int | None
    # "started" when spawned by this call, "already_running" when reused.
    status: str
    # File in which the worker's pid is persisted.
    pid_file: Path
    # File receiving the worker's stdout and stderr.
    log_file: Path
    # True only when this server process spawned the worker itself.
    started_by_server: bool = False
|
||||
|
||||
|
||||
# Handles from the latest start_queue_workers() call; read by stop_queue_workers().
_handles: list[WorkerHandle] = []
|
||||
|
||||
|
||||
def start_queue_workers() -> list[WorkerHandle]:
    """Ensure the configured number of queue workers is running.

    For each worker slot, a process whose pid file points at a live pid is
    reused. Otherwise the stale pid file is removed, a new worker is spawned
    and its pid is written back. Returns ``[]`` without side effects when
    autostart is disabled.

    Returns:
        A copy of the handles, which are also stored in the module-level
        ``_handles`` list.
    """
    if not settings.queue_worker_autostart:
        return []

    worker_count = max(0, int(settings.queue_worker_count))
    worker_dir = settings.data_dir / "workers"
    worker_dir.mkdir(parents=True, exist_ok=True)

    handles: list[WorkerHandle] = []
    for index in range(worker_count):
        worker_id = f"server-worker-{index + 1}"
        pid_file = worker_dir / f"{worker_id}.pid"
        log_file = worker_dir / f"{worker_id}.log"

        existing_pid = _read_pid(pid_file)
        if existing_pid is not None and _pid_running(existing_pid):
            pid, status, spawned_here = existing_pid, "already_running", False
        else:
            pid_file.unlink(missing_ok=True)
            process = _spawn_worker(worker_id, log_file)
            pid_file.write_text(str(process.pid), encoding="utf-8")
            pid, status, spawned_here = process.pid, "started", True

        handles.append(
            WorkerHandle(
                index=index,
                worker_id=worker_id,
                pid=pid,
                status=status,
                pid_file=pid_file,
                log_file=log_file,
                started_by_server=spawned_here,
            )
        )

    _handles[:] = handles
    return list(_handles)
|
||||
|
||||
|
||||
def stop_queue_workers() -> None:
    """Terminate the workers this server process spawned, if configured to.

    Does nothing unless ``queue_worker_stop_on_shutdown`` is enabled. Workers
    that were already running when the server started are left alone.
    """
    if not settings.queue_worker_stop_on_shutdown:
        return
    for handle in list(_handles):
        if handle.started_by_server and handle.pid is not None:
            _terminate_pid(handle.pid)
            handle.pid_file.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def queue_worker_status() -> list[dict[str, object]]:
    """Report the on-disk state of every configured worker slot.

    The result is derived from the pid files under ``<data_dir>/workers``,
    not from the in-memory handle registry.

    Returns:
        One dict per configured slot with the keys ``index``, ``worker_id``,
        ``pid``, ``running``, ``pid_file`` and ``log_file``; an empty list
        when autostart is disabled.
    """
    if not settings.queue_worker_autostart:
        return []

    base_dir = settings.data_dir / "workers"
    report: list[dict[str, object]] = []
    slot_total = max(0, int(settings.queue_worker_count))

    for slot in range(slot_total):
        name = f"server-worker-{slot + 1}"
        pid_path = base_dir / f"{name}.pid"
        log_path = base_dir / f"{name}.log"
        recorded_pid = _read_pid(pid_path)
        # Only probe the process table when a pid could actually be read.
        alive = False if recorded_pid is None else _pid_running(recorded_pid)
        report.append(
            dict(
                index=slot,
                worker_id=name,
                pid=recorded_pid,
                running=alive,
                pid_file=str(pid_path),
                log_file=str(log_path),
            )
        )
    return report
|
||||
|
||||
|
||||
def _spawn_worker(worker_id: str, log_file: Path) -> subprocess.Popen:
    """Launch ``python -m app.cli worker`` as a detached child process.

    The child runs from the directory one level above this module's
    directory, in its own session, with stdin closed and stdout/stderr
    appended to *log_file*. The environment is a copy of the current one
    with ``MOBILITY_SUPERVISED_WORKER=1`` added.

    Args:
        worker_id: Value passed to the worker via ``--worker-id``.
        log_file: File that collects the worker output; parent directories
            are created when missing.

    Returns:
        The ``Popen`` object of the started worker.
    """
    project_root = Path(__file__).resolve().parents[1]
    argv = [
        sys.executable,
        "-m",
        "app.cli",
        "worker",
        "--worker-id",
        worker_id,
        "--poll-interval",
        str(settings.queue_worker_poll_interval_seconds),
    ]
    child_env = os.environ.copy()
    child_env["MOBILITY_SUPERVISED_WORKER"] = "1"

    log_file.parent.mkdir(parents=True, exist_ok=True)
    # The parent's copy of the log descriptor is closed when the block exits;
    # the child keeps the descriptor it received as stdout/stderr.
    with log_file.open("ab", buffering=0) as sink:
        return subprocess.Popen(
            argv,
            cwd=str(project_root),
            env=child_env,
            stdin=subprocess.DEVNULL,
            stdout=sink,
            stderr=subprocess.STDOUT,
            start_new_session=True,
        )
|
||||
|
||||
|
||||
def _read_pid(path: Path) -> int | None:
    """Return the integer stored in the pid file *path*, or None.

    None is returned when the file cannot be read (missing, a directory,
    permission problems, undecodable bytes) or when its stripped content is
    not a valid integer.
    """
    try:
        raw = path.read_text(encoding="utf-8")
        return int(raw.strip())
    except (OSError, ValueError):
        # FileNotFoundError is an OSError; UnicodeDecodeError is a ValueError.
        return None
|
||||
|
||||
|
||||
def _pid_running(pid: int) -> bool:
    """Return True when a process with id *pid* currently exists.

    The check sends signal 0 with ``os.kill``, which on POSIX performs the
    existence and permission checks without delivering a signal.

    Args:
        pid: Process id to probe, typically read from a pid file.

    Returns:
        False for non-positive or out-of-range pids and for pids that do not
        exist; True when the process exists, including when it belongs to
        another user (``PermissionError``).
    """
    # For kill(), pid 0 and negative pids address process groups or every
    # signalable process rather than one process, so the probe would succeed
    # and a corrupt pid file would look like a live worker.
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists but is owned by someone else.
        return True
    except OverflowError:
        # The value does not fit the platform pid type, so it cannot be a real pid.
        return False
    return True
|
||||
|
||||
|
||||
def _terminate_pid(pid: int) -> None:
    """Send SIGTERM to *pid*, ignoring a process that no longer exists.

    Only ``ProcessLookupError`` is swallowed; any other error raised by
    ``os.kill`` propagates to the caller. The function does not wait for the
    process to exit.
    """
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        # Already gone: nothing left to terminate.
        pass
|
||||
0
data/.gitkeep
Normal file
0
data/.gitkeep
Normal file
11
docker-compose.yml
Normal file
11
docker-compose.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Single-container setup for the workbench.
services:
  workbench:
    # Build the image from the Dockerfile in the repository root.
    build: .
    ports:
      # host:container
      - "8000:8000"
    environment:
      # SQLite file at /app/data/workbench.sqlite, i.e. inside the mounted volume below.
      DATABASE_URL: sqlite:////app/data/workbench.sqlite
      DATA_DIR: /app/data
      GTFS_STOP_TIMES_IMPORT_LIMIT: 250000
    volumes:
      # Keep database and imported data on the host across container rebuilds.
      - ./data:/app/data
|
||||
202
docs/backlog.md
Normal file
202
docs/backlog.md
Normal file
@@ -0,0 +1,202 @@
|
||||
# Product and Engineering Backlog
|
||||
|
||||
Last updated: 2026-07-01
|
||||
|
||||
This backlog reflects the current Germany-scale PostGIS prototype. The target remains a Europe-scale mobility data workbench that builds canonical stops, stations, routes, route geometry, timetable links, transfer rules, routing graph data, address search, and coverage evidence from many public sources.
|
||||
|
||||
OSM-derived geometry is the preferred visual authority. GTFS, NeTEx, realtime, and official APIs are timetable, validation, routing, and gap-detection inputs. GTFS shapes are still valuable evidence, especially for missing OSM relations and temporary detours.
|
||||
|
||||
## Current State
|
||||
|
||||
- PostgreSQL/PostGIS is the active development database path; SQLite remains a legacy/test fallback.
|
||||
- Germany OSM and Germany GTFS/DELFI-scale imports are supported.
|
||||
- OSM address indexing is available and address search is bbox-aware without being bbox-limited.
|
||||
- Jobs and job events exist for imports, route matching, route-layer rebuilds, address indexing, relabeling, deletes, and maintenance.
|
||||
- Job rows expose a generic details overlay with planned/current/done phases, event log, metadata, and a compact queue snapshot.
|
||||
- A first QA dashboard skeleton exists for source discovery, import health, GTFS validation, canonical stop/link coverage, route matching, and publication readiness.
|
||||
- The GTFS harmonization target architecture is documented in `docs/gtfs_harmonization.md`.
|
||||
- GTFS source management is presented as a separate `GTFS Harmonization` UI module; OSM/map inputs are presented as a separate `Mapping Data` module.
|
||||
- Journey search consumes the active harmonized transit snapshot instead of exposing a raw GTFS source selector.
|
||||
- Route-layer rebuild runs through the queue, but it is still coarse-grained and can take minutes on national datasets.
|
||||
- The route-layer builder links canonical GTFS stops, OSM stops, OSM route relations, GTFS route patterns, and trip-pattern links.
|
||||
- Journey search is progressive and can publish intermediate results, but the underlying routing algorithm is still a prototype.
|
||||
- Walk and drive routing use the OSM-derived routing layer when available.
|
||||
|
||||
## Current Caveats
|
||||
|
||||
- Journey search is not yet a full RAPTOR/CSA-style router.
|
||||
- Address endpoints can multiply the search space: current behavior can use up to 4 access stops and 4 egress stops, creating up to 16 transit stop-pair searches per transfer stage.
|
||||
- Progressive stages still recompute too much. Searching `up to 2 transfers` repeats direct and one-transfer work before deeper expansion.
|
||||
- Walking access/egress legs are represented separately in journey output, but the search engine still needs a cleaner transfer budget model where access/egress walking never consumes public-transport transfer count.
|
||||
- Route-search caches are in-process only. They do not survive server restart, do not deduplicate identical searches already running in another thread/process, and only help once a stage/search has completed.
|
||||
- Route-layer rebuild currently clears/rebuilds derived tables. Until the rebuild completes, visual route-pattern link tables can be incomplete.
|
||||
- Timetable reachability should not depend on visual route-pattern links. The code has been patched in this checkout, but an already-running server must be restarted or reloaded before the fix takes effect.
|
||||
- Canonical stop extraction on national feeds is CPU/memory heavy and does too much Python-side grouping.
|
||||
- OSM stop-linking and OSM route-candidate indexing are still large spatial/batch operations.
|
||||
- GTFS detours are not classified as first-class route variants yet.
|
||||
- Local-transport-only routing is not a first-class profile yet.
|
||||
- Proper Alembic migrations are still missing; runtime schema maintenance should be reduced to an explicit migration/maintenance path.
|
||||
- The source and job database tables are still shared between harmonization, mapping, and routing; the current split is a product/UI boundary, not a separate service or database boundary yet.
|
||||
|
||||
## P0: Routing Performance and Correctness
|
||||
|
||||
These items directly address slow or failed searches such as `Berlin, Alexanderplatz` to `Heidelberg, Blumenstrasse 36`.
|
||||
|
||||
- Replace the demo round-expansion router with a timetable-native algorithm.
|
||||
Preferred direction: RAPTOR or CSA over preloaded arrays/tables, with rounds representing public-transport boardings rather than ad hoc SQL expansion.
|
||||
- Precompute a transfer graph.
|
||||
Store station-internal transfers, nearby walking transfers, platform/stop-place links, and allowed transfer times by mode/source/station.
|
||||
- Separate access/egress from transfer count.
|
||||
Walking from an address to the first stop, and from the last stop to an address, should never count as a vehicle transfer.
|
||||
- Add a durable journey cache.
|
||||
Cache normalized requests, address-to-stop candidates, stop-to-stop stage results, common station-pair results, and in-flight request deduplication in PostgreSQL.
|
||||
- Add hub-aware long-distance routing.
|
||||
For long-distance OD pairs, search local access to likely hubs, trunk rail/regional candidates, then local egress. Candidate hubs can be ranked by station importance, service frequency, route scope, distance, and direction.
|
||||
- Add a local-transport-only profile.
|
||||
Implement a Deutschlandticket-like profile that excludes long-distance route scopes and still supports regional rail, S-Bahn, subway, tram, bus, ferry, and walking transfers.
|
||||
- Add admissible pruning.
|
||||
Bound exploration by best known arrival, remaining distance, direction/off-course penalty, transfer budget, service frequency, and maximum tolerated detour.
|
||||
- Add journey diagnostics.
|
||||
Return searched stages, candidate counts, pruned reasons, access/egress stops, service date, source feeds, transfer stops, and whether no-route means no timetable path or a search limit was hit.
|
||||
- Add arrive-by search.
|
||||
This is important for route quality and for comparing results against operator and Deutsche Bahn (DB) route planners.
|
||||
- Add route profile controls in the UI.
|
||||
Expose `fastest`, `earliest arrival`, `fewest transfers`, `local only`, `walk`, `drive`, `arrive by`, `via`, and `avoid` options, plus transfer buffer controls.
|
||||
|
||||
## P0: Queue and Rebuild Robustness
|
||||
|
||||
- Move runtime schema maintenance out of normal app startup.
|
||||
The current checkout avoids redundant PostgreSQL DDL, but explicit migrations are still needed.
|
||||
- Add Alembic migrations.
|
||||
Use migrations for PostGIS columns, indexes, route-layer tables, routing tables, and cache tables.
|
||||
- Make route-layer rebuild use shadow tables or versioned rows.
|
||||
Build replacement rows without deleting the readable active layer first; atomically promote the new version when complete.
|
||||
- Make route-layer rebuild incremental.
|
||||
Rebuild only affected route patterns after new matches, stop-link decisions, source updates, or OSM diffs.
|
||||
- Add stale worker and stale pid reconciliation.
|
||||
Worker status should never report a pid as running unless the current server can verify it.
|
||||
- Improve cancellation.
|
||||
Long PostgreSQL statements need cancellable phases and visible progress rather than only a queued/running state.
|
||||
- Improve progress granularity and timings.
|
||||
The UI can display job events now, but long PostgreSQL statements still need finer checkpoints, elapsed times, estimated remaining work, and cancellable sub-phases.
|
||||
|
||||
## P1: Route Layer, Detours, and Geometry Provenance
|
||||
|
||||
- Classify GTFS route variants.
|
||||
Group trips by route, direction, shape, stop sequence, service date span, and trip frequency. Mark rare/temporary shapes as detours or temporary variants rather than replacing the canonical visual route.
|
||||
- Add stop-by-stop OSM path fallback.
|
||||
When an OSM route relation is missing or a GTFS shape is a detour, assemble geometry between matched consecutive stops using mode-constrained OSM paths.
|
||||
- Cache stop-to-stop route geometry.
|
||||
Key by mode, from canonical stop, to canonical stop, direction constraints, and graph version.
|
||||
- Store geometry provenance per route pattern.
|
||||
Examples: `osm_route_relation`, `gtfs_shape`, `stop_to_stop_osm_path`, `manual_override`, `detour_variant`.
|
||||
- Respect directionality.
|
||||
Bus/car paths need oneway handling; tram/rail paths need topology and direction evidence; reverse links must not be assumed valid.
|
||||
- Add route-pattern detail inspection.
|
||||
Show OSM geometry, GTFS shapes, linked trips, linked stops, direction evidence, confidence, and variant/detour status.
|
||||
- Add generalized route geometries.
|
||||
Store high-detail inspection geometry and simplified map geometry.
|
||||
|
||||
## P1: Canonical Stops, Stations, and Addresses
|
||||
|
||||
- Optimize canonical stop extraction.
|
||||
Push more grouping/linking into SQL, avoid loading all scheduled stops into Python, batch inserts, and keep stable canonical IDs when possible.
|
||||
- Build a canonical stop alias table.
|
||||
Persist normalized names, multilingual names, station codes, IBNR/EVA/UIC/IFOPT, stop_area IDs, OSM IDs, and source-specific aliases.
|
||||
- Improve station-complex modeling.
|
||||
Separate public stop place, station complex, platforms/tracks, entrances, bus bays, and nearby stop groups.
|
||||
- Add canonical stop detail overlay.
|
||||
Show linked GTFS stops, linked OSM stops/stations, source names, confidence, distances, and manual overrides.
|
||||
- Add manual canonical stop link/unlink decisions.
|
||||
Persist stop matching decisions like route matching decisions, so source updates do not overwrite reviewed links.
|
||||
- Improve address result folding.
|
||||
Prefer street-level suggestions for dense house-number ranges, but preserve exact address selection when a full address is typed.
|
||||
- Precompute address access candidates.
|
||||
Store nearest useful public-transport stops per address/street point, with mode/source/radius metadata.
|
||||
|
||||
## P1: More GTFS Sources and Deduplication
|
||||
|
||||
- Import more GTFS feeds where they improve authority or coverage.
|
||||
Deutsche Bahn (DB) long-distance/regional feeds, state feeds, and neighboring-country feeds touching Germany are useful test cases.
|
||||
- Add source priority and authority ranking.
|
||||
Decide which source is more authoritative for stops, operators, routes, calendars, and geometry evidence.
|
||||
- Deduplicate operators/agencies.
|
||||
Merge agency/operator records with provenance and aliases instead of treating each GTFS `agency.txt` row as a separate operator.
|
||||
- Turn QA summary counters into review queues.
|
||||
Drill down from each bad/warn metric into concrete sources, stops, routes, links, and conflicts.
|
||||
- Add GTFS feed QA reports.
|
||||
Calendar coverage, stale feeds, missing shapes, impossible stop times, duplicate routes, route direction coverage, stop coordinate outliers.
|
||||
- Add conflict dashboards and reusable resolution workflows.
|
||||
Show canonical stops/routes with competing source claims, weak matches, missing visual geometry, authority-rule conflicts, and license blockers.
|
||||
|
||||
## P1: Scalable OSM and Map Outputs
|
||||
|
||||
- Keep OSM PBF import chunked and resumable.
|
||||
Keep previous active visual datasets available while the next import builds.
|
||||
- Add vector tile or PMTiles export.
|
||||
Needed for Germany/Europe route layers and dense editing views.
|
||||
- Add route-scope and mode-specific map generalization.
|
||||
Different zooms should use different detail levels and route classes.
|
||||
- Improve OSM route candidate indexing.
|
||||
Use stronger SQL/PostGIS filtering before loading route geometry into Python.
|
||||
- Add OSM diffs later.
|
||||
Minutely/hourly/daily diffs can update route and address layers without full country rebuilds.
|
||||
|
||||
## P2: Data Platform Hardening
|
||||
|
||||
- Add explicit read/write transaction boundaries for all long requests and jobs.
|
||||
- Add API pagination for large result sets.
|
||||
- Add import logs and source-run history.
|
||||
- Add database maintenance commands: analyze, vacuum, reindex, orphan cleanup.
|
||||
- Add test fixtures that do not mutate the live development database.
|
||||
- Add observability: query timings, job timings, row counts, cache hit rates, and per-stage routing metrics.
|
||||
|
||||
## P2: Better Map and Editing Workflows
|
||||
|
||||
- Add canonical stop and route detail side panels.
|
||||
- Add candidate map preview for stop matching, not only route matching.
|
||||
- Add unmatched/matched/weak/proposed visual layers with source filters.
|
||||
- Keep calculated journey geometry and stop markers always on top.
|
||||
- Add editable match queues for stops, station complexes, routes, and operators.
|
||||
- Add route-layer diff view after rebuilds.
|
||||
|
||||
## P3: Additional Formats and Live Data
|
||||
|
||||
- Add NeTEx import.
|
||||
- Add GTFS-Realtime ingestion for service alerts and trip updates.
|
||||
- Add SIRI profile support where national APIs expose it.
|
||||
- Add GBFS/shared mobility only after core public transport data is stable.
|
||||
- Model temporary closures and disruptions as validity-windowed events, not modifications to base route geometry.
|
||||
|
||||
## Open Optimization List
|
||||
|
||||
Not yet implemented, or only partially implemented:
|
||||
|
||||
- RAPTOR/CSA routing core.
|
||||
- Precomputed public-transport transfer graph.
|
||||
- Durable PostgreSQL route-search cache.
|
||||
- In-flight identical search coalescing.
|
||||
- Hub-aware long-distance routing.
|
||||
- Local-transport-only routing profile.
|
||||
- Access/egress legs excluded from transfer budget at the search-state level.
|
||||
- Better pruning for off-course exploration and dominated labels.
|
||||
- SQL/array-based canonical stop extraction.
|
||||
- Incremental route-layer rebuild.
|
||||
- Route-layer shadow tables/versioned activation.
|
||||
- Stop-to-stop OSM route fallback for missing routes and detours.
|
||||
- Detour/temporary variant classification.
|
||||
- PostGIS-first OSM route candidate filtering.
|
||||
- Vector tiles or PMTiles for large route layers.
|
||||
- Alembic migrations.
|
||||
- Persistent query/stage timing diagnostics.
|
||||
|
||||
## Recommended Next Sprint
|
||||
|
||||
1. Finish the route-layer rebuild currently in progress and verify route-pattern/trip-pattern link counts.
|
||||
2. Restart/reload the server so it picks up the current checkout fixes.
|
||||
3. Add route-search diagnostics and timing instrumentation around address access, direct, one-transfer, and round-search stages.
|
||||
4. Implement transfer graph precomputation and exclude access/egress walking from transfer count.
|
||||
5. Add a hub-aware city-to-city search path for long-distance requests.
|
||||
6. Add a local-only routing profile using route scopes.
|
||||
7. Convert route-layer rebuild to shadow/versioned tables or incremental updates.
|
||||
8. Add Alembic migrations and stop doing routine schema checks during normal app/worker startup.
|
||||
BIN
docs/europe_transport_sources_catalog_v2.xlsx
Normal file
BIN
docs/europe_transport_sources_catalog_v2.xlsx
Normal file
Binary file not shown.
59
docs/generated/gtfs_discovery_report.json
Normal file
59
docs/generated/gtfs_discovery_report.json
Normal file
@@ -0,0 +1,59 @@
|
||||
{
|
||||
"generated_at": "2026-07-01T13:22:51.928761+00:00",
|
||||
"countries": [
|
||||
"DE",
|
||||
"AT",
|
||||
"CH",
|
||||
"NL",
|
||||
"DK",
|
||||
"FR",
|
||||
"BE",
|
||||
"LU",
|
||||
"NO",
|
||||
"SE",
|
||||
"FI",
|
||||
"IE",
|
||||
"GB"
|
||||
],
|
||||
"sources": {
|
||||
"mobility_database": "https://files.mobilitydatabase.org/feeds_v2.csv",
|
||||
"mobility_acceptance_test_list": "https://raw.githubusercontent.com/MobilityData/gtfs-validator/master/scripts/mobility-database-harvester/acceptance_test_feed_list.csv",
|
||||
"ptna": "https://ptna.openstreetmap.de/gtfs/index.html"
|
||||
},
|
||||
"counts": {
|
||||
"candidates": 1090,
|
||||
"ingestable": 990,
|
||||
"test_run": 24,
|
||||
"by_source": {
|
||||
"curated_seed": 9,
|
||||
"curated_seed; mobility_database": 2,
|
||||
"curated_seed; mobility_database; mobility_validator_acceptance": 2,
|
||||
"mobility_database": 805,
|
||||
"mobility_database; mobility_validator_acceptance": 149,
|
||||
"mobility_database; mobility_validator_acceptance; ptna": 1,
|
||||
"mobility_validator_acceptance": 3,
|
||||
"ptna": 119
|
||||
},
|
||||
"ingestable_by_country": {
|
||||
"AT": 6,
|
||||
"BE": 15,
|
||||
"CH": 10,
|
||||
"DE": 68,
|
||||
"DK": 1,
|
||||
"EU": 1,
|
||||
"FI": 31,
|
||||
"FR": 727,
|
||||
"GB": 46,
|
||||
"IE": 68,
|
||||
"LU": 3,
|
||||
"NL": 3,
|
||||
"NO": 4,
|
||||
"SE": 7
|
||||
}
|
||||
},
|
||||
"files": {
|
||||
"candidates": "/mnt/DATA/git/meubility-workbench/docs/generated/gtfs_feed_candidates.csv",
|
||||
"ingestable": "/mnt/DATA/git/meubility-workbench/docs/generated/gtfs_ingestable_sources.csv",
|
||||
"test_run": "/mnt/DATA/git/meubility-workbench/docs/generated/gtfs_test_run_sources.csv"
|
||||
}
|
||||
}
|
||||
1091
docs/generated/gtfs_feed_candidates.csv
Normal file
1091
docs/generated/gtfs_feed_candidates.csv
Normal file
File diff suppressed because it is too large
Load Diff
991
docs/generated/gtfs_ingestable_sources.csv
Normal file
991
docs/generated/gtfs_ingestable_sources.csv
Normal file
@@ -0,0 +1,991 @@
|
||||
name,kind,url,country,license,mode_scope,source_basis,priority,notes
|
||||
AT Bean Shuttle GTFS,gtfs,https://www.beanshuttle.com/gtfs.zip,AT,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2036/latest.zip
|
||||
BE Communauté Urbaine de Dunkerque / Réseau Dk Bus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f51fabfb-9d7a-44b7-bd03-d1032337fb80,BE,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82683/latest.zip
|
||||
BE De Lijn GTFS,gtfs,https://files.mobilitydatabase.org/mdb-684/latest.zip,BE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Selected Mobility Database latest.zip mirror because the catalog direct URL is known to be stale.; Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.
|
||||
BE Eurostar International Ltd. GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bfd97acd-63f3-4ea4-bfe8-70e4c7fd8d13,BE,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82199/latest.zip
|
||||
BE Métropole Européenne de Lille GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c9e5dd3f-8eed-4ad7-aec2-915240599cf6,BE,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81995/latest.zip
|
||||
BE Société Régionale Wallonne du Transport GTFS,gtfs,http://opendata.tec-wl.be/Current%20GTFS/TEC-GTFS.zip,BE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1212/latest.zip
|
||||
CH Swiss national GTFS,gtfs,https://gtfs.geops.ch/dl/gtfs_complete.zip,CH,verify at opentransportdata.swiss,"rail,tram,metro,bus,ferry",European transport feeds / official Swiss OTD derivative,P0,geOps feed is derived from official Swiss Open Transport Data; verify production terms.
|
||||
CH Communauté d'Agglomération Annemasse - les Voirons Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/373e19e2-af0a-4939-9f33-3f1268d1e0bb,CH,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-76779/latest.zip
|
||||
CH Communauté de communes pays d'Evian - vallée d'Abondance GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/429c8587-676a-4ed3-8279-e67403bc36f4,CH,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80973/latest.zip
|
||||
CH Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80437/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,CH,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80437/latest.zip
|
||||
CH Pays de Gex Agglo GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/95c73b12-d117-4faf-bb6f-abfe3c95eee1,CH,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81585/latest.zip
|
||||
CH Systemaufgaben Kundeninformation SKI+ GTFS,gtfs,https://data.opentransportdata.swiss/de/dataset/gtfsflex/permalink,CH,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2053/latest.zip
|
||||
CH Systemaufgaben Kundeninformation SKI+ GTFS,gtfs,https://data.opentransportdata.swiss/dataset/timetable-2026-gtfs2020/permalink,CH,see https://opentransportdata.swiss/en/terms-of-use/#Definitions,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2898/latest.zip
|
||||
DE BBS Schapfl GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/ding.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-770/latest.zip
|
||||
DE bodo Verkehrsverbund GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/bodo.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-769/latest.zip
|
||||
DE generated national GTFS,gtfs,https://scraped.data.public-transport.earth/de/gtfs.zip,DE,verify upstream DELFI/Mobilithek/gtfs.de,"rail,tram,metro,bus",European transport feeds mirror,P0,Bootstrap only; prefer official DELFI/Mobilithek NeTEx for production.
|
||||
DE DELFI Germany-wide scheduled timetable data (GTFS),gtfs,https://www.opendata-oepnv.de/index.php?id=1384&tx_vrrkit_view%5Bsharing%5D=eyJkYXRhc2V0IjoiZGV1dHNjaGxhbmR3ZWl0ZS1zb2xsZmFocnBsYW5kYXRlbi1ndGZzIiwidXNlcklkIjo1MDM4fQ%3D%3D&tx_vrrkit_view%5Baction%5D=download&tx_vrrkit_view%5Bcontroller%5D=View,DE,see http://www.opendefinition.org/licenses/cc-by,bus,Mobility Database feed catalog,P0,
|
||||
DE Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80419/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,DE,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80419/latest.zip
|
||||
DE Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80467/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,DE,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80467/latest.zip
|
||||
DE Hofmann Omnibusverkehr GmbH GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/kvsh.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1082/latest.zip
|
||||
DE Karlsruher Verkehrsverbund GTFS,gtfs,https://projekte.kvv-efa.de/GTFS/google_transit.zip,DE,see https://www.kvv.de/fahrplan/fahrplaene/open-data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2231/latest.zip
|
||||
DE Münchner Verkehrs- und Tarifverbund GmbH (MVV) GTFS,gtfs,https://www.mvv-muenchen.de/fileadmin/mediapool/02-Fahrplanauskunft/03-Downloads/openData/mvv_gtfs.zip,DE,see https://www.mvv-muenchen.de/fahrplanauskunft/fuer-entwickler/opendata/index.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-779/latest.zip
|
||||
DE Münchner Verkehrsgesellschaft (MVG) GTFS,gtfs,https://www.mvg.de/static/gtfs/google_transit.zip,DE,see https://www.mvg.de/verbindungen/Fahrplandaten.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list; PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=DE-BY-MVG,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; PTNA candidate; use original publisher URL where available.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2333/latest.zip
|
||||
"DE OVA-Aalen, OVA-Bopfingen, Beck+Schubert GTFS",gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/oam.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-772/latest.zip
|
||||
DE Rhein-Neckar-Verkehr GTFS,gtfs,https://files.mobilitydatabase.org/mdb-777/latest.zip,DE,see https://opendata.rnv-online.de/dataset/gtfs-general-transit-feed-specification,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Selected Mobility Database latest.zip mirror because the catalog direct URL is known to be stale.; Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.
|
||||
DE Rurtalbahn GmbH | ABELLIO Rail | VIAS GmbH| Aachener Straßenbahn und Energieversorgungs-AG | Rurtalbus GmbH | WestVerkehr GmbH | Staatsbahnen | National Express | ASEAG Netliner GTFS,gtfs,http://opendata.avv.de/current_GTFS/AVV_GTFS_Masten_mit_SPNV.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1224/latest.zip
|
||||
"DE Rurtalbahn GmbH, ABELLIO Rail, VIAS GmbH, Aachener Straßenbahn und Energieversorgungs-AG, Rurtalbus GmbH, WestVerkehr GmbH, Staatsbahnen, National Express, ASEAG Netliner GTFS",gtfs,http://opendata.avv.de/current_GTFS/AVV_GTFS_mit_SPNV.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1094/latest.zip
|
||||
DE Schweizer Reisen GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_ohne_liniennetz/vgf.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-774/latest.zip
|
||||
DE SWU Verkehr GmbH (SWU) GTFS,gtfs,https://gtfs.swu.de/daten/SWU.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-776/latest.zip
|
||||
DE Verkehrsverbund Mittelthüringen (VMT) GTFS,gtfs,https://www.vmt-thueringen.de/fileadmin/VMT_Redaktion/OPEN_DATA/VMT_GTFS.zip,DE,see https://creativecommons.org/licenses/by-nd/2.0/de/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1172/latest.zip
|
||||
DE Verkehrsverbund Großraum Nürnberg (VGN) GTFS,gtfs,http://www.vgn.de/opendata/GTFS.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-858/latest.zip
|
||||
DE VOLZ GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/vgc.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-906/latest.zip
|
||||
DK Rejseplanen GTFS,gtfs,https://www.rejseplanen.info/labs/GTFS.zip,DK,verify Rejseplanen Labs terms,"rail,bus",Rejseplanen Labs / European transport feeds; Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,May require account/terms review for production.; Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1292/latest.zip
|
||||
"FI Haarasilta Toivo Samuli, Järvisen Liikenne Oy, Koiviston Auto Oy, Lehtimäen Liikenne Oy, Bus Travel Oy Reissu Ruoti, Tilausliikenne Kuisma Ky GTFS",gtfs,https://tvv.fra1.digitaloceanspaces.com/223.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1129/latest.zip
|
||||
FI Helsingin seudun liikenne (HSL) GTFS,gtfs,http://dev.hsl.fi/gtfs/hsl.zip,FI,see http://developer.reittiopas.fi/pages/en/home.php,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-865/latest.zip
|
||||
"FI J. Vainion Liikenne Oy, LS-Liikennelinjat Oy, Salon Tilausmatkat Oy GTFS",gtfs,https://tvv.fra1.digitaloceanspaces.com/239.zip,FI,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-5529/latest.zip
|
||||
FI Koskilinjat Oy GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/237.zip,FI,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-5528/latest.zip
|
||||
FI Kuopio GTFS,gtfs,http://karttapalvelu.kuopio.fi/google_transit/google_transit.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-868/latest.zip
|
||||
"FI Kuopion Tila-Auto Oy*, Oy Pohjolan liikenne Ab*, Kymen Charterline Oy, Mika K. Niskanen Oy, Savonlinja Oy, Pohjolan Matka (Pohjolan Turistiauto Oy), Kuopion Liikenne GTFS",gtfs,https://tvv.fra1.digitaloceanspaces.com/221.zip,FI,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-832/latest.zip
|
||||
FI Lappeenranta GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/225.zip,FI,see https://opendata.waltti.fi/docs,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-929/latest.zip
|
||||
FI Mikkeli GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/227.zip,FI,see https://opendata.waltti.fi/docs#gtfs-packages,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1130/latest.zip
|
||||
FI Tampereen joukkoliikenne (JOLI) GTFS,gtfs,http://data.itsfactory.fi/journeys/files/gtfs/latest/gtfs_tampere.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-866/latest.zip
|
||||
FI Waltti - Hämeenlinna GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/203.zip,FI,see https://creativecommons.org/licenses/by/4.0/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1098/latest.zip
|
||||
FI Waltti - Joensuu GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/207.zip,FI,see https://creativecommons.org/licenses/by/4.0/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1227/latest.zip
|
||||
FI Waltti - Jyväskylä GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/209.zip,FI,see https://creativecommons.org/licenses/by/4.0/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-867/latest.zip
|
||||
FI Waltti - Kotka GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/217.zip,FI,see https://creativecommons.org/licenses/by/4.0/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1127/latest.zip
|
||||
FI Waltti - Kouvola GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/219.zip,FI,see https://creativecommons.org/licenses/by/4.0/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1128/latest.zip
|
||||
FI Waltti - Oulu GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/229.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-869/latest.zip
|
||||
FI Waltti - Pori GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/231.zip,FI,see https://creativecommons.org/licenses/by/4.0/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2839/latest.zip
|
||||
FI Waltti - Vaasa GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/249.zip,FI,see https://opendata.waltti.fi/docs#gtfs-packages,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1131/latest.zip
|
||||
FR Agglo Pays d'Issoire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7d54a219-8fe7-4635-a9fc-cefe5e57462e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81643/latest.zip
|
||||
FR AGGLOMERATION DU GARD RHODANIEN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3ba2db00-235a-4958-980e-5cdd20ce204e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83042/latest.zip
|
||||
FR Agglomération Montargoise Et rives du loing GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/39240e80-d3f4-4702-ba93-520fae414649,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83326/latest.zip
|
||||
FR Agglomération Sud Sainte Baume GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/050ef913-901c-49b7-babe-ef648e14314d,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83893/latest.zip
|
||||
FR ALEO GTFS,gtfs,https://transport.data.gouv.fr/resources/83839/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83839/latest.zip
|
||||
FR Alliance Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c85977b8-7d18-439c-935c-935286503003,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82832/latest.zip
|
||||
FR ALTERNEO GTFS,gtfs,https://transport.data.gouv.fr/resources/80676/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80676/latest.zip
|
||||
FR Ametis GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/571e3014-066d-4bd6-9f73-fb8065c928c6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80223/latest.zip
|
||||
FR Ametis GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5bc8b7dc-0d4e-48e7-b2ec-eccfe3702c19,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80705/latest.zip
|
||||
FR Angers Loire Métropole GTFS,gtfs,https://transport.data.gouv.fr/resources/84018/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84018/latest.zip
|
||||
FR Angers Loire Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/32f30b64-33f7-43bb-9b6f-34c21c2f83a3,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80036/latest.zip
|
||||
FR Armor Argoat Mobilité • Guingamp-Paimpol Mobilité GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/52057eec-3786-444c-8696-8473c4c6888e,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-72361/latest.zip
|
||||
FR Auray Quiberon Terre Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4dc52cb9-909e-40dd-80e5-a90073bc80fc,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83300/latest.zip
|
||||
FR BlaBlaCar Bus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fd54f81f-4389-4e73-be75-491133d011c3,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-52605/latest.zip
|
||||
FR Bordeaux Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/10b87ffe-e6bb-494d-93df-bb6019e223d9,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83024/latest.zip
|
||||
FR Brest métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/583d1419-058b-481b-b378-449cab744c82,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-43286/latest.zip
|
||||
FR Brittany Ferries GTFS,gtfs,https://transport.data.gouv.fr/resources/83427/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83427/latest.zip
|
||||
FR CA ANNONAY RHONE AGGLO GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/df09ccf2-6cf5-4d7a-b03a-b3ebb89f7e49,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83872/latest.zip
|
||||
FR CA ANNONAY RHONE AGGLO GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3bfed862-eba7-4480-9fd5-609b90f8a282,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83716/latest.zip
|
||||
FR CA ANNONAY RHONE AGGLO GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5e4b3c12-dd7d-4dcf-a761-41024867e56d,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82904/latest.zip
|
||||
FR CA de CASTRES - MAZAMET GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/70c9f936-129e-41f4-940a-8e6f272535d1,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79508/latest.zip
|
||||
FR CA Luberon Monts de Vaucluse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a6a32465-7366-47dc-a6fc-96f1d52b09d8,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83394/latest.zip
|
||||
FR Carcassonne Agglo GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/941123b0-004e-4f48-81e7-8c62fb4f07aa,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81876/latest.zip
|
||||
FR Challans Gois Communauté GTFS,gtfs,https://transport.data.gouv.fr/resources/83969/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83969/latest.zip
|
||||
FR CHATEAUROUX METROPOLE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bd5489c2-5f32-4b06-8d55-e5a6f4fd6a51,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83794/latest.zip
|
||||
FR CITALIS GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fc065c47-8644-4941-a8ca-4d8322a45749,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82879/latest.zip
|
||||
FR CITALIS GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fa444b8c-c1c6-4015-ac50-f748c12764d3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82880/latest.zip
|
||||
FR Clermont Auvergne Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4e237a58-cd14-4746-b729-1337a40a8a7b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83465/latest.zip
|
||||
FR Coeur de Flandre Agglo GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d10fb9b2-1a5a-498c-ba55-e89cf0136ef1,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83202/latest.zip
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fe20cb23-34b8-4965-acf7-1b28bf966891,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83200/latest.zip
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/aabc13db-58bd-41d4-8c06-ea3a10e604a6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82994/latest.zip
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/edabe706-150e-4320-a0cc-b68eed217495,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82305/latest.zip
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f88c086a-4fe1-4af1-a027-f809d4422b65,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82997/latest.zip
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a84de823-7cb2-4d87-917e-e2dda306d280,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82995/latest.zip
|
||||
FR COMMUNAUTE D'AGGLOMERATION DES 2 BAIES EN MONTREUILLOIS GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/db348d20-dc9a-48c7-8120-30afa160a921,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83315/latest.zip
|
||||
FR COMMUNAUTE DE COMMUNES COEUR DE MAURIENNE ARVAN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ff226a89-a0f7-4e6d-af0c-3b34b54306ae,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83258/latest.zip
|
||||
FR Communauté d'Agglomération Beaune Côte et Sud GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c00d487c-4766-4ca1-b736-e7de110331d9,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82660/latest.zip
|
||||
FR Communauté d'agglomération Chauny-Tergnier-La Fère GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6b80d2f2-ecd4-4b52-b481-c8b715ae8948,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80221/latest.zip
|
||||
FR Communauté d'Agglomération de Bastia GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ddbec19b-ed75-42f8-9205-573519298632,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83962/latest.zip
|
||||
FR Communauté d'Agglomération de Cambrai GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d00d1684-6980-4f84-ac38-363b6f058a68,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80751/latest.zip
|
||||
FR Communauté d'agglomération de l'Albigeois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/eff42667-b36b-4334-bc44-6cf620f90cbf,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79687/latest.zip
|
||||
FR Communauté d'agglomération de l'Albigeois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/15878ceb-7c8f-4546-bb0f-c540a15f2188,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79728/latest.zip
|
||||
FR Communauté d'agglomération de l'Albigeois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/79bc8313-3499-4bb3-a809-a8d80b44000a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81522/latest.zip
|
||||
FR Communauté d'Agglomération de l'Auxerrois GTFS,gtfs,https://transport.data.gouv.fr/resources/78934/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-78934/latest.zip
|
||||
FR Communauté d'agglomération de la Baie de Somme GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/47ead4b7-dc95-41f0-916a-7411d9f6b79b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83757/latest.zip
|
||||
FR Communauté d'Agglomération de la Région de Château-Thierry GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/37ea6578-7f6d-4124-a150-3d450a529d69,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83793/latest.zip
|
||||
FR Communauté d'Agglomération de Nevers GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0b65dda9-51d3-4d84-811b-8a11629a5e55,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81227/latest.zip
|
||||
FR Communauté d'agglomération du Boulonnais GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/579e4f5e-2916-4b95-90b8-f675943d0136,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-51449/latest.zip
|
||||
FR Communauté d'Agglomération du Caudrésis et du Catésis GTFS,gtfs,https://transport.data.gouv.fr/resources/83396/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83396/latest.zip
|
||||
FR Communauté d'Agglomération du Grand Cahors GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1f768fa6-ee56-44d8-b322-75b946128fd8,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81432/latest.zip
|
||||
FR Communauté d'Agglomération du Grand Dole GTFS,gtfs,https://transport.data.gouv.fr/resources/81254/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81254/latest.zip
|
||||
FR Communauté d'agglomération du grand Gueret GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4b6bb4e3-86de-491c-8a0a-7892a02ccb60,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79483/latest.zip
|
||||
FR Communauté d'Agglomération du Grand Sénonais INTERCOM GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5f2edbfa-9025-467d-b769-80f4b1ec4ba6,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81898/latest.zip
|
||||
FR Communauté d'Agglomération du Grand Villeneuvois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9248b0d3-3675-491b-8990-8058f0fdbcb5,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82189/latest.zip
|
||||
FR Communauté d'Agglomération du Nord Grande-Terre GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/dbfbff0f-46b6-44d5-a4d4-60d4ac645026,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83020/latest.zip
|
||||
FR Communauté d'Agglomération du Pays Ajaccien GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5021a054-43c8-49b7-9166-832f2af6c8f0,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83836/latest.zip
|
||||
FR Communauté d'agglomération du Pays de Grasse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1580b34b-8ebe-42b1-b75c-a4ae41d579b7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79821/latest.zip
|
||||
FR Communauté d'agglomération du Pays de Grasse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/851decd7-714f-45ad-9579-d60c06fad9dd,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79822/latest.zip
|
||||
FR Communauté d'Agglomération du Pays de Laon GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ab24abac-0a10-4695-9d4d-c0df885b1970,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80707/latest.zip
|
||||
FR Communauté d'Agglomération du Pays de Saint-Omer GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a490d239-942a-4206-bed5-cef5a2007bcd,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83987/latest.zip
|
||||
FR Communauté d'Agglomération du Pays de Saint-Omer GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/78d8705c-bcb9-40aa-98b7-2486ffe9fee5,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83988/latest.zip
|
||||
FR Communauté d'Agglomération du Puy-en-Velay GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0bdd928a-c207-4d69-9a0b-cd520e870d7f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,
|
||||
FR Communauté d'Agglomération du Puy-en-Velay GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c3d26d87-0b2d-4d26-83b5-9474f12e31a4,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,
|
||||
FR Communauté d'Agglomération du Puy-en-Velay GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4544fd3f-cf34-4bab-b6ff-e5ad89476347,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,
|
||||
FR Communauté d'Agglomération du Puy-en-Velay GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/72cb82e9-df44-40fc-9fea-4f93f8a527af,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,
|
||||
FR Communauté d'Agglomération Gaillac Graulhet GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d0579894-2c5a-42c1-a44c-d550af952f49,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82583/latest.zip
|
||||
FR Communauté d'Agglomération Gaillac Graulhet GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/257e5818-a7cc-49a4-8ad1-3e84827e6458,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82584/latest.zip
|
||||
FR Communauté d'Agglomération Gaillac Graulhet GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6f4deabd-1cad-435c-8915-235132d74291,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82196/latest.zip
|
||||
FR Communauté d'Agglomération Gaillac Graulhet GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/75f3dd86-0840-445a-92a6-b329bdc61e0c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81535/latest.zip
|
||||
FR Communauté d'Agglomération Gap-Tallard-Durance GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/78b7910e-1e31-4f47-b0a8-949647205ffa,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81547/latest.zip
|
||||
FR Communauté d'Agglomération Hérault Méditerranée GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8ce8ad14-836e-427d-bb43-a6ce0ab5ecda,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-64163/latest.zip
|
||||
FR Communauté d'agglomération Le Cotentin GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2e97c9b3-a59f-42dd-9b9e-a232fa771f21,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79831/latest.zip
|
||||
FR Communauté d'Agglomération Lunel Agglo GTFS,gtfs,https://transport.data.gouv.fr/resources/83852/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83852/latest.zip
|
||||
FR Communauté d'agglomération Porte de l'Isère GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/34d2cc17-3eb5-4581-917b-a8c06a112e61,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-52937/latest.zip
|
||||
FR Communauté d'Agglomération Privas Centre Ardèche GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ba477c37-93d1-4900-8958-baf3c6de9d57,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79272/latest.zip
|
||||
FR Communauté d'Agglomération Privas Centre Ardèche GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/360ad21a-9f4b-41bc-8bbe-a9299eb0f453,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79274/latest.zip
|
||||
FR Communauté d'Agglomération Privas Centre Ardèche GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5120cc52-6cc0-46b7-83b0-4c5cfd8f920c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81342/latest.zip
|
||||
FR Communauté d'Agglomération Provence Verte GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9e2f1dd8-cbf4-4e56-b022-b350561a1cb9,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83429/latest.zip
|
||||
FR Communauté d'Agglomération Royan Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d4915904-ebd0-43cf-9b35-fbfc04ce91fd,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79499/latest.zip
|
||||
FR Communauté d'Agglomération Sophia Antipolis (CASA) GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/344e5a88-e993-4952-a8bf-503048020116,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,
|
||||
FR Communauté d'Agglomération Ventoux Comtat Venaissin GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/89e09fb6-0bf6-4ca5-95f3-50444ab6c681,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83785/latest.zip
|
||||
FR Communauté de Communes Chinon Vienne et Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/af410692-57e7-44a3-b888-4efccdf77014,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83189/latest.zip
|
||||
FR Communauté de Communes Chinon Vienne et Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6348dadb-600c-42c8-bbea-f6ac09505ed3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82150/latest.zip
|
||||
FR Communauté de communes Cluses Arve et Montagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0602751c-36aa-445d-88e2-aa4c51c13205,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83578/latest.zip
|
||||
FR Communauté de Communes Cœur de Loire GTFS,gtfs,https://transport.data.gouv.fr/resources/83859/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83859/latest.zip
|
||||
FR Communauté de Communes de l'Ouest Vosgien GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e25586ac-9c14-4872-b00e-e66cd23d4413,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79814/latest.zip
|
||||
FR Communauté de communes de Millau Grands Causses GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b1b9a7a6-1baf-4113-a6c3-4c5713764ffd,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83978/latest.zip
|
||||
FR Communauté de communes de Serre-Ponçon GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bb9234b1-3837-4765-b630-276dc8bdabea,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83364/latest.zip
|
||||
FR Communauté de Communes des Montagnes du Giffre GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/58b67800-a341-44f8-9a31-864d578b9757,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81549/latest.zip
|
||||
FR Communauté de Communes du Briançonnais GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3ee23301-f454-4175-ba53-4734c30d5245,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81455/latest.zip
|
||||
FR Communauté de Communes du Centre-Ouest GTFS,gtfs,https://transport.data.gouv.fr/resources/83891/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83891/latest.zip
|
||||
FR Communauté de communes du Guillestrois et du Queyras GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bf92984d-4441-4cad-9879-b7209acba875,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82604/latest.zip
|
||||
FR Communauté de Communes du Pays d'Orange en Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d6c75600-f730-421e-93df-74a143d3e723,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,
|
||||
FR Communauté de Communes du Pays des Écrins GTFS,gtfs,https://transport.data.gouv.fr/resources/83467/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83467/latest.zip
|
||||
FR Communauté de Communes du Sud Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/74f0577f-f709-49db-9750-53c9341f6e50,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81252/latest.zip
|
||||
FR Communauté de Communes Haute-Tarentaise GTFS,gtfs,https://transport.data.gouv.fr/resources/83848/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83848/latest.zip
|
||||
FR Communauté de Communes Haute-Tarentaise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/76210b43-2647-413a-84ed-adf07b0a7392,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83626/latest.zip
|
||||
FR Communauté de Communes Haute-Tarentaise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9f5669e6-202f-4166-99e4-0e15586e1473,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83627/latest.zip
|
||||
FR Communauté de communes pays d'Evian - vallée d'Abondance GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e8b38261-a2df-4f48-8749-93d56c468454,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83746/latest.zip
|
||||
FR Communauté de Communes Pyrénées Vallées des Gaves GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/382a2cc7-336b-49ff-9fbd-f355629701b6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83155/latest.zip
|
||||
FR Communauté de communes Retz-en-Valois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/39bc7064-a387-4eef-9fb3-05fe3c00782b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82689/latest.zip
|
||||
FR Communauté d’Agglomération du Centre Littoral de Guyane GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9e9cea88-cdf9-4f3d-ae17-f30c9512d276,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81705/latest.zip
|
||||
FR Communauté d’Agglomération du Centre Littoral de Guyane GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/12c4aed4-a268-4cb3-8fb5-7625a747cf56,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83304/latest.zip
|
||||
FR Communauté d’Agglomération du Centre Littoral de Guyane GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d358344a-ed1e-4d74-b092-9cbc517b10a8,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83781/latest.zip
|
||||
FR Commune Les Deux Alpes GTFS,gtfs,https://transport.data.gouv.fr/resources/83810/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83810/latest.zip
|
||||
FR Commune Les Deux Alpes GTFS,gtfs,https://transport.data.gouv.fr/resources/83813/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83813/latest.zip
|
||||
FR Commune Les Deux Alpes GTFS,gtfs,https://transport.data.gouv.fr/resources/83989/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83989/latest.zip
|
||||
FR CTS - Compagnie des Transports Strasbourgeois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/eeea9e52-4f8a-459e-aef5-a093a3b05356,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79220/latest.zip
|
||||
FR DECAZEVILLE COMMUNAUTE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0163a7b9-f4b1-4d9b-9af1-dcc0e583c3b4,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11492/latest.zip
|
||||
FR DiviaMobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e0dbd217-15cd-4e28-9459-211a27511a34,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80742/latest.zip
|
||||
FR Dracénie Provence Verdon agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a1d56fb0-e264-45f2-8822-c08746847a97,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81907/latest.zip
|
||||
FR Durance Luberon Verdon Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/37f08652-500d-488e-a47f-8c9efc5ae704,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83291/latest.zip
|
||||
FR Durance Luberon Verdon Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/117daa0d-0708-43aa-a562-7d5b3d6570b0,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83789/latest.zip
|
||||
FR Département de Loire-Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b2678aad-0d78-45f8-aa59-f78723ede272,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84024/latest.zip
|
||||
FR Ecov GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/54519164-6c88-4a48-9040-1e6ae482d277,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84033/latest.zip
|
||||
FR Ecov GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/17957a96-2193-442d-b313-f39160dd41e4,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84034/latest.zip
|
||||
FR Ecov GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e1114a8b-ae58-4e27-b5bd-541777bcf812,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84035/latest.zip
|
||||
FR ENVIA GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/52216d2f-072e-4b7d-af0c-15d8d4e98b09,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82662/latest.zip
|
||||
FR Espace Communautaire Lons Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ec61df69-830c-4ca7-9d78-9a81c515e9de,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82415/latest.zip
|
||||
FR Estérel Côte d'Azur Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/11e73e58-edb0-4fab-a220-61a659cf6423,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82294/latest.zip
|
||||
FR Eurométropole de Metz GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/92af6161-1b1a-4e0b-8f60-d97f213d993a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80725/latest.zip
|
||||
FR EVEOLE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/99cf5e2f-87c2-4ff1-bc0d-32f04cc213ab,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79544/latest.zip
|
||||
FR Flibco.com GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f389ca22-d36e-46f3-9e00-5be700c7139b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83405/latest.zip
|
||||
FR Flixbus France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/30d94e83-48a4-4c44-8a96-c082377f5221,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11681/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/83634/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83634/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/82907/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82907/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/83635/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83635/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80413/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80413/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80415/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80415/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80417/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80417/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/81989/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81989/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80421/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80421/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80423/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80423/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80425/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80425/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80427/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80427/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80429/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80429/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80469/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80469/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80410/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80410/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80463/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80463/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80401/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80401/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80594/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80594/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80461/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80461/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80447/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80447/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80465/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80465/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80445/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80445/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80475/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80475/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80403/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80403/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80443/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80443/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80439/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80439/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80772/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80772/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80449/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80449/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/83277/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83277/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80457/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80457/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80787/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80787/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/83710/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83710/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80453/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80453/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/82388/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82388/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80451/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80451/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80459/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80459/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80477/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80477/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80479/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80479/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/82413/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82413/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80441/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80441/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80471/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80471/latest.zip
|
||||
FR Grand Autunois Morvan GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b587f007-9b23-4b2a-a609-9368a4c2c490,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83383/latest.zip
|
||||
FR Grand Avignon GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/68eddc46-b2f5-4396-a298-214c415f6984,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83330/latest.zip
|
||||
FR GRAND BESANCON METROPOLE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e18e0aeb-8805-47fd-bcdb-c226d21c96fe,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80590/latest.zip
|
||||
FR Grand Bourg Agglomération GTFS,gtfs,https://transport.data.gouv.fr/resources/83841/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,
|
||||
FR Grand Bourg Agglomération GTFS,gtfs,https://transport.data.gouv.fr/resources/83842/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83842/latest.zip
|
||||
FR Grand Chambéry GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a81cdf4b-5a70-43c9-8803-cef0430ed170,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82272/latest.zip
|
||||
FR Grand Poitiers Open Data GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/82146378-f019-4048-aa16-7328081b0369,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83389/latest.zip
|
||||
FR Intercom Bernay Terres de Normandie GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9ffc26b2-d293-4ec5-9cf6-4690d542f019,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83775/latest.zip
|
||||
FR Intercom de la Vire au Noireau GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4d6b5107-fec7-41bb-9115-bbaafb7e71f1,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83769/latest.zip
|
||||
FR Intercom de la Vire au Noireau GTFS,gtfs,https://transport.data.gouv.fr/resources/83846/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83846/latest.zip
|
||||
FR Intercom de la Vire au Noireau GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f3b71a3c-85fd-48c3-a3f0-30b988a4e64b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83770/latest.zip
|
||||
FR J'ybus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9ba40024-aa3c-4e5c-b065-47eef0edf4e6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80092/latest.zip
|
||||
FR JVMALIN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a193c142-366b-4a7b-8afb-c0bdf23ca7ea,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83521/latest.zip
|
||||
FR JVMALIN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6c52238c-1c1e-4c5a-922b-8b66c0415a9e,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83530/latest.zip
|
||||
FR JVMALIN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ef2f826c-c1c6-4eb4-9da3-4c2f37b3afaf,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83519/latest.zip
|
||||
FR JVMALIN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e6e0228d-9916-4216-bb5f-7d3edf57866c,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83526/latest.zip
|
||||
FR JVMALIN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/937d0757-36c0-4f96-95a4-287b58048c2d,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83528/latest.zip
|
||||
FR JVMALIN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c654db3d-a710-4ed5-a327-417f812bf9a0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83529/latest.zip
|
||||
FR JVMALIN GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/485e9781-ac86-44c7-8289-3a7df46abe9d,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83531/latest.zip
|
||||
FR Keolis Menton Riviera GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/72609821-2459-47fb-a63b-3dbbc0d96c92,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79559/latest.zip
|
||||
FR Keolis RIOM GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/90846819-b970-48db-967b-8eb7d2da3e2c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82547/latest.zip
|
||||
FR L'agglo Foix-Varilhes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8c9dd4f7-f6a2-4760-915e-89e8f9666c5b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-25381/latest.zip
|
||||
FR La Roche-sur-Yon Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/735f1b54-94ee-4a21-b378-05c2b44ad70f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79520/latest.zip
|
||||
FR LAVAL Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bd780aef-b3c5-405a-8450-8365a7a0c315,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81018/latest.zip
|
||||
FR Le Grand Chalon GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f11a3766-1fe3-40ff-8d5a-27eacda3d9a7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82664/latest.zip
|
||||
FR Le Grand Narbonne Communauté d'Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/21ca2b77-505b-4c4c-9154-a044e61b8560,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82661/latest.zip
|
||||
FR LOIRE FOREZ AGGLOMERATION GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4536fa0a-b880-47ba-9f94-42a6303e584b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81539/latest.zip
|
||||
"FR Loopi, Mobilité Touristique GTFS",gtfs,https://www.data.gouv.fr/api/1/datasets/r/179d2464-f007-4005-976e-a7fd98eabcfe,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83994/latest.zip
|
||||
FR Lorient Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c32dc40b-3538-4796-a6c6-fd239d6c1364,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80611/latest.zip
|
||||
FR Lulu en Bus (Ex. Lunéo) GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/94b9beba-c897-46ca-98a4-a78bf320047a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83509/latest.zip
|
||||
FR Martinique Transport GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/39fc07d6-65b5-49f0-a5f2-d56757a8dd42,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82139/latest.zip
|
||||
FR Martinique Transport GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c14a1893-58a1-4e7b-830e-bd1f9daa863d,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82289/latest.zip
|
||||
FR Martinique Transport GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6e599077-0719-44b4-82ad-0da90a282846,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79898/latest.zip
|
||||
FR Martinique Transport GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/82481c27-2e52-40ef-a563-b011ba487ead,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80606/latest.zip
|
||||
FR Martinique Transport GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d910480b-c9a5-4f48-a257-4e1aa799c5c8,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79897/latest.zip
|
||||
FR Montluçon Communauté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1ee93a61-23e0-49f0-bf76-b5c367d5e42c,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83369/latest.zip
|
||||
FR Montpellier Méditerranée Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2ef043c8-3b10-4d87-af5f-65fead127407,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-62323/latest.zip
|
||||
FR Montpellier Méditerranée Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/350c3f75-226e-4570-960a-dec2144926b6,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81754/latest.zip
|
||||
FR Montpellier Méditerranée Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c9d1350c-8f13-4761-b603-ecb12ad10b5b,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83773/latest.zip
|
||||
FR Montélibus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/974cede8-3a14-4c7b-b94d-b2655c31932e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82545/latest.zip
|
||||
FR Mulhouse Alsace Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7db50c2d-3fe4-4d3d-9942-57ac37c93a8d,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80492/latest.zip
|
||||
FR Mâconnais Beaujolais Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2c2b58d3-e93b-47af-be7b-655520671cbc,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83507/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7eef6ec9-9ebb-44f2-becb-2efc522522d6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39589/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e8a86701-6359-45de-bee5-95e648ec04e3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39591/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3fc0ab9d-e7a3-4728-ac2b-be55d927e757,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39592/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/42b6ba26-f817-455b-8ba2-fa30c100fff4,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39593/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d61403a1-7694-4a04-a229-044a43aff5c7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39594/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2bbfc260-c908-4242-9796-c30cb36511ea,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39595/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/05c200aa-798d-4660-bae7-3bf77e551c4c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39596/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c671fcc9-fe50-4bee-bed0-78e6443c1010,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39597/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/cde22673-d7e4-4cbd-837f-3c8bd9fb2f9b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39598/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2316529b-3075-4013-8b85-46777fefd56e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39599/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ab931edc-0bc1-4c66-960a-dc546b46df03,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39601/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/10e025a1-9048-4c19-925a-466b2a79232f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39602/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9b44cd04-63b5-46e7-a1f8-d5c1c9151d5a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-39603/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/304e9bce-2f14-417f-a02e-3b3827de8d5f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80736/latest.zip
|
||||
FR Métropole d'Aix-Marseille-Provence GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0d9a808a-9a9b-4fa1-b04f-339bbf2cfeb1,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81969/latest.zip
|
||||
FR Métropole Nice Côte d'Azur GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f5678ab2-c863-4b48-ba1f-9021c7d97634,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83178/latest.zip
|
||||
FR Métropole Rouen Normandie GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c0ebcf01-954a-4d24-b2d8-a00333ffe937,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-64973/latest.zip
|
||||
FR Métropole Toulon Provence Méditerranée GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b0789d9e-5077-4124-b6b2-773353ada8cf,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79560/latest.zip
|
||||
FR Métropole Toulon Provence Méditerranée GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9bba0b17-2863-4ee1-a38a-c7a445f820d1,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83739/latest.zip
|
||||
FR Nantes Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a18d5977-ca80-4712-a9cf-1a555feb2621,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83251/latest.zip
|
||||
FR NEMUS (Réseau de transports de Flers Agglo) GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e833d9ed-201b-4154-aa16-45bc00939571,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82094/latest.zip
|
||||
FR NEMUS (Réseau de transports de Flers Agglo) GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7d399812-e018-4e35-b3ae-a5ba44c8ad91,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83782/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7eb95c82-711e-455c-86b1-ecf138697ebe,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82324/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/161ac8dd-bdd4-40dd-bce7-d9edef4af632,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82326/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2b76718b-2ab0-4f95-8528-cce4bb6af4fe,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82328/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b3f2f823-fe55-4265-9efe-605d8156dea2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82330/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/73b1b1ee-2b81-445f-8930-b95a2b28a81b,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82334/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f7590323-d782-40ed-a6b2-67e37930f6cd,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82336/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ba4be162-cc7f-4c7c-9d96-376a225e9045,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82338/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8d1cc7f8-2b29-4844-b98c-4a5b35075d57,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82340/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/476fec18-c51f-41e6-9d15-0030174389c3,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82342/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bbf7ffef-9706-42e8-a3c8-7113168621d7,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82344/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/44806d1e-0798-40e6-8e7e-21d0ddfe81d2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82346/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a70fab11-9ac9-481e-91c8-925e805741ba,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82332/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c3f86058-0be8-41f2-aa98-82d3a5625f22,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82382/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://transport.data.gouv.fr/resources/83982/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83982/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/88be32ef-3b1a-493f-b73f-660a1f1b24f7,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82155/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9564e981-7d13-481d-a29f-452025af7432,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82319/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/33f62e9d-5495-413e-ae74-070e52ab51f7,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82375/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c521df03-4d5c-4b38-afee-ef6c0f43a491,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82359/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3af621da-a600-4da9-92c4-dfd994b22c31,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82361/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://transport.data.gouv.fr/resources/83853/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83853/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6fa85772-cc88-43ae-88b9-290e2f9345ff,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81557/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fd030551-a63e-441a-98f0-231186f35d82,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81555/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4ea5fb1f-3b42-4bc4-a255-303250fca10d,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82370/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/87a0b98b-8a5a-4ba7-ad93-5a1fb9924844,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82355/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/907e15b8-9a0c-4b00-b1b3-325ab1ac7d4c,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82357/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0346e2ae-a004-40be-8ded-ffff9046d29d,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82367/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://transport.data.gouv.fr/resources/83533/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83533/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d724a6df-7dc4-4b2d-be15-fc8b1d52c8ea,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82352/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/99e0887d-4855-47f7-a03b-a8235dfc86b7,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82348/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/03ebd538-f9be-4eb5-ab22-2b72a0ea338c,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82377/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7a0c4ec1-628d-4b72-b180-f0c2e750112f,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82365/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/414c049b-323a-4a5c-b484-e3917a62f485,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82363/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f5437c89-50b3-4d35-8d38-be8b9417f7a0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82373/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/79ab3997-524a-48a3-baec-9f40f37f7bd1,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82379/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/030b9c31-4237-4355-a4d5-842861d225be,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82350/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/191ae476-2ce8-48d7-bed2-b9cb93f63364,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81138/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/61277099-cde9-457c-919d-a1b7b20fe992,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81650/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d9ea4b4a-0717-4a7d-bd14-054afa192457,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81146/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0676f78f-66f9-4d37-a67e-0944ac5164e3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81072/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/23f2f5ed-c21c-4e87-840b-22512fe7c5d7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81645/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2b8e53ce-a26c-4986-8d33-505e1b680bb4,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81074/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1f143621-2afa-4aa8-a958-d5399e709347,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81042/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e61f439a-2321-4ce2-bda7-89be2494304f,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83921/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2ca2ec9b-666a-4fc8-a348-6e7ec79e59a0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83922/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b892a9c7-eda3-4d95-a70b-89b2391e81b0,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81519/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c6279e07-8e38-459a-b96a-96e4109d0816,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81649/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/960fa36a-d183-4140-ae64-462d92a06eb8,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81056/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ef552f35-b49b-43d5-8bbc-ad95efdc3d95,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81652/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3074dfb5-2209-44b4-891a-a86694ecaeec,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81054/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ad0a4757-eb8a-4c06-b459-51442466d5c4,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81058/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/44f6333d-58ab-4e0c-965e-5349d6d1cdcd,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81068/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2aeffed5-95e9-4587-a8f8-f52c32968566,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81648/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1364ed96-7246-4344-a04c-34bd06ef2b0c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81070/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/daba957b-c833-46ad-9bc1-12f3347ac4d9,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81651/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3a98dcdb-ca6e-40fc-8da9-09ab7f5661c9,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81066/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6fdd303a-7b76-4742-b877-d0923c59505b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81034/latest.zip
|
||||
FR Oise Mobilité - Syndicat mixte des transports collectifs de l'Oise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2c64085e-8c3d-4bb6-b935-a4b6c0a2e550,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81158/latest.zip
|
||||
FR Ondéa - COMPAGNIE DE TRANSPORT DU LAC DU BOURGET-CTLB GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b4e8d98c-e17a-4fb0-857f-8cb618039b2a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-71223/latest.zip
|
||||
FR Open Data Rodez Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f17028a4-61ff-4b84-8856-f7c8937a2c28,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81175/latest.zip
|
||||
FR Optymo - Belfort GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/797c424e-7d28-4e65-84f6-6677e47d8a6e,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79374/latest.zip
|
||||
FR Orléans Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b2dfbaa3-47e9-4749-b6a4-750bebd760e7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83751/latest.zip
|
||||
FR Orléans Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b85b743c-448c-411f-8fe6-e09bec2700a5,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81413/latest.zip
|
||||
FR Orléans Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f884648b-d687-4448-bc4c-2fc988ea0aed,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83784/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/231acea2-40cf-4c06-b2eb-646274e0b853,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-78904/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/dbd0bd0e-933a-4a21-8772-efdf41778e64,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79810/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://transport.data.gouv.fr/resources/83725/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83725/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/cd0ec3c7-431b-48df-bab5-269e5d5c05bb,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80147/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/19a7a109-54c0-4a40-ab60-4d5ca43ce823,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79859/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://transport.data.gouv.fr/resources/84032/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84032/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ff0dffab-3cf5-4c6b-b576-05bd462a1e33,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79861/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/317b003c-dca6-4908-8829-6d2842a5f47c,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79865/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/10bf603d-9302-435e-a1dd-a60d91a645ee,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79864/latest.zip
|
||||
FR Palmbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/47bc8088-6c72-43ad-a959-a5bbdd1aa14f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79420/latest.zip
|
||||
FR PASTEL GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/42e3ea9d-251e-47c1-988d-f7227f0e2d0f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82704/latest.zip
|
||||
FR Pau Béarn Pyrénées Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/cf2d4693-a21d-41af-9c92-a5f8847e38cb,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83631/latest.zip
|
||||
FR Pau Béarn Pyrénées Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/25c5caff-fb19-4358-bb83-7f18d320bea8,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83632/latest.zip
|
||||
FR Pays de Saint Gilles Croix de Vie Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/34972a83-5b0f-4f99-b9b7-19593a4c6e1d,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82097/latest.zip
|
||||
FR PROVENCE-ALPES-AGGLOMERATION GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0d9ebca0-d18e-44ad-ab95-d89e8a72d781,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83915/latest.zip
|
||||
FR Quimper Bretagne Occidentale GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/95530333-7002-401d-9d64-2829496d1c36,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83408/latest.zip
|
||||
FR REDON Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/342f132a-055a-45dd-a50a-6bef89c58444,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83126/latest.zip
|
||||
FR REDON Agglomération GTFS,gtfs,https://transport.data.gouv.fr/resources/83512/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83512/latest.zip
|
||||
FR RENFE VIAJEROS SOCIEDAD MERCANTIL ESTATAL SA GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/eae0fa46-087a-4018-ada9-d8add124e635,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82386/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/723f0cc2-476a-464c-a40e-bf8686f7bd8d,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83113/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/edf15063-dbbb-4543-a39d-60a39a418ee3,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-38889/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4e69fff6-6ae5-4ac0-9fa4-9c24ac92f291,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80961/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2053f06d-439c-43fe-87fb-528179a8502e,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81245/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6dc48e22-edc4-478d-896d-7fdd02bbcda9,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-38881/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/71926816-92f8-4620-8b8f-e1804f645e26,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80964/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/40ee9d6c-3bb9-409e-b670-986212de63f2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-40287/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d22ab458-e6b4-4334-96dc-18590a3d9e5d,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83114/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fff9c08f-6172-4c94-b46e-c0144ae0bd10,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-38888/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4b74f1b9-fcc0-4e59-bf48-7ed4102e5222,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81244/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c379471b-c554-4838-9c00-39d7ced7b53a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83112/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fda59af2-0eb3-4a8a-8e59-593f768f836a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80960/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bdbc488f-d0fa-42ae-8e05-a1c291a9abdf,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83633/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/05cabcf8-c4bb-4dab-b8f3-ddb826790511,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11081/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b9bef242-2af5-4945-a38a-c37b7e459e9e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11082/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fda806ca-8624-469d-9609-0279c91e914d,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11083/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6c4ac6eb-d6b0-496d-a52a-3e67666997c6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11084/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e42ce1e1-81c5-4ace-9fa8-e6091a89378c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11085/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e2d405ba-8b1c-4398-9623-45c8324da0bf,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11086/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/639db1dc-8b31-481e-a42f-46b27e3605f3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11087/latest.zip
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/28d3c8bf-97f7-40ed-be9f-00e737f74757,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83931/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8c7b32d0-6481-4bb8-b903-c50a04c72fec,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81559/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/073a6dbc-2bef-4088-afe4-68a29dd4e796,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82872/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8a2c923c-d308-4391-801e-2369cdde7749,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81478/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9ef537b6-fbeb-468f-9d6d-47c1d0d98504,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81479/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/705809de-4712-4c52-add3-03c4d9b6d621,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81480/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c09012e4-cb57-41da-94c5-013a19c99e14,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81481/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d09facf8-2bc9-4d8c-ad43-d5cb05f39406,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81461/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c407501a-ee0d-425d-b954-42927c0eaffa,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81462/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/69948a62-0bac-4081-8ae0-d71c4d5b751d,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81463/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1b9a895a-1843-4620-bcd4-3bbd08c396c0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81464/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3d50ba51-117b-4b13-80ca-0da5e9526042,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83728/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2935244d-b140-470a-8dfb-fb9efe3e0ec9,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83729/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7a438a9a-517b-492c-89b0-2e4d3b72ac00,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81486/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b7a835d4-7a93-4d8b-87bd-c657800fc29f,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81474/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/80fc7c72-e21e-4458-a692-9da58db27566,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81459/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/becd68ea-3e7a-4336-b14a-dc3bdcf493c2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81476/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6866358a-cb32-451b-8a88-067f704cc770,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82291/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/905a57b4-eb87-4c31-86cc-9a86deff09e0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81770/latest.zip
|
||||
FR Région Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d30403b4-6ff8-4a37-a661-8220b18e3cc5,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81927/latest.zip
|
||||
FR Région des Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c0bd9ff1-97f9-43f8-aca4-8e80d7728324,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79818/latest.zip
|
||||
FR Région des Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/916752e4-5daa-48bd-8bc1-4dd8d64f1d4a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80721/latest.zip
|
||||
FR Région des Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b57c66f2-ecaf-4934-a530-753bcb8aad04,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82641/latest.zip
|
||||
FR Région des Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4c73f3c1-7f5a-4e38-b97d-75471bd47048,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81416/latest.zip
|
||||
FR Région des Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d5920d40-ea72-4e22-b506-2ca583950282,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81421/latest.zip
|
||||
FR Région des Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7bc6edfa-d68c-4cc7-a1a5-a8b05fcae444,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81394/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4258bcab-8da4-42fe-a811-0135d7476e85,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82170/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3e744287-1221-47fd-96e0-fa154e79c4f8,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82169/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/429d30fb-ba6e-4e90-ad82-6d589a98cbfd,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82277/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/293d12e7-7db8-42d8-bbb4-543aad13fc7e,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81828/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/58babea3-fc7a-4e2d-be16-69936f10e81c,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81829/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bb4ce18c-af05-4a8c-b7ae-f9c53bdb3fae,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81830/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1e2937e3-d495-44bc-aec0-bd5da085e3a4,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81832/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ae3f2c20-ddf9-4469-a953-ebb56e273d3f,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81820/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/67129a71-49d0-467d-b388-4672ab1d5593,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81821/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c50a9ec8-98dc-40ee-a842-555738197957,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81822/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5338146c-d7d5-44fe-aa17-df21e170b7d5,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81823/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/46fa1d25-72a8-40e2-849d-c08c096a44c7,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82276/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/34cc46fd-56f8-450c-802c-f5ad3a4914af,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81826/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/deb2e6c8-fc48-4a11-b7d0-180f961fd4dc,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81833/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://transport.data.gouv.fr/resources/83620/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83620/latest.zip
|
||||
FR Région Occitanie / Pyrénées Méditerranée GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d747fe79-2915-4cdd-8cc5-51a810baaca5,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81026/latest.zip
|
||||
FR Région Provence-Alpes-Côte-d'Azur GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9c8116cb-2f1a-4045-b149-c5bf6cae6bef,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82137/latest.zip
|
||||
FR Région Provence-Alpes-Côte-d'Azur GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1164d75b-4a9f-4c5a-b765-31d861b34fda,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82285/latest.zip
|
||||
FR Région Provence-Alpes-Côte-d'Azur GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6471d334-17df-4308-bae0-5736c24a2e6a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82136/latest.zip
|
||||
FR Région Réunion GTFS,gtfs,https://transport.data.gouv.fr/resources/80934/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80934/latest.zip
|
||||
FR Réseau beeMob - Transports en commun de l'Agglomération Béziers Méditerranée GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c1581a3a-0b78-4944-905c-a8530206648a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83368/latest.zip
|
||||
FR Réseau CARSUD GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8f3642e3-9fc3-45ed-af46-8c532966ace3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81183/latest.zip
|
||||
FR Réseau de transport en commun Le Havre Seine Métropole LiA GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2178bfa8-9fe0-4633-8223-8c151728ef28,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84037/latest.zip
|
||||
FR Réseau moova - Communauté d'Agglomération de Vesoul GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0d6e6c56-8926-49b3-87e2-13c6f57c136b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81222/latest.zip
|
||||
FR Saint-Brieuc Armor Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9bccfc79-5d35-4fc3-8296-526b791fc950,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-67655/latest.zip
|
||||
FR Saint-Etienne Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fc66b270-658c-4678-9794-229a1a8a4938,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81023/latest.zip
|
||||
FR Saint-Nazaire agglo - La CARENE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a1867718-7921-42a6-b672-c53b8c583aa3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83881/latest.zip
|
||||
FR Sankéo GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3047f942-98f0-4d4c-92cf-7c5de1fe233c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82900/latest.zip
|
||||
FR Sankéo GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6e9de060-61f6-49aa-a7ae-f0e5f509415a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83734/latest.zip
|
||||
FR Sankéo GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2afffa1f-aa4a-4fe4-9802-4b5f82bb96c6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83735/latest.zip
|
||||
FR SEMTO GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c9c2f609-d0cd-4233-ad1b-cf86b9bf2dc8,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81116/latest.zip
|
||||
FR SETRAM GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5339d96c-6d20-4a01-939a-40f7b56d6cc1,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79601/latest.zip
|
||||
FR Sibra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8b12f6db-9aa7-43dc-a179-013998a1c4c0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80100/latest.zip
|
||||
FR SNCF GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9ae758ec-cd7a-40cd-a890-bb3963224942,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83582/latest.zip
|
||||
FR SNCF GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1a1eb5ee-6895-4895-a18e-e87827fff0be,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-67595/latest.zip
|
||||
FR Société d'économie Mixte des Transports Montalbanais GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/969c7483-e3d3-4ec2-b11d-db20105e9600,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83750/latest.zip
|
||||
FR SPL Estival/ Cirest GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3b659ebb-8c17-46f4-a8ac-78c4129a4a29,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83774/latest.zip
|
||||
FR STAR GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0644f537-575e-4cce-9570-06165d6f3b27,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83281/latest.zip
|
||||
FR STAR GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ef4779c4-d066-40d8-b8d9-fb8c7eea8820,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83282/latest.zip
|
||||
FR STGA - réseau Möbius GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/021fd4a3-ba9a-4d78-aa65-71d4d289f389,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82093/latest.zip
|
||||
FR Syndicat Intercommunal de Mobilité et d'Organisation Urbaine du Valenciennois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/15438966-8d3c-4dd9-8905-189379ea4c7d,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82442/latest.zip
|
||||
FR Syndicat Intercommunal des Transports urbains de l’Agglomération du Calaisis GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5dcdb39b-aee3-4a0f-a9a9-be225f5a8b78,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83807/latest.zip
|
||||
FR Syndicat intercommunal des Transports Urbains Soissonnais GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b66f3a43-2ba1-4b61-8459-439104582a92,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82154/latest.zip
|
||||
FR Syndicat intercommunal des Transports Urbains Soissonnais GTFS,gtfs,https://transport.data.gouv.fr/resources/83821/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83821/latest.zip
|
||||
FR Syndicat intercommunal des Transports Urbains Soissonnais GTFS,gtfs,https://transport.data.gouv.fr/resources/83820/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83820/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/81942/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81942/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/81177/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81177/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7e0312ef-a6e0-41ab-a75a-2b782a4b1d6b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83642/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82317/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82317/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82308/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82308/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/83981/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83981/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82315/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82315/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82140/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82140/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82312/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82312/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/80680/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80680/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/81338/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81338/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82311/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82311/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82711/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82711/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82310/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82310/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82309/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82309/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82721/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82721/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/80655/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80655/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/80613/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80613/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/81975/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81975/latest.zip
|
||||
FR Syndicat Mixte des Mobilités de l'Aire Grenobloise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/91dbf9cb-4ab2-42a3-9f40-0791a53d58d2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81617/latest.zip
|
||||
FR Syndicat Mixte des Mobilités de l'Aire Grenobloise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b6ec7ba4-09bc-46df-b9a1-79a2c2668cf2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83459/latest.zip
|
||||
FR Syndicat Mixte des Mobilités de l'Aire Grenobloise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c1067233-c35e-4c53-93b3-050f707bf6a2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81794/latest.zip
|
||||
FR Syndicat Mixte des Mobilités de l'Aire Grenobloise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2ee1e02f-da22-4b86-8451-5ef67ae32cdd,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83462/latest.zip
|
||||
FR Syndicat Mixte des Mobilités de l'Aire Grenobloise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3ec51aee-ce59-4608-b721-faae118ea1d0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81618/latest.zip
|
||||
FR Syndicat Mixte des Transports du Petit Cul de Sac Marin GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ac89688b-252b-4b23-9e97-39226402cf2b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82727/latest.zip
|
||||
FR Syndicat Mixte des Transports Urbains du grand Nouméa GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/696cd51a-1d94-4d3f-9001-8e719c25fe03,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82780/latest.zip
|
||||
FR Syndicat Mixte du Bassin d'Alès - Réseau Ales'Y GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b9a0f32e-4386-454c-8759-b82653fa861e,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-75062/latest.zip
|
||||
FR SYTRAL Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/abebedc6-28cf-4e2e-9c64-db57a40156f8,FR,see https://wiki.lafabriquedesmobilites.fr/wiki/Licence_Mobilit%C3%A9s,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81943/latest.zip
|
||||
FR Sète Agglopole Méditerranée GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9402f130-ac11-4615-8494-a5c84fcdb78c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81575/latest.zip
|
||||
FR TADAO GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/47a5e290-7883-4a40-bdcf-b693f64087b0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83643/latest.zip
|
||||
FR Tango GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/15aeb8a5-1cca-4bb9-ae5f-b6e67e4ff2ab,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82911/latest.zip
|
||||
FR TBK - Réseau de transports publics de Quimperlé GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e530ec1c-7432-4c3a-9908-e1c5fb44de3a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80671/latest.zip
|
||||
FR TCAT - Transports en Commun de l'Agglomération Troyenne GTFS,gtfs,https://transport.data.gouv.fr/resources/79747/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79747/latest.zip
|
||||
FR TCAT - Transports en Commun de l'Agglomération Troyenne GTFS,gtfs,https://transport.data.gouv.fr/resources/79847/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79847/latest.zip
|
||||
FR Tempo Bus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3fd582f2-e2ef-4ad7-894c-6f057b53b006,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81219/latest.zip
|
||||
FR Tempo Bus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c1415ff3-7457-4b51-aead-aacbf03a474e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80245/latest.zip
|
||||
FR Tempo Bus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a17ab630-01bf-44ad-b99c-1e48db8eb78e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81220/latest.zip
|
||||
FR Thonon Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7cbb883b-d0cb-4bc7-b2e0-7537afdbc86e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82178/latest.zip
|
||||
FR TLP Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/84f7501c-de7b-46d9-8e0c-14c30d9fa58b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83134/latest.zip
|
||||
FR Toulouse métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/64318177-f5b5-4144-a6be-2d0f22d26c77,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81678/latest.zip
|
||||
FR Tours Métropole Val de Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/198b7602-5e74-4d88-b6bc-483af85a2430,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80694/latest.zip
|
||||
FR Tours Métropole Val de Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6d20b816-3572-4490-982e-43f9ea0017a5,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84001/latest.zip
|
||||
FR Tout'enbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e91ff63e-2c15-4c86-8e69-499ceec7bd97,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81255/latest.zip
|
||||
FR Trans-Landes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3a010c2b-67da-404c-8cbb-85e62cae129c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83418/latest.zip
|
||||
FR Trans-Landes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7e4a061d-60d8-498b-aee1-b4f23d942a0c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83203/latest.zip
|
||||
FR Trans-Landes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c5c0943e-0830-46ac-ba18-0d95a7227a4a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83416/latest.zip
|
||||
FR Transdev CapAtlantique Mobilités • Lila Presqu'île GTFS,gtfs,https://transport.data.gouv.fr/resources/83762/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83762/latest.zip
|
||||
FR Transdev Chamonix GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/83a2f241-a0a4-4901-b77b-8a71572a9f6a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82663/latest.zip
|
||||
FR Transdev Fougères • SURF GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/665d6c43-598d-4d9d-aa98-206072f4dfa0,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-64280/latest.zip
|
||||
FR Transdev GMVA Mobilités • Kicéo GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/565533c0-64ae-44d6-9dfa-169be5b805c6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82553/latest.zip
|
||||
FR Transdev Rail Sud Intermétropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/764d6d5b-d04e-4aa7-94b7-f8b2274d2964,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83448/latest.zip
|
||||
FR Transdev SMA Mobilités • MAT GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3bd31fbe-93f4-432d-ade7-ee8d69897880,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82555/latest.zip
|
||||
FR Transdev Var GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5b2dfdce-8ab2-41b2-a318-65f884e3ead2,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82874/latest.zip
|
||||
FR Transdev Vierzon Mobilités • Le Vib GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/87091347-c7fa-4e63-8fb5-005891ece43b,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82554/latest.zip
|
||||
FR Transdev — Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/b8d1007f-567e-48c4-9c40-bee205f1e688,FR,see https://transport.data.gouv.fr/datasets/reseau-urbain-brevibus,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-898/latest.zip
|
||||
FR Transdev • Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7ec415c9-a963-40fc-8350-7ca77cb824c8,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79545/latest.zip
|
||||
FR Transdev • Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/148bf3a3-792f-44e2-9757-e0cc0d83afae,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79528/latest.zip
|
||||
FR Transdev • Bretagne GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/57c8ace6-014d-40cd-b2b5-a9c630f87123,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81179/latest.zip
|
||||
FR Transdev • Centre-Val de Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/5f1dfae0-7972-4caa-8a5d-e2d90a285585,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79576/latest.zip
|
||||
FR Transdev • Normandie GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b0687cf2-03f5-4d39-8412-01b3d93e0d2b,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83783/latest.zip
|
||||
FR Transdev • Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b8d1007f-567e-48c4-9c40-bee205f1e688,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79151/latest.zip
|
||||
FR Transdev • Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/024123a0-8665-4983-90bd-134ef7d70383,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80586/latest.zip
|
||||
FR Transdev • Pays de la Loire GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2fc8108a-5b91-455a-8d10-1c938d360a9f,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79537/latest.zip
|
||||
FR Transp'Or - Pays de l'Or Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b98b0597-0657-4a1a-b4cb-eee012da25b2,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-74201/latest.zip
|
||||
FR Transp'Or - Pays de l'Or Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bea6a9f7-2ebc-49b0-9c00-21e84a94b70b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79299/latest.zip
|
||||
FR Transp'Or - Pays de l'Or Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f2d93d79-f9f7-4455-9a3b-5d942d010cad,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79359/latest.zip
|
||||
FR Transp'Or - Pays de l'Or Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/cf277982-b61f-4ba0-b47a-5f6df5394d38,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79551/latest.zip
|
||||
FR Transp'Or - Pays de l'Or Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bbbd5a29-2fbf-47ae-84fd-6d1ebb758eeb,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82168/latest.zip
|
||||
FR Transp'Or - Pays de l'Or Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6144236f-d68c-4318-8ed8-5bb468ac735c,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84029/latest.zip
|
||||
FR Transport du Grand Longwy GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8a839ea6-21f5-4503-a8e1-05fe0fd2aa8e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83210/latest.zip
|
||||
FR Transports de l'Agglomération Châtelleraudaise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1711bfbe-1b9d-4a10-85d9-dcf604213f66,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82166/latest.zip
|
||||
FR Transports Publics du Choletais GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/71e8ee8e-0b55-40a1-a297-21ef89aba4dc,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79352/latest.zip
|
||||
FR TRENITALIA FRANCE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bdecea2c-ebc9-4f22-812d-927e4a2e4bad,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81653/latest.zip
|
||||
FR Twisto GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/71728bd6-b9a4-48e3-93ee-ac566e42fe99,FR,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83513/latest.zip
|
||||
FR UNION DES BATELIERS ARCACHONNAIS GTFS,gtfs,https://transport.data.gouv.fr/resources/84043/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84043/latest.zip
|
||||
FR VICHY COMMUNAUTE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4653683f-48a6-4f84-b313-058687fc5d04,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83310/latest.zip
|
||||
FR Vienne Condrieu Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e0831d78-5577-4cae-aa00-de5eeaf3ecb6,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-52917/latest.zip
|
||||
FR VITRE COMMUNAUTE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c7b620bf-e0c5-4f81-a34b-9c8d02c6e3a4,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83019/latest.zip
|
||||
FR VITRE COMMUNAUTE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/282974b2-bf13-41f2-a0bf-feb0682e594e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83276/latest.zip
|
||||
FR VITRE COMMUNAUTE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bf176d3d-c487-40f9-a823-68279395f2ab,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83280/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4cb2f0a9-d3d8-402b-b0bb-afafb741e5f1,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-10230/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/cf9e898c-9ba6-4820-a51a-93709c07f891,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-8119/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f2d4be3b-50bc-4f9b-9818-30860890a864,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-12601/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ab2d1451-dddd-4814-bc5c-8e219834a3e3,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-11725/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/10bb8fff-521d-4b27-a85e-79ae704f1ccc,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-34265/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/220e0fb8-fe04-4d99-9fca-cc812eaadfc3,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-12608/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f167d4d3-cd32-475b-a12f-0403534679e5,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-8597/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8df46b2d-1be4-4a84-a227-bc2c61af97ca,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79014/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f1119940-adc5-4d2e-af7d-5535e50da539,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79992/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ed7049d4-4063-457c-ab65-4a26c640e0a1,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-8201/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2f42baea-fbe1-4b01-bf4a-8232fd83b24e,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-8487/latest.zip
|
||||
FR évolitY GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b45aa8d8-4bd4-4528-99c7-acfc980fdb09,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81944/latest.zip
|
||||
FR Île-de-France Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/413988ed-d340-467b-8be2-7b999fcd207a,FR,see https://wiki.lafabriquedesmobilites.fr/wiki/Licence_Mobilit%C3%A9s,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80921/latest.zip
|
||||
FR Île-de-France Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d34a4b53-5d6d-41b5-9cd9-bb738637b7e9,FR,see https://wiki.lafabriquedesmobilites.fr/wiki/Licence_Mobilit%C3%A9s,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80931/latest.zip
|
||||
FR Île-de-France Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/781d4194-6721-4ee6-bc67-65a4181381aa,FR,see https://wiki.lafabriquedesmobilites.fr/wiki/Licence_Mobilit%C3%A9s,bus,Mobility Database feed catalog,P0,
|
||||
GB Blackpool Transport GTFS,gtfs,https://data.discoverpassenger.com/operator/bts/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1930/latest.zip
|
||||
GB Bluestar Bus GTFS,gtfs,https://data.discoverpassenger.com/operator/bluestar/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2907/latest.zip
|
||||
GB Borders Buses GTFS,gtfs,https://data.discoverpassenger.com/operator/bordersbuses/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1929/latest.zip
|
||||
GB Carousel Buses GTFS,gtfs,https://data.discoverpassenger.com/operator/carouselbuses/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1932/latest.zip
|
||||
GB Essex County Council GTFS,gtfs,https://data.essex.gov.uk/download/2ydj8/gl4/gtfs.zip,GB,see https://data.essex.gov.uk/dataset/2ydj8/essex-bus-network-data,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2688/latest.zip
|
||||
GB Fastrack GTFS,gtfs,https://www.kent-fastrack.co.uk/open-data/network/current?format=gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog,P0,
|
||||
GB Flixbus GB GTFS,gtfs,http://gtfs.gis.flix.tech/gtfs_generic_gb.zip,GB,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2900/latest.zip
|
||||
GB BODS national GTFS,gtfs,https://data.bus-data.dft.gov.uk/timetable/download/gtfs-file/all/,GB,OGL/verify BODS terms,"rail,bus",BODS / Mobility Database; Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,England/GB bus focus; heavy rail separate.; Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2014/latest.zip
|
||||
GB Green Line 702 GTFS,gtfs,https://data.discoverpassenger.com/operator/greenline/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1937/latest.zip
|
||||
GB Hedingham and Chambers GTFS,gtfs,https://data.discoverpassenger.com/operator/hedinghamandchambers/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1938/latest.zip
|
||||
GB Intalink GTFS,gtfs,https://data.discoverpassenger.com/operator/intalink/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1939/latest.zip
|
||||
GB Midland Bluebird GTFS,gtfs,https://data.discoverpassenger.com/operator/mcgillsse/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2911/latest.zip
|
||||
GB Newbury & District GTFS,gtfs,https://data.discoverpassenger.com/operator/kennections/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1944/latest.zip
|
||||
GB Nottingham City Transport GTFS,gtfs,https://data.discoverpassenger.com/operator/nctx/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1946/latest.zip
|
||||
GB Reading Buses GTFS,gtfs,https://data.discoverpassenger.com/operator/readingbuses/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1949/latest.zip
|
||||
GB Salisbury Reds GTFS,gtfs,https://data.discoverpassenger.com/operator/salisburyreds/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1950/latest.zip
|
||||
GB Swindon's Bus Company GTFS,gtfs,https://data.discoverpassenger.com/operator/swindonbus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1952/latest.zip
|
||||
GB Thames Valley Buses GTFS,gtfs,https://data.discoverpassenger.com/operator/courtney/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2913/latest.zip
|
||||
GB Unilink GTFS,gtfs,https://data.discoverpassenger.com/operator/unilink/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1955/latest.zip
|
||||
GB West Coast Motors GTFS,gtfs,https://data.discoverpassenger.com/operator/westcoastmotors/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1957/latest.zip
|
||||
IE Aircoach GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Aircoach.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2640/latest.zip
|
||||
"IE BK & Sons, JJ/Bernard Kavanagh GTFS",gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Bernard_Kavanagh.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-592/latest.zip
|
||||
IE Bus Éireann GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Bus_Eireann.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2636/latest.zip
|
||||
IE Citylink GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Citylink.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-637/latest.zip
|
||||
IE Dublin Bus GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Dublin_Bus.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2635/latest.zip
|
||||
IE Dublin Bus Nitelink GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Nitelink.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-635/latest.zip
|
||||
IE Dublin Coach GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Dublin_Coach.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-626/latest.zip
|
||||
IE GoAhead Ireland GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_GoAhead.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2639/latest.zip
|
||||
IE Irish Rail GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Irish_Rail.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2637/latest.zip
|
||||
IE J.J Kavanagh & Sons GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_JJ_Kavanagh.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-620/latest.zip
|
||||
IE John Kearns GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Kearns_Transport.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-623/latest.zip
|
||||
IE LUAS GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_LUAS.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2638/latest.zip
|
||||
"IE LUAS, Bus Átha Cliath - Dublin Bus, Go-Ahead Ireland, Iarnród Éireann / Irish Rail, Bus Éireann GTFS",gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Realtime.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-5901/latest.zip
|
||||
IE Matthews Coach Hire GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Matthews.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-617/latest.zip
|
||||
IE McGrath Coaches GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_McGrath.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-601/latest.zip
|
||||
IE Slieve Bloom Coach Tours GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Slieve_Bloom.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-613/latest.zip
|
||||
IE Swords Express GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Swords_Express.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-611/latest.zip
|
||||
IE West Cork Connect GTFS,gtfs,https://addtransit.com/gtfsfile/21113/WestCorkConnect.zip,IE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2875/latest.zip
|
||||
IE Wexford Bus GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Wexford_Bus.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-616/latest.zip
|
||||
"LU Administration des transports publics, Luxembourg GTFS",gtfs,https://data.public.lu/fr/datasets/r/41ca1930-761a-42c3-a297-72facd2ad43e,LU,see https://creativecommons.org/licenses/by/4.0/,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-3132/latest.zip
|
||||
NL OpenOV national GTFS,gtfs,http://gtfs.openov.nl/gtfs-rt/gtfs-openov-nl.zip,NL,verify OpenOV/NDOV terms,"rail,tram,metro,bus,ferry",European transport feeds / OpenOV,P0,Use NDOV/OVapi for production and realtime.
|
||||
NL OVapi GTFS,gtfs,http://gtfs.ovapi.nl/gtfs-nl.zip,NL,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1077/latest.zip
|
||||
NO Entur national aggregated GTFS,gtfs,https://storage.googleapis.com/marduk-production/outbound/gtfs/rb_norway-aggregated-gtfs.zip,NO,verify Entur terms/NLOD,"rail,tram,metro,bus,ferry",Entur; Mobility Database feed catalog,P0,GTFS is a subset; NeTEx is official/most complete.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1078/latest.zip
|
||||
"NO Vestfold Kollektivtrafikk, Bastø Fosen GTFS",gtfs,https://storage.googleapis.com/marduk-production/outbound/gtfs/rb_vkt-aggregated-gtfs.zip,NO,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-1048/latest.zip
|
||||
SE Scania Transport Services GTFS,gtfs,https://data.trilliumtransit.com/gtfs/scania-se/scania-se.zip,SE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-7891/latest.zip
|
||||
SE GTFS Sverige mirror,gtfs,https://scraped.data.public-transport.earth/se/gtfs.zip,SE,verify Trafiklab/Samtrafiken terms,"rail,tram,metro,bus,ferry",European transport feeds mirror,P0,Production source should use Trafiklab API key and official endpoint.
|
||||
SE TrafikLab GTFS,gtfs,https://api.resrobot.se/gtfs/sweden.zip,SE,see https://www.trafiklab.se/api/gtfs-datasets/gtfs-sverige-2/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2661/latest.zip
|
||||
SE Trafiklab GTFS,gtfs,https://opendata.samtrafiken.se/gtfs-sweden/sweden.zip,SE,see https://www.trafiklab.se/api/gtfs-datasets/gtfs-sweden/#licence,bus,Mobility Database feed catalog,P0,
|
||||
AT Optima Express GTFS,gtfs,https://github.com/jonaes/gtfs/raw/refs/heads/main/output/optima_gtfs.zip,AT,,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-3123/latest.zip
|
||||
"AT Wiener Lokalbahnen (WLB), Wiener Linien GTFS",gtfs,http://www.wienerlinien.at/ogd_realtime/doku/ogd/gtfs/gtfs.zip,AT,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-648/latest.zip
|
||||
BE Société nationale des chemins de fer belges (NMBS-SNCB) GTFS,gtfs,https://gtfs.irail.be/nmbs/gtfs/latest.zip,BE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-686/latest.zip
|
||||
BE Société nationale des chemins de fer belges (NMBS-SNCB) GTFS,gtfs,https://data.gtfs.be/sncb/gtfs/be-sncb-gtfs.zip,BE,see https://gtfs.be/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1859/latest.zip
|
||||
BE TEC GTFS,gtfs,https://data.gtfs.be/tec/gtfs/be-tec-gtfs.zip,BE,see https://gtfs.be/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1868/latest.zip
|
||||
DE DB Long-distance Rail GTFS.DE,gtfs,https://download.gtfs.de/germany/fv_free/latest.zip,DE,Creative Commons 4.0,rail,GTFS.DE / Deutsche Bahn long-distance rail; Mobility Database feed catalog,P1,Use as the first focused German rail feed for cross-source station deduplication with VBB and FlixTrain.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-768/latest.zip
|
||||
DE DB ZugBus Regionalverkehr Alb-Bodensee GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/rab.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog,P1,
|
||||
DE European Sleeper GTFS,gtfs,https://jbb.ghsq.de/gtfs/eu-es.gtfs.zip,DE,,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-3107/latest.zip
|
||||
"DE Freiburger Verkehrs AG (VAG), R.A.S.T. Reisen (RAST), Tuniberg Express GTFS",gtfs,https://www.vag-freiburg.de/fileadmin/gtfs/VAGFR.zip,DE,see https://www.vag-freiburg.de/service-infos/downloads/gtfs-daten,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1115/latest.zip
|
||||
DE Mitteldeutscher Verkehrsverbund GmbH (MDV) GTFS,gtfs,https://www.mdv.de/site/uploads/gtfs_mdv.zip,DE,see https://creativecommons.org/licenses/by/4.0/deed.de,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2360/latest.zip
|
||||
DE naldo Verkehrsverbund GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_ohne_liniennetz/naldo.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-771/latest.zip
|
||||
DE NVBW - Nahverkehrsgesellschaft Baden-Württemberg mbH GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/bwgesamt.zip,DE,see https://www.nvbw.de/open-data/lizenz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2393/latest.zip
|
||||
DE Public Transport Germany GTFS,gtfs,https://download.gtfs.de/germany/nv_free/latest.zip,DE,see https://www.nvbw.de/open-data/lizenz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1090/latest.zip
|
||||
"DE Regional Bus Stuttgart GmbH RBS, FMO, Regiobus Stuttgart, Friedrich Müller Omnibusunternehmen GTFS",gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_ohne_liniennetz/rbs.zip,DE,,bus,Mobility Database feed catalog,P1,
|
||||
DE Regional Rail Transport Germany GTFS,gtfs,https://download.gtfs.de/germany/rv_free/latest.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1089/latest.zip
|
||||
DE SBG GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/sbg.zip,DE,,bus,Mobility Database feed catalog,P1,
|
||||
DE Stadtwerke Heilbronn (SWHN) GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/hnv.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-783/latest.zip
|
||||
DE SWEG Markgräflerland GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/sweg.zip,DE,,bus,Mobility Database feed catalog,P1,
|
||||
DE TBO Offenburg GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/tgo.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-900/latest.zip
|
||||
DE TuTicket GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/tuticket.zip,DE,,bus,Mobility Database feed catalog,P1,
|
||||
"DE Ulmer Eisenbahnfreunde, Sächsisch-Oberlausitzer Eisenbahngesellschaft, SDG Sächsische Dampfeisenbahngesellschaft mbH, SNCF, vlexx, DB AG, SBB, Nordbahn Eisenbahngesellschaft, Norddeutsche Eisenbahn Gesellschaft, AKN Eisenbahn GmbH, Ostde",gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/bwspnv.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog,P1,
|
||||
DE Verkehrsverbund Berlin-Brandenburg (VBB) GTFS,gtfs,http://vbb.de/vbbgtfs,DE,see http://vbb.de/vbbgtfs,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-782/latest.zip
|
||||
DE Verkehrsverbund Pforzheim-Enzkreis (VPE) GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/vpe.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1085/latest.zip
|
||||
DE Verkehrsverbund Rhein-Neckar GTFS,gtfs,https://geoportal.vrn.de/services/sharing/rest/content/items/4ec4b1d131eb46a6bb8e216ce9b90eff/data,DE,see https://www.vrn.de/opendata/datasets/soll-fahrplandaten-gtfs-aktuell,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1173/latest.zip
|
||||
DE Verkehrsverbund Rhein-Sieg (VRS) GTFS,gtfs,http://download.vrsinfo.de/gtfs/google_transit.zip,DE,see https://www.vrs.de/fahren/fahrplanauskunft/opendata-/-openservice,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-778/latest.zip
|
||||
EU FlixBus/FlixTrain Europe GTFS,gtfs,http://gtfs.gis.flix.tech/gtfs_generic_eu.zip,EU,verify Flix terms,rail,Transitland feed,P1,Important commercial long-distance network; verify reuse before production.
|
||||
FI national GTFS,gtfs,https://traffic.navici.com/tiedostot/gtfs.zip,FI,verify Fintraffic/FINAP terms,"rail,tram,metro,bus,ferry",European transport feeds / Fintraffic,P1,Check current endpoint and whether HSL/Waltti should be ingested separately.
|
||||
"FI Finferries, Alandstrafiken, Rosita Oy, JS Ferryway Ltd Oy, Kuljetus-Savolainen Oy, Archipelago Lines Oy, Sundqvist Investments Oy Ab, Pörtö Line, Nordic Coast Line, Yksityinen, HSL, Vitharun, Espoon kaupunki, Suomen saaristokuljetus, Nor",gtfs,http://lautta.net/db/gtfs/gtfs.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/ferries/734/20221227-2/gtfs.zip
|
||||
"FI Porvoon Museorautatie, Pieksämäen Höyryveturiyhdistys ry GTFS",gtfs,https://rata.digitraffic.fi/api/v1/trains/gtfs-passenger.zip,FI,,bus,Mobility Database feed catalog,P1,
|
||||
FI Turku GTFS,gtfs,http://data.foli.fi/gtfs/gtfs.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-864/latest.zip
|
||||
"FI Vekka Liikenne Oy, Kainuun Tilausliikenne P. Jääskeläinen Ky, Oulaisten Liikenne Oy GTFS",gtfs,https://tvv.fra1.digitaloceanspaces.com/211.zip,FI,,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/tld-829/latest.zip
|
||||
FR Communauté urbaine du Grand Nancy GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/e7e78cd7-e186-4923-a272-9713fbc28b45,FR,,bus,Mobility Database feed catalog,P1,
|
||||
FR Fluo GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=grand-est-tgv,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-fluo-grand-est-navettes-tgv-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P1,
|
||||
FR Grasse Sillages Scolaire GTFS,gtfs,https://static.data.gouv.fr/resources/lignes-regulieres-de-transports-en-pays-de-grasse/20210203-152443/20210101-gtfs-sillagesscolaire.zip,FR,,bus,Mobility Database feed catalog,P1,
|
||||
FR Kicéo GTFS,gtfs,https://ratpdev-mosaic-prod-bucket-raw.s3-eu-west-1.amazonaws.com/21/exports/1/gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/offre-de-transport-du-reseau-de-vannes-kiceo-donnees-theoriques-et-temps-reel/,bus,Mobility Database feed catalog,P1,
|
||||
FR Région Bourgogne-Franche-Comté GTFS,gtfs,https://exs.mobigo.cityway.fr/gtfs.aspx?key=OPENDATA&operatorCode=UT21,FR,see https://transport.data.gouv.fr/datasets/reseau-de-transport-interurbain-mobigo-en-bourgogne-franche-comte,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-659/latest.zip
|
||||
FR Réseau Mistral GTFS,gtfs,https://data.metropoletpm.fr/explore/dataset/donnees-gtfs-du-reseau-de-bus-mistral/files/8e7042ab297b42f9dcc58f3db5326376/download/,FR,see https://www.etalab.gouv.fr/wp-content/uploads/2014/05/Open_Licence.pdf,bus,Mobility Database feed catalog,P1,
|
||||
FR SEMO GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/f4f77c4d-c1ea-4938-80d9-2a77a150803f,FR,see https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf,bus,Mobility Database feed catalog,P1,
|
||||
FR Tout'enbus GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=toutenbus,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-toutenbus-aubenas-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P1,
|
||||
GB Brighton & Hove Bus GTFS,gtfs,https://data.discoverpassenger.com/operator/brightonhove/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1928/latest.zip
|
||||
GB Cardiff Bus GTFS,gtfs,https://data.discoverpassenger.com/operator/ccts/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1931/latest.zip
|
||||
GB Chiltern Railways GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/association-of-train-operating-companies/284/20210423/gtfs.zip,GB,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1311/latest.zip
|
||||
GB Coach Services GTFS,gtfs,https://data.discoverpassenger.com/operator/coachservices/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2909/latest.zip
|
||||
GB East Yorkshire GTFS,gtfs,https://data.discoverpassenger.com/operator/eyms/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1933/latest.zip
|
||||
GB Go Cornwall Bus GTFS,gtfs,https://data.discoverpassenger.com/operator/gocornwallbus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1934/latest.zip
|
||||
GB Go North East GTFS,gtfs,https://data.discoverpassenger.com/operator/gonortheast/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1935/latest.zip
|
||||
GB Go North West GTFS,gtfs,https://data.discoverpassenger.com/operator/gonorthwest/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1936/latest.zip
|
||||
GB konectbus GTFS,gtfs,https://data.discoverpassenger.com/operator/konectbus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1940/latest.zip
|
||||
GB McGill's Buses GTFS,gtfs,https://data.discoverpassenger.com/operator/mcgills/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1941/latest.zip
|
||||
GB Metrobus GTFS,gtfs,https://data.discoverpassenger.com/operator/metrobus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1942/latest.zip
|
||||
GB Morebus GTFS,gtfs,https://data.discoverpassenger.com/operator/morebus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1943/latest.zip
|
||||
GB Newport Bus GTFS,gtfs,https://data.discoverpassenger.com/operator/newportbus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1945/latest.zip
|
||||
GB Oxford Bus GTFS,gtfs,https://data.discoverpassenger.com/operator/oxfordbus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1947/latest.zip
|
||||
GB Plymouth Citybus GTFS,gtfs,https://data.discoverpassenger.com/operator/plymouthbus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1948/latest.zip
|
||||
GB Sanders Coaches GTFS,gtfs,https://data.discoverpassenger.com/operator/sanderscoaches/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2912/latest.zip
|
||||
GB Southern Vectis GTFS,gtfs,https://data.discoverpassenger.com/operator/southernvectis/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1951/latest.zip
|
||||
"GB Transport for Greater Manchester Arriva in the North West, Atlantic Travel, Bestway Travel, Belle Vue Coaches, Blackburn Private Hire, The Burnley Bus Company, Bullocks Coaches, Cumfy Bus, D&G Bus, Don Fraser Coaches Ltd, Finch Coaches, ",gtfs,https://odata.tfgm.com/opendata/downloads/TfGMgtfsnew.zip,GB,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1103/latest.zip
|
||||
GB Unibus GTFS,gtfs,https://data.discoverpassenger.com/operator/unibus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1954/latest.zip
|
||||
GB Warrington's Own Buses GTFS,gtfs,https://data.discoverpassenger.com/operator/warrington/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1956/latest.zip
|
||||
GB Xplore Dundee GTFS,gtfs,https://data.discoverpassenger.com/operator/xploredundee/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2906/latest.zip
|
||||
"IE Catherine Madigan, Kelly Travel, Donal Harrington Coach Service, Farragher International Travel Services, Burkesbus, Collins Coaches, Halpenny Transport, McGonagle Bus & Coach Hire, Tralee Peoples Bus Service, Flight Link Limited, Keelin",gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Small_Operators.zip,IE,,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/tld-5576/latest.zip
|
||||
IE Transport for Ireland combined GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_combined.zip,IE,verify NTA terms/fair use,"rail,tram,bus",Transport for Ireland / European transport feeds,P1,Pair with NTA GTFS-Realtime later.
|
||||
"IE TFI Local Link Mayo, TFI Local Link Kerry, TFI Local Link Cork, TFI Local Link Galway, TFI Local Link Longford Westmeath Roscommon, TFI Local Link Cavan Monaghan, TFI Local Link Tipperary, TFI Local Link Waterford, TFI Local Link Louth M",gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Local_Link.zip,IE,,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/tld-641/latest.zip
|
||||
IE Transport for Ireland (TFI) GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_All.zip,IE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2364/latest.zip
|
||||
LU GTFS mirror,gtfs,https://scraped.data.public-transport.earth/lu/gtfs.zip,LU,verify data.public.lu terms,"rail,tram,bus",European transport feeds mirror,P1,Prefer data.public.lu NeTEx for production.
|
||||
DE AVV Aachen GTFS,gtfs,http://opendata.avv.de/current_GTFS/,DE,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=DE-NW-AVV,P2,PTNA candidate; use original publisher URL where available.
|
||||
DE RhönEnergie Bus GmbH GTFS,gtfs,https://gtfs.rhoenenergie-bus.de/GTFS.zip,DE,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=DE-HE-REB,P2,PTNA candidate; use original publisher URL where available.
|
||||
FI Helsingin seudun liikenne GTFS,gtfs,https://infopalvelut.storage.hsldev.com/gtfs/hsl.zip,FI,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FI-18-HSL,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR CITYWAY GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/LINEOTIM_Complet.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-Lineotim,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR Keolis Rennes GTFS,gtfs,https://transport.data.gouv.fr/datasets/versions-des-horaires-theoriques-des-lignes-de-bus-et-de-metro-du-reseau-star-au-format-gtfs,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-Star,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/ARBUS.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-ARBUS,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/BREIZHGO_CAR_NS.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-BREIZHGO_CAR_NS,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/BREIZHGO_CAR_RLP.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-BREIZHGO_CAR_RLP,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/BREIZHGO_CAR_22.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-BREIZHGO_CAR_22,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/BREIZHGO_CAR_29.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-BREIZHGO_CAR_29,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/BREIZHGO_CAR_35.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-BREIZHGO_CAR_35,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/BREIZHGO_CAR_56.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-BREIZHGO_CAR_56,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR KorriGo GTFS,gtfs,https://www.korrigo.bzh/ftp/OPENDATA/TUDBUS.gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-BRE-TUDBUS,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR Mecatran GTFS,gtfs,https://transport.data.gouv.fr/datasets/offres-de-services-bus-et-tramway-gtfs,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-NAQ-TBM,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR Ministère chargé des transports GTFS,gtfs,https://transport.data.gouv.fr/datasets/donnees-de-transport-en-commun-reseau-altigo-communaute-de-communes-du-brianconnais-format-gtfs,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-PAC-Altigo,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR Région Pays de la Loire GTFS,gtfs,https://donnees.paysdelaloire.fr/data/pdl44.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-PDL-Aleop_44,P2,PTNA candidate; use original publisher URL where available.
|
||||
FR SNCF GTFS,gtfs,https://eu.ftp.opendatasoft.com/sncf/plandata/export-opendata-sncf-gtfs.zip,FR,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FR-SNCF,P2,PTNA candidate; use original publisher URL where available.
|
||||
NL OVapi GTFS,gtfs,https://gtfs.ovapi.nl/nl/gtfs-nl.zip,NL,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=NL-OVApi,P2,PTNA candidate; use original publisher URL where available.
|
||||
SE Samtrafiken i Sverige AB GTFS,gtfs,https://opendata.samtrafiken.se/gtfs-sweden/sweden.zip?key={apikey},SE,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=SE-Samtrafiken,P2,PTNA candidate; use original publisher URL where available.
|
||||
AT Bean Shuttle GTFS,gtfs,http://gtfs.beanshuttle.com/google_transit.zip,AT,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-859/latest.zip
|
||||
AT OBB Personenverkehr AG Kundenservice GTFS,gtfs,https://static.oebb.at/open-data/soll-fahrplan-gtfs/GTFS_OP_2024_obb.zip,AT,see https://data.oebb.at/de/datensaetze~soll-fahrplan-gtfs~,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2138/latest.zip
|
||||
AT Wiener Linien GTFS,gtfs,https://wien.gv.at/data/zip/gtfs.zip,AT,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/stadt-wien/888/20200623/gtfs.zip
|
||||
"BE ABuss OÜ, Aktsiaselts Hansa Bussiliinid, Aktsiaselts MK Autobuss, Alukvik OÜ, Arilix OÜ, AS Lux Express Estonia, Asunduse osaühing, ATG Bussiliinid OÜ, Atko Bussiliinid AS, ATKO Liinid OÜ, ATKO Transport OÜ, Ekspress-Auto L Osaühing, Eks",gtfs,https://files.mobilitydatabase.org/mdb-1095/latest.zip,BE,,bus,MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.
|
||||
BE De Lijn GTFS,gtfs,https://data.gtfs.be/delijn/gtfs/be-delijn-gtfs.zip,BE,see https://gtfs.be,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1869/latest.zip
|
||||
BE De Waterbus GTFS,gtfs,https://addtransit.com/gtfsfile/85165/DeWaterbus.zip,BE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-685/latest.zip
|
||||
BE DeWaterbus GTFS,gtfs,https://data.gtfs.be/dewaterbus/gtfs/be-dewaterbus-gtfs.zip,BE,see https://gtfs.be/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1860/latest.zip
|
||||
"BE DPN, AVL, CFL, CFLBus, RGTR, TICE, TRAM GTFS",gtfs,http://openov.lu/data/gtfs/gtfs-openov-lu.zip,BE,see http://openov.lu/data/gtfs/LICENSE.TXT,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1091/latest.zip
|
||||
BE Société des Transports Intercommunaux de Bruxelles/Maatschappij voor het Intercommunaal Vervoer te Brussel (STIB / MIVB) GTFS,gtfs,https://stibmivb.opendatasoft.com/api/datasets/1.0/gtfs-files-production/alternative_exports/gtfszip/,BE,see https://stibmivb.opendatasoft.com/explore/dataset/gtfs-files-production/information/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1088/latest.zip
|
||||
BE Société des Transports Intercommunaux de Bruxelles/Maatschappij voor het Intercommunaal Vervoer te Brussel (STIB / MIVB) GTFS,gtfs,https://data.gtfs.be/stib/gtfs/be-stib-gtfs.zip,BE,see https://gtfs.be,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1857/latest.zip
|
||||
CH SBB CFF FFS GTFS,gtfs,https://opentransportdata.swiss/de/dataset/timetable-2021-gtfs2020/permalink,CH,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/sbb-cff-ffs/793/20211208/gtfs.zip
|
||||
CH Shuttler GTFS,gtfs,https://shuttler.ch/transit-data/gtfs/gtfs.zip,CH,see https://www.shuttler.ch/gtc,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1970/latest.zip
|
||||
CH Systemaufgaben Kundeninformation SKI+ GTFS,gtfs,https://data.opentransportdata.swiss/en/dataset/timetable-2025-gtfs2020/permalink,CH,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tld-725/latest.zip
|
||||
DE Augsburger Verkehrs- und Tarifverbund (AVV) GTFS,gtfs,https://www.avv-augsburg.de/fileadmin/user_upload/OpenData/GTFS_AVV.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-857/latest.zip
|
||||
DE Bürgerbus Leupoldsgrün (Landkreis Hof) GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/b-rgerbus-leupoldsgr-n-landkreis-hof/1126/20190414/gtfs.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1152/latest.zip
|
||||
DE CROSS Solution GTFS,gtfs,https://citybus-waechtersbach-8be41f.gitlab.io/citybus-waechtersbach-gtfs.zip,DE,see https://gitlab.com/cbleek/citybus-waechtersbach/-/blob/main/LICENSE?ref_type=heads,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2902/latest.zip
|
||||
"DE DB, SBB, EC, ÖBB, NS, DSB, MAV, CD, PKP, RE, DPN, RZD, SWX, CFL GTFS",gtfs,https://files.mobilitydatabase.org/mdb-1139/latest.zip,DE,,bus,MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.
|
||||
DE Erfurter Verkehrsbetriebe AG GTFS,gtfs,https://www.vmt-thueringen.de/fileadmin/user_upload/Open_Data/VMT_GTFS.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-780/latest.zip
|
||||
DE Filsland Verkehrsverbund GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/filsland-verkehrsverbund/1185/20200628/gtfs.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1183/latest.zip
|
||||
DE FlixBus GTFS,gtfs,https://transport.beta.gouv.fr/resources/11681/download,DE,see http://data.ndovloket.nl/LICENTIE-CC0.TXT,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-853/latest.zip
|
||||
"DE Fluo Grand Est 67, CTS, DB AG, SBB, THURBO, Breisgau-S-Bahn GmbH, NVBW, Schwarzer Reisen, BBS Mittelschwaben, Probst, Brandner UA, Stadtbus Kempten, Gairing, RBI Regionalbus Isny, RBA Kempten, RBA Lindau, NeuBus, Schwabenbus Dillingen, S",gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/bwsbahnubahn.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1075/latest.zip
|
||||
DE Hamburger Verkehrsverbund GmbH (HVV) GTFS,gtfs,https://daten.transparenz.hamburg.de/Dataport.HmbTG.ZS.Webservice.GetRessource100/GetRessource100.svc/dbe5f144-b806-4377-aac3-d3572b139b23/Upload__hvv_Rohdaten_GTFS_Fpl_20250108.ZIP,DE,see https://www.govdata.de/dl-de/by-2-0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1226/latest.zip
|
||||
DE Hamburger Verkehrsverbund GmbH (HVV) GTFS,gtfs,https://daten.transparenz.hamburg.de/Dataport.HmbTG.ZS.Webservice.GetRessource100/GetRessource100.svc/fbb583e7-48a7-4ca5-8166-be31ea4b741f/Upload__hvv_Rohdaten_GTFS_Fpl_20250408.ZIP,DE,see https://www.govdata.de/dl-de/by-2-0,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2899/latest.zip
|
||||
DE HofBus GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/hofbus/1197/20190801/gtfs.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1252/latest.zip
|
||||
DE Karlsruher Verkehrsverbund GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/rvs.zip,DE,see https://www.nvbw.de/open-data/fahrplandaten/fahrplandaten-mit-liniennetz,bus,Mobility Database feed catalog,P3,
|
||||
DE Karlsruher Verkehrsverbundes GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/karlsruher-verkehrsverbundes/896/20240103/gtfs.zip,DE,see https://www.kvv.de/fahrplan/fahrplaene/open-data.html,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1225/latest.zip
|
||||
DE Leipziger Verkehrsbetriebe (LVB) GTFS,gtfs,https://opendata.leipzig.de/dataset/8803f612-2ce1-4643-82d1-213434889200/resource/b38955c4-431c-4e8b-a4ef-9964a3a2c95d/download/gtfsmdvlvb.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-781/latest.zip
|
||||
DE Münchner Verkehrs- und Tarifverbund GmbH (MVV) GTFS,gtfs,https://www.mvv-muenchen.de/fileadmin/mediapool/02-Fahrplanauskunft/03-Downloads/openData/mvv_ohneShape_20241004095702.zip,DE,see https://www.mvv-muenchen.de/fahrplanauskunft/fuer-entwickler/opendata/index.html,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2252/latest.zip
|
||||
DE Münchner Verkehrs- und Tarifverbund GmbH (MVV) GTFS,gtfs,https://www.mvv-muenchen.de/fileadmin/mediapool/02-Fahrplanauskunft/03-Downloads/openData/mvv_gtfs_01.zip,DE,see https://www.mvv-muenchen.de/fahrplanauskunft/fuer-entwickler/opendata/index.html,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2365/latest.zip
|
||||
DE NVBW GTFS,gtfs,https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/vhb.zip,DE,see https://www.nvbw.de/open-data/lizenz,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-914/latest.zip
|
||||
DE OstalbMobil – Verkehrsverbund GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/ostalbmobil-verkehrsverbund/1236/20200601/gtfs.zip,DE,see https://www.nvbw.de/aufgaben/digitale-mobilitaet/lizenz/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1202/latest.zip
|
||||
DE RNV GTFS,gtfs,https://opendata.rnv-online.de/node/132/download,DE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/rhein-neckar-verkehr/629/20200528/gtfs.zip
|
||||
DE Rursee-Schifffahrt KG GTFS,gtfs,https://de.data.public-transport.earth/gtfs-germany.zip,DE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-784/latest.zip
|
||||
DE Swiss Federal Railways (SBB) GTFS,gtfs,https://opentransportdata.swiss/de/dataset/timetable-2022-gtfs2020/permalink,DE,see https://opentransportdata.swiss/en/terms-of-use/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1092/latest.zip
|
||||
DE Systemaufgaben Kundeninformation SKI+ GTFS,gtfs,https://data.opentransportdata.swiss/en/dataset/timetable-2024-gtfs2020/permalink,DE,see https://opentransportdata.swiss/en/terms-of-use/#Definitions,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2144/latest.zip
|
||||
DE VBB GTFS,gtfs,https://vbb.de/media/download/2029,DE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/verkehrsverbund-berlin-brandenburg/213/20210521/gtfs.zip
|
||||
DE Verkehrsverbund Rhein-Ruhr GTFS,gtfs,https://www.opendata-oepnv.de/dataset/496eea5d-d6ef-4dc2-aeb0-d15c4fbf3178/resource/9874f617-0b5d-46c4-93da-cc0bb8598fd1/download/20220129_gtfs_vrr.zip,DE,see https://opendata.ruhr/dataset/soll-fahrplandaten-vrr,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-785/latest.zip
|
||||
DE Verkehrsverbund Rhein-Ruhr GTFS,gtfs,https://www.opendata-oepnv.de/dataset/496eea5d-d6ef-4dc2-aeb0-d15c4fbf3178/resource/773be6b9-daec-4c39-971a-c9e5668f148b/download/20250508_gtfs_vrr_od.zip,DE,see https://www.opendata-oepnv.de/ht/de/organisation/verkehrsverbuende/vrr/startseite?tx_vrrkit_view%5Baction%5D=details&tx_vrrkit_view%5Bcontroller%5D=View&tx_vrrkit_view%5Bdataset_name%5D=soll-fahrplandaten-vrr&cHash=02c1406b5f625dd48a64d,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2651/latest.zip
|
||||
"DE Železničná spoločnosť Slovensko, a.s. GTFS",gtfs,https://files.mobilitydatabase.org/mdb-1832/latest.zip,DE,,bus,MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.
|
||||
FI Joensuu GTFS,gtfs,https://dev.hsl.fi/gtfs.waltti/joensuu.zip,FI,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/city-of-joensuu/732/20190922/gtfs.zip
|
||||
FI Jyväskylä GTFS,gtfs,https://data.jyvaskyla.fi/tiedostot/linkkidata.zip,FI,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/city-of-jyvaskyla/728/20180214/gtfs.zip
|
||||
FI Kajaani GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/city-of-kajaani/1103/20190921/gtfs.zip,FI,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1136/latest.zip
|
||||
FI Komia Liikenne GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/komia-liikenne/1225/20231206/gtfs.zip,FI,see https://www.komialiikenne.fi,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1255/latest.zip
|
||||
FI Mäntylän Alueen Liikenne GTFS,gtfs,https://github.com/eidancottierr-jpg/mal-testgtfs/raw/refs/heads/main/mantyla_gtfs_schdl.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2905/latest.zip
|
||||
FI Oulun joukkoliikenne GTFS,gtfs,https://transitdata.fi/oulu/google_transit.zip,FI,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/oulun-joukkoliikenne/729/20190529/gtfs.zip
|
||||
FI POS-ELY Joensuu GTFS,gtfs,https://tvv.fra1.digitaloceanspaces.com/183.zip,FI,see https://opendata.waltti.fi/getting-started,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1125/latest.zip
|
||||
FI POSELY GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/liikennevirasto/733/20180808/gtfs.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1317/latest.zip
|
||||
FR Agglobus GTFS,gtfs,https://data.centrevaldeloire.fr/api/v2/catalog/datasets/agglobus-offre-theorique-mobilite-reseau-urbain-de-bourges/files/03b395ff43085db427c8f51d83e88643,FR,see https://transport.data.gouv.fr/datasets/agglobus-offre-theorique-mobilite-reseau-urbain-de-bourges,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1898/latest.zip
|
||||
FR Agglomération Sud Sainte Baume GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/a67da149-19bc-445d-b2d3-855fd82aa240,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80498/latest.zip
|
||||
FR Agglomération Sud Sainte Baume GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/358ab51c-78e8-45a4-ae5a-a05722af6c09,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80363/latest.zip
|
||||
FR Aix en Bus GTFS,gtfs,http://tsvc2.pilote3.cityway.fr/api/Export/v1/GetExportedDataFile?ExportFormat=Gtfs&OperatorCode=AIXENBUS,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-888/latest.zip
|
||||
"FR Aleop Renfort LR 85, Aléop en Loire-Atlantique, Aléop en Maine-et-Loire, Aléop en Mayenne, Aléop en Sarthe, Aléop en Vendée, Aléop en Vendée et Loire-Atlantique, Aléop express Régionale, Aléop TER, projet Aléop en Loire-Atlantique, Yeu C",gtfs,https://donnees.paysdelaloire.fr/data/pdl.zip,FR,see https://data.paysdelaloire.fr/explore/dataset/234400034_lignes-regulieres-de-transports-regionaux-arrets/information/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1096/latest.zip
|
||||
FR Ales'Y GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=alesy,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-ntecc-ales-gtfs-gtfs-rt,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1889/latest.zip
|
||||
FR Alliance Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ce6d5603-41cf-4fed-b93f-ed95859003b5,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81924/latest.zip
|
||||
FR Alliance Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/95654461-59d1-4a33-8830-f90d55e8a217,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82619/latest.zip
|
||||
FR Ametis GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/1e116130-3670-496d-b8dc-cb8c628dd8b6,FR,see https://transport.data.gouv.fr/datasets/fichier-gtfs/,bus,Mobility Database feed catalog,P3,
|
||||
FR Angers Loire Métropole GTFS,gtfs,https://transport.data.gouv.fr/resources/83579/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83579/latest.zip
|
||||
FR ARAVIS Bus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/14769e22-a9cd-42b8-85a7-2e0f556e8091,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83755/latest.zip
|
||||
FR Bibus GTFS,gtfs,https://ratpdev-mosaic-prod-bucket-raw.s3-eu-west-1.amazonaws.com/11/exports/1/gtfs.zip,FR,see https://transport.beta.gouv.fr/resources/43286,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-999/latest.zip
|
||||
FR Bibus GTFS,gtfs,https://applications002.brest-metropole.fr/VIPDU72/GPB/Lot_BrestMetropole_Bibus.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/bibus/593/20211210/gtfs.zip
|
||||
FR BlaBlaCar Bus GTFS,gtfs,https://bus-api.blablacar.com/gtfs.zip,FR,see https://www.data.gouv.fr/fr/datasets/blablacar-bus-horaires-theoriques-et-temps-reel-du-reseau-europeen/#/information,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1314/latest.zip
|
||||
FR Bmob GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=bernay,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-bernay-lbus-gtfs-gtfs-rt,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1886/latest.zip
|
||||
FR BreizhGo GTFS,gtfs,https://exs.breizgo.cityway.fr/ftp/GTFS/MOBIBREIZHBRET.gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/base-de-donnees-multimodale-transports-publics-en-bretagne-mobibreizh-gtfs,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1837/latest.zip
|
||||
FR Buss GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=buss-cdasaintes,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-buss-saintes-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1888/latest.zip
|
||||
FR CA du Pays de Grasse GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/79822/79822.20230921.151241.357814.zip,FR,see https://transport.data.gouv.fr/datasets/lignes-regulieres-de-transports-sillages-en-pays-de-grasse-urbain-et-scolaire,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2004/latest.zip
|
||||
FR CAA GTFS,gtfs,https://exs.tcra2.cityway.fr/gtfs.aspx?key=UID&operatorCode=TCRA,FR,see https://transport.data.gouv.fr/datasets/gtfs-et-gtfs-rt-reseau-orizo-grand-avignon,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1878/latest.zip
|
||||
FR Car Jaune GTFS,gtfs,https://pysae.com/api/v2/groups/car-jaune/gtfs/pub,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-au-format-gtfs-et-horaires-temps-reel-au-format-gtfs-rt-du-reseau-car-jaune-a-la-reunion,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2458/latest.zip
|
||||
FR Cara'bus GTFS,gtfs,https://data.agglo-royan.fr/dataset/9b761974-a195-4e33-91b7-ecee3b368016/resource/d4915904-ebd0-43cf-9b35-fbfc04ce91fd/download/gtfs_20250613_092554_tdra.zip,FR,see https://transport.data.gouv.fr/datasets/donnees-gtfs-du-reseau-de-transport-public-cara-bus,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2842/latest.zip
|
||||
FR Carabus GTFS,gtfs,https://data.agglo-royan.fr/dataset/9b761974-a195-4e33-91b7-ecee3b368016/resource/144c2734-9d66-4177-904d-a67768f5ee1d/download/carabus-royan-fr20170326.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/carabus/873/20181001/gtfs.zip
|
||||
FR Cars Région Auvergne-Rhône-Alpes (Transisère) GTFS,gtfs,https://www.itinisere.fr/fr/donnees-open-data/169/OpenData/Download?fileName=CG38.GTFS.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-985/latest.zip
|
||||
FR CHATEAUROUX METROPOLE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7e14bf5c-6afb-452f-a469-02e6f44fcfdd,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81365/latest.zip
|
||||
FR CHATEAUROUX METROPOLE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/006828d3-e583-4ce7-9226-4b3a59ac2d28,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83445/latest.zip
|
||||
FR Ciotabus GTFS,gtfs,http://tsvc2.pilote3.cityway.fr/api/Export/v1/GetExportedDataFile?ExportFormat=Gtfs&OperatorCode=CIOTABUS,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1056/latest.zip
|
||||
FR Clermont Auvergne Métropole GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/313b9e3c-5da1-4509-be05-a334f8af1265,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/ba2f71b6-23ca-4b51-b319-bd9576933d94,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82998/latest.zip
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/eff447a1-b61c-4573-8aae-c778bd8e07d0,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83021/latest.zip
|
||||
FR Collectivité de Corse GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/69c3db8a-a5fd-471f-b59f-46b9faded381,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83882/latest.zip
|
||||
FR Collegamenti marittimi Grandi Navi Veloci GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/collegamenti-marittimi-grandi-navi-veloci/1164/20210512/gtfs.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1265/latest.zip
|
||||
FR Collegamenti marittimi Grimaldi GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/collegamenti-marittimi-moby/1163/20210927/gtfs.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1192/latest.zip
|
||||
FR Collegamenti marittimi Grimaldi GTFS,gtfs,https://www.sardegnamobilita.it/opendata/R_SARDEGTRASP_00031_1_GTFS_dati_grimaldi.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2994/latest.zip
|
||||
FR Collegamenti marittimi Sardinia Ferries GTFS,gtfs,https://www.sardegnamobilita.it/opendata/dati_sardinia_ferries.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1050/latest.zip
|
||||
FR Collegamenti marittimi Sardinia Ferries GTFS,gtfs,https://www.sardegnamobilita.it/opendata/R_SARDEGTRASP_00028_1_dati_sardinia_ferries.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2995/latest.zip
|
||||
FR Collegamenti marittimi Tirrenia GTFS,gtfs,https://www.sardegnamobilita.it/opendata/R_SARDEGTRASP_00026_1_GTFS_dati_tirrenia.zip,FR,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2996/latest.zip
|
||||
FR COMMUNAUTE D'AGGLOMERATION GRAND SUD CARAIBE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/cc0e964e-2021-4b2d-8bde-05c8b493a248,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83511/latest.zip
|
||||
FR Communauté d'Agglomération de Bastia GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f88dae7d-cf93-4a4b-aa82-9256d5f3cefc,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81624/latest.zip
|
||||
FR Communauté d'Agglomération de l'Auxerrois GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/ade0bfc2-0e7a-4087-96dd-a76aeb61d196,FR,see https://transport.data.gouv.fr/datasets/reseau-de-transports-en-commun-de-la-communaute-dagglomeration-de-lauxerrois/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-642/latest.zip
|
||||
FR Communauté d'agglomération de la Baie de Somme GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/0a10e218-937c-4795-b9e7-c81533a38446,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82577/latest.zip
|
||||
FR Communauté d'Agglomération de la Région de Château-Thierry GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f56b6c9c-0a25-4d87-a1b1-4b108e3d0a22,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81574/latest.zip
|
||||
FR Communauté d'Agglomération du Pays Ajaccien GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3f08460c-8ec7-4b9e-a244-8855292b9e24,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-8645/latest.zip
|
||||
FR Communauté d'Agglomération du Pays de Saint-Omer GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/584491f9-0ace-4ee2-a49e-23caedb6f3f1,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81457/latest.zip
|
||||
FR Communauté d'Agglomération Ventoux Comtat Venaissin GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/76a25b4b-55cb-4dfc-a884-e0acd9b81a13,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82728/latest.zip
|
||||
FR Communauté de communes de Millau Grands Causses GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b38a8202-1d7e-4345-964c-f07a335ea90b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82411/latest.zip
|
||||
FR Communauté de communes de Millau Grands Causses GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c5b39c38-3a01-40fb-9d66-26ce3a760b9b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83930/latest.zip
|
||||
FR Communauté de communes de Serre-Ponçon GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/619cfea8-a866-40bc-b431-b7f85af6066f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81974/latest.zip
|
||||
FR Communauté de communes de Serre-Ponçon GTFS,gtfs,https://transport.data.gouv.fr/resources/84030/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-84030/latest.zip
|
||||
FR Communauté de communes de Serre-Ponçon GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f8a8d797-035b-4adf-a447-a8624639232e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83736/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7a182d63-ebe2-4490-8bc8-188ae7312c23,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81866/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/629ffcc9-152e-4526-9720-3ccf944c97e2,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81772/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d4d91019-c15a-4733-bd2d-0ebf17d69993,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81858/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8d670add-a215-486a-bf96-2b44b3b6aebe,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81867/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7b2f319f-a028-4db7-826f-1bd71d636a01,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81870/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7c5842e2-0420-4e60-a2ad-90c077c03505,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81873/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bf169a63-e51b-44b3-bca0-e7aaa3335109,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81863/latest.zip
|
||||
FR Communauté de Communes du Golfe de Saint-Tropez GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/147fd87d-9898-42a9-af88-748c0ac9bb52,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81773/latest.zip
|
||||
FR Communauté de Communes Haute-Tarentaise GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d6c3eae8-fab6-4ff2-adc2-32bb2d213dd7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83629/latest.zip
|
||||
FR Communauté de communes Retz-en-Valois GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/eff9558e-4575-429f-bbe1-0cd37f1e6432,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83788/latest.zip
|
||||
FR Communauté dʼAgglomération Amiens Métropole GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/80705/80705.20240117.090826.898613.zip,FR,see https://transport.data.gouv.fr/datasets/ametis,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1988/latest.zip
|
||||
FR Communauté dʼAgglomération du Pays de Saint-Omer GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/5ec3d89e-cd57-4416-a5ba-c84c4784f958,FR,see https://transport.data.gouv.fr/datasets/transports-en-commun-audomarois-1,bus,Mobility Database feed catalog,P3,
|
||||
FR Communauté dʼAgglomération du Pays de Saint-Omer GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/81457/81457.20231230.060844.417532.zip,FR,see https://transport.data.gouv.fr/datasets/transports-en-commun-audomarois-1,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1989/latest.zip
|
||||
FR Communauté Urbaine d'Arras GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/4ded8fb3-a38d-45c5-b4a7-a2f6ac17ba4f,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83372/latest.zip
|
||||
FR Communauté Urbaine de Dunkerque (Réseau Dk Bus) GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/c82ea9cd-f9b0-4ce3-ad04-139f4f2f1e84,FR,see https://transport.data.gouv.fr/datasets/offre-de-transports-reseau-dk-bus-de-la-communaute-urbaine-de-dunkerque-gtfs,bus,Mobility Database feed catalog,P3,
|
||||
FR Communauté Urbaine de Dunkerque (Réseau Dk Bus) GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/81377/81377.20231227.061215.032973.zip,FR,see https://transport.data.gouv.fr/datasets/offre-de-transports-reseau-dk-bus-de-la-communaute-urbaine-de-dunkerque-gtfs,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1990/latest.zip
|
||||
FR COMMUNE DE COURCHEVEL GTFS,gtfs,https://transport.data.gouv.fr/resources/83744/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83744/latest.zip
|
||||
FR Compagnie des Transports Strasbourgeois (CTS) GTFS,gtfs,http://opendata.cts-strasbourg.fr/fichiers/gtfs/google_transit.zip,FR,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-856/latest.zip
|
||||
FR Corsica Ferries GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b95d6838-1143-4653-840f-eebc33c16fa7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81996/latest.zip
|
||||
FR DECAZEVILLE COMMUNAUTE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f72c946b-b798-4013-ab1c-05f98bea8230,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-22408/latest.zip
|
||||
FR DECAZEVILLE COMMUNAUTE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7242620a-b4af-4be5-bfa4-b1f684442552,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80253/latest.zip
|
||||
FR DiviaMobilités GTFS,gtfs,https://data.explore.divia.fr/api/datasets/1.0/gtfs-divia-mobilites/attachments/gtfs_diviamobilites_current_zip,FR,see https://transport.data.gouv.fr/datasets/gtfs-diviamobilites,bus,Mobility Database feed catalog,P3,
|
||||
FR DiviaMobilités GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/80742/80742.20241107.160916.535554.zip,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2153/latest.zip
|
||||
FR Département de Loire-Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/b04f2bcc-a99c-4d45-a22a-b61f491c30ad,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,
|
||||
FR Département de Loire-Atlantique GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/daebfad5-7273-4637-b897-88308b2d96d7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83727/latest.zip
|
||||
"FR Eurostar, Thalys, SNCF, DB ICE, DB Intercity, c2c Service, Chiltern Railways Service, First Great Western Service, Gatwick Express Service, East Coast, Heathrow Express Service, Hull Trains Service, East Midland Trains, Greater Anglia, L",gtfs,https://gtfs.eurostar.com/assets/gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/eurostar-gtfs,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2431/latest.zip
|
||||
FR Filibus - Chartres Métropole Transports GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/8d4c3e5c-1702-4649-b47a-b16c6016dcc6,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80587/latest.zip
|
||||
FR Fluo Grand Est GTFS,gtfs,https://transport.data.gouv.fr/resources/80431/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80431/latest.zip
|
||||
FR Ginko GTFS,gtfs,https://api.ginko.voyage/gtfs-ginko.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1116/latest.zip
|
||||
FR Grand Avignon GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/556b9c3d-ec50-406b-9c22-7d37e0f6a15b,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,
|
||||
FR Hobus GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=hobus,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-hobus-de-honfleur-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-836/latest.zip
|
||||
FR Hobus GTFS,gtfs,https://static.data.gouv.fr/resources/hobus-honfleur/20250110-080454/pt-th-offer-hobus-gtfs-20250109-812-opendata.zip,FR,see https://transport.data.gouv.fr/datasets/hobus-honfleur,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2685/latest.zip
|
||||
FR iDBUS GTFS,gtfs,https://api.idbus.com/gtfs.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/idbus/519/20211017/gtfs.zip
|
||||
FR Ile dʼYeu GTFS,gtfs,https://app.mecatran.com/utw/ws/gtfsfeed/static/pdlYeuContinent?apiKey=2c715462180f36483d5f24340c706b627f2f2361,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1257/latest.zip
|
||||
FR Ilévia (Réseau de transport Transpole de la Métropole Européenne de Lille) GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/81995/81995.20241104.060921.630251.zip,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2152/latest.zip
|
||||
FR Impulsyon GTFS,gtfs,https://odp.larochesuryon.fr/248500589_arrets-horaires-et-circuits-impulsyon-gtfs-rsy/gtfs_impulsyon.zip,FR,see https://transport.data.gouv.fr/datasets/arrets-horaires-et-circuits-impulsyon-a-la-roche-sur-yon,bus,Mobility Database feed catalog,P3,
|
||||
FR Impulsyon GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/79520/79520.20240125.180808.025916.zip,FR,see https://transport.data.gouv.fr/datasets/reseau-de-transport-impulsyon-a-la-roche-sur-yon,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2005/latest.zip
|
||||
FR INTERCOM GTFS,gtfs,https://static.data.gouv.fr/resources/horaires-du-reseau-de-bus-intercom-3/20240829-161146/gtfs-open-data.zip,FR,see https://transport.data.gouv.fr/datasets/horaires-du-reseau-de-bus-intercom-3,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2626/latest.zip
|
||||
FR Intercom Bernay Terres de Normandie GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/3d1470de-91f7-4d78-8809-790b7ed02662,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81284/latest.zip
|
||||
FR Irigo GTFS,gtfs,https://angersloiremetropole.opendatasoft.com/api/datasets/1.0/angers-loire-metropole-horaires-reseau-irigo-gtfs-rt/alternative_exports/irigo_gtfs_zip/,FR,see https://angersloiremetropole.opendatasoft.com/explore/dataset/angers-loire-metropole-horaires-reseau-irigo-gtfs-rt/export/,bus,Mobility Database feed catalog,P3,
|
||||
FR Irigo GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/80036/80036.20240213.060926.502564.zip,FR,see https://transport.data.gouv.fr/datasets/angers-loire-metropole-reseau-irigo-gtfs-gtfs-rt-siri,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2007/latest.zip
|
||||
FR J'ybus GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/f8b861f3-6fbd-4eed-a82e-9fed96c9dd09,FR,see https://transport.data.gouv.fr/datasets/offre-de-transports-jybus-a-rumilly,bus,Mobility Database feed catalog,P3,
|
||||
FR La Navette de la CC Sel et Vermois GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=sel-et-vermois,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-de-la-navette-de-sel-et-vermois-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1895/latest.zip
|
||||
FR Le Grand Chalon GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/7d3fd400-c55e-4994-9d85-de1dd4291dcb,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-du-reseau-zoom-le-grand-chalon-gtfs,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-658/latest.zip
|
||||
FR Le Titus GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=reseau-titus,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-titus-rosny-sous-bois-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1890/latest.zip
|
||||
FR lebateau GTFS,gtfs,http://tsvc2.pilote3.cityway.fr/api/Export/v1/GetExportedDataFile?ExportFormat=Gtfs&OperatorCode=FRIOUL,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-843/latest.zip
|
||||
FR Les bus des Cigales GTFS,gtfs,http://tsvc2.pilote3.cityway.fr/api/Export/v1/GetExportedDataFile?ExportFormat=Gtfs&OperatorCode=CIGALES,FR,see http://opendata.regionpaca.fr/fileadmin/user_upload/tx_ausyopendata/licences/Licence-Ouverte-Open-Licence-ETALAB.pdf,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-889/latest.zip
|
||||
FR Les Saisies GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=lessaisies,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-de-la-station-de-ski-les-saisies-gtfs-gtfs-rt,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1899/latest.zip
|
||||
"FR Lignes d'Azur, Zou! Alpes-Maritimes GTFS",gtfs,http://opendata.nicecotedazur.org/data/dataset/export-quotidien-au-format-gtfs-du-reseau-de-transport-lignes-d-azur/resource/aacb4eea-d008-4b13-b17a-848b8ced7e03/download,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-845/latest.zip
|
||||
FR LILA GTFS,gtfs,https://app.mecatran.com/utw/ws/gtfsfeed/static/lilapresquile?apiKey=3b5f1b483e47272d497403293c565f2c6a440b5c,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1261/latest.zip
|
||||
FR Marcoulines GTFS,gtfs,http://tsvc2.pilote3.cityway.fr/api/Export/v1/GetExportedDataFile?ExportFormat=Gtfs&OperatorCode=MARCOULINE,FR,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-842/latest.zip
|
||||
FR Marinéo GTFS,gtfs,https://ratpdev-mosaic-prod-bucket-raw.s3-eu-west-1.amazonaws.com/109/exports/3/gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-de-transport-marineo,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1876/latest.zip
|
||||
FR Martinique Transport GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/046eac2f-8af9-4e42-9a2e-aae1745176d2,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80394/latest.zip
|
||||
FR Martinique Transport GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/eaeab898-582e-4554-b79a-845773f041f5,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81193/latest.zip
|
||||
FR MAT mobilités GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/43126/43126.20240709.120931.048325.zip,FR,see https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/43126/43126.20240709.120931.048325.zip,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2139/latest.zip
|
||||
FR May'bus GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=maybus,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-maybus-mayenne-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1884/latest.zip
|
||||
FR Metz GTFS,gtfs,https://data.lemet.fr/documents/LEMET-gtfs.zip,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1298/latest.zip
|
||||
FR Metz GTFS,gtfs,https://si.metzmetropole.fr/fiches/opendata/gtfs_current.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/metz-metropole/850/20230324/gtfs.zip
|
||||
FR Mobilités M - OT Grenoble GTFS,gtfs,https://data.mobilites-m.fr/api/gtfs/BUL,FR,see https://data.mobilites-m.fr/blog/licence-d-utilisation-des-donnees,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-641/latest.zip
|
||||
FR Mobilités M - Tag GTFS,gtfs,https://data.mobilites-m.fr/api/gtfs/SEM,FR,see https://data.mobilites-m.fr/blog/licence-d-utilisation-des-donnees,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1055/latest.zip
|
||||
FR Mobilités M - TouGo GTFS,gtfs,https://data.mobilites-m.fr/api/gtfs/GSV,FR,see https://data.mobilites-m.fr/blog/licence-d-utilisation-des-donnees,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1855/latest.zip
|
||||
FR Mobilités M - Transports du Pays Voironnais GTFS,gtfs,https://data.mobilites-m.fr/api/gtfs/TPV,FR,see https://data.mobilites-m.fr/blog/licence-d-utilisation-des-donnees,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1854/latest.zip
|
||||
FR monRéZO GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/128b06bc-f263-4530-8e03-395402f87256,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82999/latest.zip
|
||||
FR Métropole Aix-Marseille GTFS,gtfs,https://app.mecatran.com/utw/ws/gtfsfeed/static/mamp-pam?apiKey=596e694f3330142c525b7d6b123a5b055f744058,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2133/latest.zip
|
||||
FR Métropole du Grand Nancy GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/22ff00b8-aa07-4f01-af47-8f198964bb1a,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81346/latest.zip
|
||||
FR Naolib GTFS,gtfs,https://data.nantesmetropole.fr/explore/dataset/244400404_tan-arrets-horaires-circuits/files/16a1a0af5946619af621baa4ad9ee662/download/,FR,see https://data.nantesmetropole.fr/explore/dataset/244400404_tan-arrets-horaires-circuits/information/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1025/latest.zip
|
||||
FR NEMUS (Réseau de transports de Flers Agglo) GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/d8b9a49f-db3b-4b53-b0a0-345072ce1249,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83265/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.pigma.org/public/opendata/nouvelle_aquitaine_mobilites/publication/naq-aggregated-gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/arrets-horaires-et-parcours-theoriques-des-reseaux-nva-mobilite-agreges-1,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2386/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e09ee736-ae5e-48c8-ac13-3839e3f2f74a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82321/latest.zip
|
||||
FR Nouvelle-Aquitaine Mobilités GTFS,gtfs,https://transport.data.gouv.fr/resources/83870/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83870/latest.zip
|
||||
FR Néva GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=granville,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-neva-granville-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1887/latest.zip
|
||||
FR Offre de transport du réseau Libellus GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/70c9f936-129e-41f4-940a-8e6f272535d1,FR,see http://opendatacommons.org/licenses/odbl/summary/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1286/latest.zip
|
||||
FR Ondéa - COMPAGNIE DE TRANSPORT DU LAC DU BOURGET-CTLB GTFS,gtfs,https://ratpdev-mosaic-prod-bucket-raw.s3-eu-west-1.amazonaws.com/33/exports/1/gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/ctlb-donnees-theoriques-et-rt-aix-les-bains-lac-du-bourget/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-600/latest.zip
|
||||
FR Oùra GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/13a04a39-0bb3-4e4f-825e-cf481e6cea92,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,
|
||||
FR Palmbus GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/d24f3602-db71-4736-b37a-bc8cb961a713,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-du-reseau-palmbus-cannes-pays-de-lerins-gtfs/,bus,Mobility Database feed catalog,P3,
|
||||
FR Palmbus GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/79420/79420.20240125.160818.158371.zip,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-gtfs-gtfs-rt-du-reseau-palmbus-cannes-pays-de-lerins,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2003/latest.zip
|
||||
FR PASTEL GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/f360cebb-ce17-495b-aac6-05b4f52ea444,FR,see https://transport.data.gouv.fr/datasets/donnees-gtfs-2022-reseau-bus-pastel,bus,Mobility Database feed catalog,P3,
|
||||
FR PASTEL GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/81589/81589.20240202.094639.078502.zip,FR,see https://transport.data.gouv.fr/datasets/horaires-hiver-2023-2024,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1998/latest.zip
|
||||
FR Pays de la Loire GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/region-des-pays-de-la-loire/1071/20220914/gtfs.zip,FR,see http://opendatacommons.org/licenses/odbl/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1258/latest.zip
|
||||
FR PROVENCE-ALPES-AGGLOMERATION GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fbc87265-7328-46a1-a202-4f11ed7b52b7,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81367/latest.zip
|
||||
FR PROVENCE-ALPES-AGGLOMERATION GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/21b768a8-28d1-4f18-a7b0-c848da95d95e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81791/latest.zip
|
||||
FR PROVENCE-ALPES-AGGLOMERATION GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bfc5c9da-7c0d-4f75-801d-be7e15d13e24,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83792/latest.zip
|
||||
FR PROVENCE-ALPES-AGGLOMERATION GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/6752b334-92a4-4f6f-ac40-dc78458ad514,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83898/latest.zip
|
||||
FR Proxim iTi GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/32634e69-d0e6-4afc-b8b1-39214d0cd837,FR,see https://transport.data.gouv.fr/datasets/offre-de-transport-du-reseau-proxim-iti-gtfs,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1880/latest.zip
|
||||
FR Quadri orari traghetti GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/quadri-orari-traghetti/1169/20240104/gtfs.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1268/latest.zip
|
||||
FR R'bus GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/65dbad9f-4e6c-467c-8320-e159cd7c2cca,FR,see https://transport.data.gouv.fr/datasets/offre-de-transport-rbus-de-la-c-a-de-rochefort-ocean,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1879/latest.zip
|
||||
FR RespiRé GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=iledere75923021,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-respire-ile-de-re-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1894/latest.zip
|
||||
FR REZO GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=verdun-rezo,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-grand-verdun-agglomeration-rezo-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1897/latest.zip
|
||||
FR RLV Mobilités GTFS,gtfs,https://static.data.gouv.fr/resources/donnees-gtfs-1/20241119-134820/gtfs-24-10-14-au-25-08-31.zip,FR,see https://transport.data.gouv.fr/datasets/horaires-rlv-mobilites,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2404/latest.zip
|
||||
FR Régie Autonome des Transports Parisiens GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/regie-autonome-des-transports-parisiens/413/20210325/gtfs.zip,FR,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1291/latest.zip
|
||||
FR Régie des Transports Métropolitains (RTM) GTFS,gtfs,http://tsvc2.pilote3.cityway.fr/api/Export/v1/GetExportedDataFile?ExportFormat=Gtfs&OperatorCode=RTM,FR,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-841/latest.zip
|
||||
FR Région Auvergne-Rhône-Alpes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/932fb155-53d5-4717-87e0-b42fe3aa538b,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82729/latest.zip
|
||||
FR Région Hauts-de-France GTFS,gtfs,https://opendata.hautsdefrance.fr/sites/default/files/data/transport/RHDF_GTFS_SCO_80.zip,FR,see https://transport.data.gouv.fr/datasets/arrets-horaires-et-parcours-theoriques-gtfs-du-reseau-routier-regional-de-transport-8/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-789/latest.zip
|
||||
FR Région Provence-Alpes-Côte-d'Azur GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/07cedea1-d25e-45fc-882a-0299e211d549,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83647/latest.zip
|
||||
FR Réseau de transport en commun Le Havre Seine Métropole LiA GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/1e666e24-58ee-46b9-8952-ea2755ba88f2,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80254/latest.zip
|
||||
FR Réseau de transport Transpole de la Métropole Européenne de Lille GTFS,gtfs,https://opendata.lillemetropole.fr/api/datasets/1.0/transport_arret_transpole-point/alternative_exports/gtfszip,FR,see https://opendata.lillemetropole.fr/explore/dataset/transport_arret_transpole-point/?disjunctive.filename&disjunctive.commune,bus,Mobility Database feed catalog,P3,
|
||||
FR Réseau interurbain - Creuse GTFS,gtfs,https://www.pigma.org/public/opendata/nouvelle_aquitaine_mobilites/publication/creuse-aggregated-gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/arrets-horaires-et-parcours-theoriques-des-reseaux-naq-cre-nva-m-1,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2686/latest.zip
|
||||
FR Réseau interurbain liO Occitanie GTFS,gtfs,https://app.mecatran.com/utw/ws/gtfsfeed/static/lio?apiKey=2b160d626f783808095373766f18714901325e45&type=gtfs_lio,FR,see https://transport.data.gouv.fr/datasets/reseau-lio-occitanie,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2604/latest.zip
|
||||
FR Réseau STAN GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/grand-nancy/1068/20190910/gtfs.zip,FR,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1117/latest.zip
|
||||
FR Réseau urbain Cap Cotentin GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/2e97c9b3-a59f-42dd-9b9e-a232fa771f21,FR,see https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1840/latest.zip
|
||||
FR Réseau urbain Neobus GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/e25586ac-9c14-4872-b00e-e66cd23d4413,FR,see https://www.etalab.gouv.fr/wp-content/uploads/2017/04/ETALAB-Licence-Ouverte-v2.0.pdf,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1839/latest.zip
|
||||
FR Réseau urbain Stan GTFS,gtfs,https://hstan.g-ny.eu/gtfs/gtfs_stan.zip,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1256/latest.zip
|
||||
FR Sailcoop GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c6be5123-466e-4989-8843-549bee825750,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83311/latest.zip
|
||||
FR Saint-Etienne Métropole GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/8b3e71e2-1155-4fb0-bf4a-cdddbb496e29,FR,see https://transport.data.gouv.fr/datasets/donnees-horaires-theoriques-gtfs-du-reseau-de-transport-de-la-metropole-de-saint-etienne-stas/,bus,Mobility Database feed catalog,P3,
|
||||
FR Saint-Nazaire agglo - La CARENE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/e7f40c40-b39d-4583-bc03-05e5bce06949,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82693/latest.zip
|
||||
FR Saint-Nazaire agglo - La CARENE GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/45ab4dba-29db-4773-99ef-d940ed4f4da2,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83847/latest.zip
|
||||
FR Sillages Urbain GTFS,gtfs,https://static.data.gouv.fr/resources/lignes-regulieres-de-transports-en-pays-de-grasse/20210203-152533/20210101-gtfs-sillagesurbain.zip,FR,,bus,Mobility Database feed catalog,P3,
|
||||
FR SNCF GTFS,gtfs,https://eu.ftp.opendatasoft.com/sncf/gtfs/export-intercites-gtfs-last.zip,FR,see https://data.sncf.com/pages/cgu/A1#A1,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1783/latest.zip
|
||||
FR SNCF GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f7261f25-f76c-4324-97bb-c46c78316d6f,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83675/latest.zip
|
||||
FR SNCF GTFS,gtfs,https://eu.ftp.opendatasoft.com/sncf/gtfs/export-ter-gtfs-last.zip,FR,see https://data.sncf.com/pages/cgu/A1#A1,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1205/latest.zip
|
||||
FR SNCF GTFS,gtfs,https://eu.ftp.opendatasoft.com/sncf/gtfs/export_gtfs_voyages.zip,FR,see https://data.sncf.com/pages/cgu/A1#A1,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1782/latest.zip
|
||||
FR Société d'économie Mixte des Transports Montalbanais GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/f37827ec-b14c-40de-8a21-3f459b649c8a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82434/latest.zip
|
||||
FR SPL Estival/ Cirest GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/919b4ca6-11e3-4156-bf59-5c0e7f25d929,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-81253/latest.zip
|
||||
FR STAR GTFS,gtfs,https://eu.ftp.opendatasoft.com/star/gtfs/GTFS_2_20250620_20250629_20250603111256.zip,FR,see https://doc.transport.data.gouv.fr/presentation-et-mode-demploi-du-pan/conditions-dutilisation-des-donnees/licence-odbl,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1159/latest.zip
|
||||
FR STGA - réseau Möbius GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/2e25c067-65a6-40db-8201-62aa1c258d36,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-reseau-mobius,bus,Mobility Database feed catalog,P3,
|
||||
FR STGA - réseau Möbius GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/79752/79752.20231005.100848.400086.zip,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-reseau-mobius,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2002/latest.zip
|
||||
FR STIF GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/stif/822/20210702/gtfs.zip,FR,see http://stif.info/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1283/latest.zip
|
||||
FR SURF GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/665d6c43-598d-4d9d-aa98-206072f4dfa0,FR,see https://transport.data.gouv.fr/datasets/reseau-urbain-surf,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1877/latest.zip
|
||||
FR Syndicat Intercommunal des Transports urbains de l’Agglomération du Calaisis GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/fd7655bc-bfe7-4c13-ba57-2ce9f9282416,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83308/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82316/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82316/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82314/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82314/latest.zip
|
||||
FR Syndicat mixte Atoumod GTFS,gtfs,https://transport.data.gouv.fr/resources/82313/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82313/latest.zip
|
||||
FR SYTRAL Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2f1e1bfc-d378-4e3d-a175-e26f14abc3e6,FR,see https://wiki.lafabriquedesmobilites.fr/wiki/Licence_Mobilit%C3%A9s,bus,Mobility Database feed catalog,P3,
|
||||
FR T'MM GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=moselle-et-madon,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-tmm-moselle-et-madon-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1891/latest.zip
|
||||
FR TAC GTFS,gtfs,https://ratpdev-mosaic-prod-bucket-raw.s3-eu-west-1.amazonaws.com/129/exports/1/gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/offre-de-transports-reseau-tac-annemasse-agglo-decembre-2022,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1883/latest.zip
|
||||
FR TAG GTFS,gtfs,https://metromobilite.fr/data/Horaires/SEM-GTFS.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/tag/594/20221110/gtfs.zip
|
||||
FR TER Pays de la Loire GTFS,gtfs,https://eu.ftp.opendatasoft.com/sncf/plandata/export-ter-gtfs-last.zip,FR,see https://www.etalab.gouv.fr/licence-ouverte-open-licence/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1259/latest.zip
|
||||
FR TER Pays de la Loire GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/region-des-pays-de-la-loire/1072/20190403/gtfs.zip,FR,see http://opendatacommons.org/licenses/odbl/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1153/latest.zip
|
||||
FR Tisséo GTFS,gtfs,https://data.toulouse-metropole.fr/api/v2/catalog/datasets/tisseo-gtfs/files/fc1dda89077cf37e4f7521760e0ef4e9,FR,see https://opendatacommons.org/licenses/odbl/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1024/latest.zip
|
||||
FR Tours Métropole Val de Loire GTFS,gtfs,https://data.tours-metropole.fr/api/v2/catalog/datasets/zip-horaires-theoriques-du-reseau-fil-bleu-tours-metropole-gtfs/files/aa7f5e79190470545a898703a9c140be,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-du-reseau-fil-bleu-tours-metropole-gtfs,bus,Mobility Database feed catalog,P3,
|
||||
FR Tours Métropole Val de Loire GTFS,gtfs,https://data.tours-metropole.fr/api/datasets/1.0/horaires-temps-reel-gtfsrt-reseau-filbleu-tmvl/alternative_exports/filbleu_gtfszip/,FR,see https://transport.data.gouv.fr/datasets/fil-bleu-syndicat-des-mobilites-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1987/latest.zip
|
||||
FR Tram Train GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/region-des-pays-de-la-loire/1074/20200911/gtfs.zip,FR,see https://opendatacommons.org/licenses/odbl/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1260/latest.zip
|
||||
FR Trans-Landes GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/cf6776cd-2f5c-4190-8bec-3578d7f87372,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83312/latest.zip
|
||||
FR Transavold GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=transavold-stavold,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-transavold-saint-avold-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1896/latest.zip
|
||||
FR Transdev Savoie GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/092c3d95-d415-46c6-a07f-92d6e45453b1,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82682/latest.zip
|
||||
FR Transdev Savoie GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/9090506e-731b-4aec-b5ea-116ae6902ce0,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82746/latest.zip
|
||||
FR Transdev Savoie GTFS,gtfs,https://transport.data.gouv.fr/resources/83787/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83787/latest.zip
|
||||
FR Transdev • Normandie GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/777bdff5-01ee-4f45-aaed-177cbca9a087,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-79667/latest.zip
|
||||
FR Transilien SNCF GTFS,gtfs,http://files.transilien.com/horaires/gtfs/export-TN-GTFS-LAST.zip,FR,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1069/latest.zip
|
||||
FR Transport en Moselle et Madon (T'MM) GTFS,gtfs,https://www.datagrandest.fr/metadata/fluo-grand-est/FR-200052264-T0025-0000/fluo-grand-est-tmm-gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/fr-200052264-t0025-0000-1,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2738/latest.zip
|
||||
FR TRANSPORT PHOENIX GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/7213015d-a5b6-429d-84e8-c8c01e4cb84e,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83146/latest.zip
|
||||
FR Transports Bordeaux Métropole (TBM) GTFS,gtfs,https://bdx.mecatran.com/utw/ws/gtfsfeed/static/bordeaux?apiKey=opendata-bordeaux-metropole-flux-gtfs-rt,FR,see https://transport.data.gouv.fr/datasets/offres-de-services-bus-tram-et-scolaire-au-format-gtfs-gtfs-rt-siri-lite-1,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2622/latest.zip
|
||||
FR Transports de l'agglomération de Montpellier (TAM) GTFS,gtfs,https://data.montpellier3m.fr/sites/default/files/ressources/TAM_MMM_GTFS.zip,FR,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-844/latest.zip
|
||||
FR Transports en Commun de l’Agglomération Troyenne (TCAT) GTFS,gtfs,https://static.data.gouv.fr/resources/donnees-tcat-troyes-champagne-metropole-1/20250424-134618/gtfs.zip,FR,see https://transport.data.gouv.fr/datasets/donnees-tcat-troyes-champagne-metropole-1,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2628/latest.zip
|
||||
FR Transports en Commun de l’Agglomération Troyenne (TCAT) GTFS,gtfs,https://static.data.gouv.fr/resources/donnees-tcat-troyes-champagne-metropole-1/20250428-121833/gtfs-navineo.zip,FR,see https://transport.data.gouv.fr/datasets/donnees-tcat-troyes-champagne-metropole-1,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2627/latest.zip
|
||||
FR Transports en Commun Lyonnais (TCL) GTFS,gtfs,https://download.data.grandlyon.com/files/rdata/tcl_sytral.tcltheorique/GTFS_TCL.ZIP,FR,see http://opendatacommons.org/licenses/odbl/,bus,Mobility Database feed catalog,P3,
|
||||
FR Transports en Commun Lyonnais (TCL) GTFS,gtfs,https://transport-data-gouv-fr-resource-history-prod.cellar-c2.services.clever-cloud.com/c6a3d03f-f8e4-44f3-9e7d-fcaf1ab30b92/c6a3d03f-f8e4-44f3-9e7d-fcaf1ab30b92.20220415.060247.709832.zip,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-du-reseau-transports-en-commun-lyonnais,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2006/latest.zip
|
||||
FR Transports Publics du Choletais GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/71e8ee8e-0b55-40a1-a297-21ef89aba4dc,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-reseau-choletbus,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1789/latest.zip
|
||||
"FR TUA TRANSDEV, Envibus, Keolis, STCAR GTFS",gtfs,https://nextcloud.agglo-casa.fr/nextcloud/index.php/s/F10PMtqPtFAoeqG/download,FR,,bus,Mobility Database feed catalog,P3,
|
||||
FR TUM GTFS,gtfs,https://zenbus.net/gtfs/static/download.zip?dataset=tum,FR,see https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-transports-urbains-mendois-mende-gtfs-gtfs-rt,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1893/latest.zip
|
||||
FR Ulysse GTFS,gtfs,https://www.data.gouv.fr/fr/datasets/r/e8a86701-6359-45de-bee5-95e648ec04e3,FR,see http://opendata.regionpaca.fr/fileadmin/user_upload/tx_ausyopendata/licences/Licence-Ouverte-Open-Licence-ETALAB.pdf,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-988/latest.zip
|
||||
FR Ulysse GTFS,gtfs,http://tsvc2.pilote3.cityway.fr/api/Export/v1/GetExportedDataFile?ExportFormat=Gtfs&OperatorCode=MILSAB,FR,see http://opendata.regionpaca.fr/fileadmin/user_upload/tx_ausyopendata/licences/Licence-Ouverte-Open-Licence-ETALAB.pdf,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-890/latest.zip
|
||||
FR UNION DES BATELIERS ARCACHONNAIS GTFS,gtfs,https://transport.data.gouv.fr/resources/83747/download?token=xdgqKBTAzhw4DSPz6zeGc4c5eW0LhwztcGv4-vpzP4U,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-83747/latest.zip
|
||||
FR Valence Romans Mobilités GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/46bf6b5c-68c1-4198-a982-caeee88540a3,FR,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-82148/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/2a189c2d-7ccd-4b2e-bca0-40df0734fe0a,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-14652/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/c3fa69f0-f8b1-4324-be67-be72baf80ceb,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-78387/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/206530ec-6a48-44a4-8042-75d76be59636,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-14826/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/300108a3-f8c1-4bc8-873e-f1243d49716b,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-12616/latest.zip
|
||||
FR Zenbus GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/bb187aa1-d027-4a9e-bfa8-67fec4be3c71,FR,see https://opendatacommons.org/licenses/odbl/1.0/,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-43186/latest.zip
|
||||
FR Île-de-France Mobilités (IDFM) GTFS,gtfs,https://data.iledefrance-mobilites.fr/explore/dataset/offre-horaires-tc-gtfs-idfm/files/a925e164271e4bca93433756d6a340d1/download/,FR,see http://vvlibri.org/fr/licence/odbl-10/legalcode/unofficial,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1026/latest.zip
|
||||
GB Citymapper GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/citymapper/894/20180104/gtfs.zip,GB,see https://opendatacommons.org/licenses/pddl/1.0/,bus,Mobility Database feed catalog,P3,
|
||||
GB French Brothers Ltd GTFS,gtfs,https://www.frenchbrothers.co.uk/link/transport/google_transit.zip,GB,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-986/latest.zip
|
||||
GB Go Devon Bus GTFS,gtfs,https://data.discoverpassenger.com/operator/godevonbus/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2908/latest.zip
|
||||
GB Transdev Blazefield GTFS,gtfs,https://data.discoverpassenger.com/operator/transdevblazefield/dataset/current/download/gtfs,GB,see https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1953/latest.zip
|
||||
GB Transport For London (TfL) GTFS,gtfs,https://storage.googleapis.com/teleport-gtfs/tflgtfs_nobus.zip,GB,see https://tfl.gov.uk/corporate/terms-and-conditions/transport-data-service,bus,Mobility Database feed catalog,P3,
|
||||
IE Aircoach GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_aircoach.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Airport Hopper GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_dualway.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Ashbourne Connect GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Ashbourne_Connect.zip,IE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tld-4534/latest.zip
|
||||
IE Buggys Coaches GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_buggy.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Burkesbus GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_burkes.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Bus Feda Teoranta GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_fedateoranta.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Bus Éireann GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_buseireann.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Catherine Madigan GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_cmadigan.zip,IE,see https://data.gov.ie/licence,bus,Mobility Database feed catalog,P3,
|
||||
IE City Direct GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_citydirect.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Citylink GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_citylink.zip,IE,see https://data.gov.ie/licence,bus,Mobility Database feed catalog,P3,
|
||||
IE Collins Coaches GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_collins.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Corduff Travel GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Corduff_Coaches.zip,IE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tld-4535/latest.zip
|
||||
IE Dohertyʼs Coach Travel GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_sdoherty.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Doyle Shipping Group GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_ferries.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Dublin Bus GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_dublinbus.zip,IE,see https://data.gov.ie/licence,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/transport-for-ireland/782/20230315/gtfs.zip
|
||||
IE Dublin Bus Nitelink GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_nitelink.zip,IE,see https://data.gov.ie/licence,bus,Mobility Database feed catalog,P3,
|
||||
IE Express Bus GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_expressbus.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Farragher International Travel Services GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_farragher.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Finnegan-Bray Ltd GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_finnegans.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Go Ahead Ireland GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_goahead.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Halpenny Transport GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_halpenny.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Irish Rail GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_irishrail.zip,IE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/irish-rail/1046/20230412/gtfs.zip
|
||||
IE J.J Kavanagh & Sons GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_jjkavanagh.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE JJ/Bernard Kavanagh GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_bkavanagh.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
"IE John Paul Healy, Kyanitedale Ltd, Philip Farrell, Cummer Coaches Ltd, EastPoint, Galway Bus Ltd, Martin Leydon Coaches, John O'Donoghue & Sons, Gorey Bus Links, McGonagle Bus and Coach Hire, GoBus, Avalen Limited, Treacy Coaches, Barrys ",gtfs,https://www.transportforireland.ie/transitData/google_transit_sro.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Joseph Foley GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_josfoley.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Kearns Transport GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_kearns.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Kenneallyʼs Bus Service GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_kenneallys.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Local Link Mayo GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_locallink.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Luas GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_luas.zip,IE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/transport-for-ireland/1045/20221213/gtfs.zip
|
||||
IE Mangan Tours GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_mangan.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Matthews Coach Hire GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_matthews.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Mc Ginley Coach Travel GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_mcginley.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE McGeehan Coaches GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_mcgeehan.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE McGrath Coaches GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_mcgrath.zip,IE,see https://data.gov.ie/licence,bus,Mobility Database feed catalog,P3,
|
||||
IE Michael Gray Coach Hire GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_mgray.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Michael Kilbride GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_mkilbride.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Morton's Coaches GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_mortons.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE PJ Martley GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_pjmartley.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Slieve Bloom Coach Tours GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_sbloom.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Suirway GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_suirway.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Swords Express GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_swordsexpress.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Tralee Peopleʼs Bus Service GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_tralee.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
IE Westlink Coaches GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Westlink_Coaches.zip,IE,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/tld-5577/latest.zip
|
||||
IE Wexford Bus GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_wexfordbus.zip,IE,,bus,Mobility Database feed catalog,P3,
|
||||
"LU Régime Général des Transports Routiers, Ville de Luxembourg - Service Autobus, Chemins de Fer Luxembourgeois, Syndicat des Tramways Intercommunaux dans le Canton d'Esch, Luxtram, Société Nationale des Chemins de Fer Luxembourgeois GTFS",gtfs,https://data.public.lu/en/datasets/r/10d05f8a-40a9-4bf2-bd8d-d7f783145b36,LU,see https://data.public.lu/en/datasets/horaires-et-arrets-des-transport-publics-gtfs/,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1108/latest.zip
|
||||
NO Kolumbus GTFS,gtfs,https://rkt.no/gt/google_transit.zip,NO,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/kolumbus/54/20211118/gtfs.zip
|
||||
NO Ruter GTFS,gtfs,https://reis.ruter.no/gtfs/gtfs.zip,NO,,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/ruter/240/20170617/gtfs.zip
|
||||
"SE Samtrafiken, SJ, NSB, NSB/SJ, Visingsöleden, UL, Sörmlandstrafiken, ÖstgötaTrafiken, JLT, Länstrafiken Kronoberg, KLT, Region Gotland, Blekingetrafiken, Gällivare Stadstrafik, Hallandstrafiken, Värmlandstrafik, VL, Dalatrafik, X-trafik, ",gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/trafiklab/50/20231223/gtfs.zip,SE,see http://www.trafiklab.se/api,bus,Mobility Database feed catalog,P3,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1321/latest.zip
|
||||
SE Trafiklab GTFS,gtfs,https://openmobilitydata-data.s3.us-west-1.amazonaws.com/public/feeds/storstockholms-lokaltrafik/1086/20200224/gtfs.zip,SE,see https://www.trafiklab.se/node/17865/license,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P3,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1320/latest.zip
|
||||
DE VBB Berlin-Brandenburg GTFS,gtfs,https://www.vbb.de/vbbgtfs,DE,CC-BY attribution VBB Verkehrsverbund Berlin-Brandenburg GmbH,"rail,tram,metro,bus,ferry",VBB official Berlin Open Data GTFS,P5,Berlin/Brandenburg bootstrap feed for the regional model; updated twice weekly; pair with Geofabrik Berlin OSM PBF.
|
||||
|
25
docs/generated/gtfs_test_run_sources.csv
Normal file
25
docs/generated/gtfs_test_run_sources.csv
Normal file
@@ -0,0 +1,25 @@
|
||||
name,kind,url,country,license,mode_scope,source_basis,priority,notes
|
||||
CH Swiss national GTFS,gtfs,https://gtfs.geops.ch/dl/gtfs_complete.zip,CH,verify at opentransportdata.swiss,"rail,tram,metro,bus,ferry",European transport feeds / official Swiss OTD derivative,P0,geOps feed is derived from official Swiss Open Transport Data; verify production terms.
|
||||
NL OpenOV national GTFS,gtfs,http://gtfs.openov.nl/gtfs-rt/gtfs-openov-nl.zip,NL,verify OpenOV/NDOV terms,"rail,tram,metro,bus,ferry",European transport feeds / OpenOV,P0,Use NDOV/OVapi for production and realtime.
|
||||
NL OVapi GTFS,gtfs,http://gtfs.ovapi.nl/gtfs-nl.zip,NL,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1077/latest.zip
|
||||
DK Rejseplanen GTFS,gtfs,https://www.rejseplanen.info/labs/GTFS.zip,DK,verify Rejseplanen Labs terms,"rail,bus",Rejseplanen Labs / European transport feeds; Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,May require account/terms review for production.; Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1292/latest.zip
|
||||
FI Helsingin seudun liikenne (HSL) GTFS,gtfs,http://dev.hsl.fi/gtfs/hsl.zip,FI,see http://developer.reittiopas.fi/pages/en/home.php,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-865/latest.zip
|
||||
NO Entur national aggregated GTFS,gtfs,https://storage.googleapis.com/marduk-production/outbound/gtfs/rb_norway-aggregated-gtfs.zip,NO,verify Entur terms/NLOD,"rail,tram,metro,bus,ferry",Entur; Mobility Database feed catalog,P0,GTFS is a subset; NeTEx is official/most complete.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1078/latest.zip
|
||||
IE Aircoach GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Aircoach.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2640/latest.zip
|
||||
"IE BK & Sons, JJ/Bernard Kavanagh GTFS",gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Bernard_Kavanagh.zip,IE,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-592/latest.zip
|
||||
IE Bus Éireann GTFS,gtfs,https://www.transportforireland.ie/transitData/Data/GTFS_Bus_Eireann.zip,IE,see https://www.transportforireland.ie/transitData/PT_Data.html,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2636/latest.zip
|
||||
GB BODS national GTFS,gtfs,https://data.bus-data.dft.gov.uk/timetable/download/gtfs-file/all/,GB,OGL/verify BODS terms,"rail,bus",BODS / Mobility Database; Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,England/GB bus focus; heavy rail separate.; Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2014/latest.zip
|
||||
DE DB Long-distance Rail GTFS.DE,gtfs,https://download.gtfs.de/germany/fv_free/latest.zip,DE,Creative Commons 4.0,rail,GTFS.DE / Deutsche Bahn long-distance rail; Mobility Database feed catalog,P1,Use as the first focused German rail feed for cross-source station deduplication with VBB and FlixTrain.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-768/latest.zip
|
||||
DE Public Transport Germany GTFS,gtfs,https://download.gtfs.de/germany/nv_free/latest.zip,DE,see https://www.nvbw.de/open-data/lizenz,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1090/latest.zip
|
||||
DE Regional Rail Transport Germany GTFS,gtfs,https://download.gtfs.de/germany/rv_free/latest.zip,DE,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1089/latest.zip
|
||||
DE Verkehrsverbund Berlin-Brandenburg (VBB) GTFS,gtfs,http://vbb.de/vbbgtfs,DE,see http://vbb.de/vbbgtfs,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-782/latest.zip
|
||||
DE Verkehrsverbund Rhein-Neckar GTFS,gtfs,https://geoportal.vrn.de/services/sharing/rest/content/items/4ec4b1d131eb46a6bb8e216ce9b90eff/data,DE,see https://www.vrn.de/opendata/datasets/soll-fahrplandaten-gtfs-aktuell,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1173/latest.zip
|
||||
"AT Wiener Lokalbahnen (WLB), Wiener Linien GTFS",gtfs,http://www.wienerlinien.at/ogd_realtime/doku/ogd/gtfs/gtfs.zip,AT,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P1,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-648/latest.zip
|
||||
NL OVapi GTFS,gtfs,https://gtfs.ovapi.nl/nl/gtfs-nl.zip,NL,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=NL-OVApi,P2,PTNA candidate; use original publisher URL where available.
|
||||
FI Helsingin seudun liikenne GTFS,gtfs,https://infopalvelut.storage.hsldev.com/gtfs/hsl.zip,FI,,bus,PTNA GTFS analysis; details: https://ptna.openstreetmap.de/en/gtfs-details.php?feed=FI-18-HSL,P2,PTNA candidate; use original publisher URL where available.
|
||||
CH Communauté d'Agglomération Annemasse - les Voirons Agglomération GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/373e19e2-af0a-4939-9f33-3f1268d1e0bb,CH,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-76779/latest.zip
|
||||
CH Communauté de communes pays d'Evian - vallée d'Abondance GTFS,gtfs,https://www.data.gouv.fr/api/1/datasets/r/429c8587-676a-4ed3-8279-e67403bc36f4,CH,see https://www.data.gouv.fr/pages/legal/licences/etalab-2.0,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tdg-80973/latest.zip
|
||||
AT Bean Shuttle GTFS,gtfs,https://www.beanshuttle.com/gtfs.zip,AT,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-2036/latest.zip
|
||||
AT Optima Express GTFS,gtfs,https://github.com/jonaes/gtfs/raw/refs/heads/main/output/optima_gtfs.zip,AT,,bus,Mobility Database feed catalog,P1,Mobility Database mirror: https://files.mobilitydatabase.org/mdb-3123/latest.zip
|
||||
"FI Haarasilta Toivo Samuli, Järvisen Liikenne Oy, Koiviston Auto Oy, Lehtimäen Liikenne Oy, Bus Travel Oy Reissu Ruoti, Tilausliikenne Kuisma Ky GTFS",gtfs,https://tvv.fra1.digitaloceanspaces.com/223.zip,FI,,bus,Mobility Database feed catalog; MobilityData validator acceptance-test feed list,P0,Useful smoke-test feed list; prefer Mobility Database feeds_v2 metadata for production source review.; Mobility Database mirror: https://files.mobilitydatabase.org/mdb-1129/latest.zip
|
||||
"NO Vestfold Kollektivtrafikk, Bastø Fosen GTFS",gtfs,https://storage.googleapis.com/marduk-production/outbound/gtfs/rb_vkt-aggregated-gtfs.zip,NO,,bus,Mobility Database feed catalog,P0,Mobility Database mirror: https://files.mobilitydatabase.org/tld-1048/latest.zip
|
||||
|
165
docs/gtfs_harmonization.md
Normal file
165
docs/gtfs_harmonization.md
Normal file
@@ -0,0 +1,165 @@
|
||||
# GTFS Harmonization and QA Concept
|
||||
|
||||
Last updated: 2026-07-01
|
||||
|
||||
## Decision
|
||||
|
||||
Run harmonization inside the existing Mobility Workbench for now:
|
||||
|
||||
- Same FastAPI server.
|
||||
- Same operator/data-engineering UI.
|
||||
- Same PostgreSQL/PostGIS database.
|
||||
- Separate GTFS Harmonization and Mapping Data UI modules backed by the existing source/job tables.
|
||||
- Separate QA/harmonization API surface starting with `/api/qa/*`.
|
||||
- Separate canonical export concept, but no separate public API backend yet.
|
||||
|
||||
Split this into a separate service later when one of these becomes true:
|
||||
|
||||
- third-party API consumers need independent uptime, auth, quotas, billing, or SLA boundaries;
|
||||
- export jobs need independent workers, storage, or scaling;
|
||||
- canonical data publication needs immutable release management independent of the editing workbench;
|
||||
- commercial/public API concerns start slowing down internal QA and import workflows.
|
||||
|
||||
The public/API product should not expose raw workbench tables directly. It should consume versioned canonical snapshots.
|
||||
|
||||
The journey/routing interface should consume the active harmonized transit snapshot. It should not expose raw GTFS feed selection as a normal traveller-facing routing control. Feed-specific filters remain useful for QA, layer inspection, diagnostics, and source review.
|
||||
|
||||
## Target Pipeline
|
||||
|
||||
```text
|
||||
source catalog
|
||||
-> raw feed snapshots
|
||||
-> validation reports
|
||||
-> normalized staging tables
|
||||
-> canonical matching and deduplication
|
||||
-> conflict review and reusable rules
|
||||
-> versioned canonical snapshot
|
||||
-> GTFS/API/GeoParquet exports
|
||||
```
|
||||
|
||||
The pan-European output should be a canonical mobility dataset first, not one giant internal GTFS feed. GTFS should be one export format from that canonical snapshot.
|
||||
|
||||
## Core Concepts
|
||||
|
||||
### Source Registry
|
||||
|
||||
Track every identified source, including feeds not yet imported:
|
||||
|
||||
- source URL and publisher;
|
||||
- country/region/mode coverage;
|
||||
- source authority and priority;
|
||||
- update cadence and freshness;
|
||||
- importability;
|
||||
- license and redistribution status.
|
||||
|
||||
Mobility Database can be used as a broad discovery connector. Prefer the full `feeds_v2.csv` catalog/API over validator acceptance-test feed lists because it includes feed status, official/source flags, latest/direct URLs, license URLs, features, and bounding boxes. Treat it as candidate metadata: the catalog metadata is reusable, but each transit feed still needs its own provider license review and authority ranking.
|
||||
|
||||
PTNA can be used as a GTFS/OSM QA and crosswalk connector. Its country pages expose feed IDs, provider names, release dates, validity windows, route-analysis links, detail pages, and original release-page links. Detail pages can add license text, OSM permission notes, `network:guid`, and route matching hints. PTNA should not become the canonical publisher for a feed; the harmonizer should follow the original provider URL where possible and keep PTNA as evidence.
|
||||
|
||||
The generated discovery files live under `docs/generated/`:
|
||||
|
||||
- `gtfs_feed_candidates.csv` keeps every discovered feed/evidence row.
|
||||
- `gtfs_ingestable_sources.csv` keeps rows that can be imported as GTFS sources after review.
|
||||
- `gtfs_test_run_sources.csv` keeps a smaller multi-source set for deduplication tests.
|
||||
|
||||
Required license flags before publication:
|
||||
|
||||
- `can_import`
|
||||
- `can_derive`
|
||||
- `can_redistribute`
|
||||
- `requires_attribution`
|
||||
- `commercial_restrictions`
|
||||
|
||||
### Raw Snapshots
|
||||
|
||||
Every update should preserve immutable raw input:
|
||||
|
||||
- source id;
|
||||
- fetch time;
|
||||
- source hash;
|
||||
- upstream metadata;
|
||||
- parser/import version;
|
||||
- validator report;
|
||||
- previous active snapshot.
|
||||
|
||||
This keeps deduplication and conflict decisions reproducible.
|
||||
|
||||
### Canonical Entities
|
||||
|
||||
Stable meubility IDs should be the internal truth. Source IDs remain aliases.
|
||||
|
||||
Initial canonical entity families:
|
||||
|
||||
- operators/agencies/authorities/networks;
|
||||
- stop places and station complexes;
|
||||
- platforms, tracks, bus bays, entrances;
|
||||
- routes/lines;
|
||||
- route patterns and trip patterns;
|
||||
- calendars/service validity;
|
||||
- shapes/geometries;
|
||||
- fares/ticketing references later.
|
||||
|
||||
### Authority Ranking
|
||||
|
||||
Conflict resolution needs explicit source authority:
|
||||
|
||||
- manual review decision;
|
||||
- national official feed or registry;
|
||||
- regional authority feed;
|
||||
- operator feed;
|
||||
- broad aggregator feed;
|
||||
- OSM as visual/gap evidence, not timetable authority.
|
||||
|
||||
Authority can differ by entity type. A source can be authoritative for timetable but weak for route geometry or operator identity.
|
||||
|
||||
### Conflict Review
|
||||
|
||||
The QA dashboard should expose review queues for:
|
||||
|
||||
- duplicate operators/agencies;
|
||||
- duplicate stop places/station complexes;
|
||||
- GTFS stops without canonical links;
|
||||
- OSM stops without GTFS/canonical links;
|
||||
- canonical stop groups with large spatial disagreement;
|
||||
- routes with missing, weak, or conflicting OSM links;
|
||||
- routes with missing shapes or route-pattern geometry;
|
||||
- stale calendars and short service horizons;
|
||||
- license/redistribution blockers.
|
||||
|
||||
Manual resolutions must become reusable rules so source updates do not reintroduce the same conflict.
|
||||
|
||||
## Export Strategy
|
||||
|
||||
Do not start with one giant Europe GTFS zip as the only product. Produce:
|
||||
|
||||
- versioned canonical snapshot tables;
|
||||
- country/region GTFS exports;
|
||||
- network/operator GTFS exports;
|
||||
- full-Europe analytical dumps such as GeoParquet;
|
||||
- API-ready entity endpoints later.
|
||||
|
||||
Each export needs:
|
||||
|
||||
- snapshot id;
|
||||
- source feed versions;
|
||||
- generation time;
|
||||
- validation summary;
|
||||
- license/attribution manifest;
|
||||
- conflict/review status.
|
||||
|
||||
## Current Implementation Step
|
||||
|
||||
The first implementation is a lightweight harmonization boundary:
|
||||
|
||||
- `/api/qa/summary`;
|
||||
- source discovery metrics;
|
||||
- import health metrics;
|
||||
- GTFS validation counters;
|
||||
- canonical stop/link coverage;
|
||||
- route matching and geometry counters;
|
||||
- publication-readiness warnings.
|
||||
- GTFS source add/import/review controls live in the `GTFS Harmonization` sidebar module.
|
||||
- OSM/route-layer source controls live in the `Mapping Data` sidebar module.
|
||||
- The journey panel displays the active harmonized transit snapshot instead of a GTFS source picker.
|
||||
|
||||
This is intentionally a skeleton. The next step is to turn non-zero warning/bad counters into review queues with drill-down lists and persistent resolution actions.
|
||||
15
docs/ingestable_sources_seed.csv
Normal file
15
docs/ingestable_sources_seed.csv
Normal file
@@ -0,0 +1,15 @@
|
||||
name,kind,url,country,license,mode_scope,source_basis,priority,notes
|
||||
CH Swiss national GTFS,gtfs,https://gtfs.geops.ch/dl/gtfs_complete.zip,CH,verify at opentransportdata.swiss,"rail,bus,tram,metro,ferry,cableway",European transport feeds / official Swiss OTD derivative,P0,geOps feed is derived from official Swiss Open Transport Data; verify production terms.
|
||||
DE generated national GTFS,gtfs,https://scraped.data.public-transport.earth/de/gtfs.zip,DE,verify upstream DELFI/Mobilithek/gtfs.de,"rail,bus,tram,metro",European transport feeds mirror,P0,Bootstrap only; prefer official DELFI/Mobilithek NeTEx for production.
|
||||
DB Long-distance Rail GTFS.DE,gtfs,https://download.gtfs.de/germany/fv_free/latest.zip,DE,Creative Commons 4.0,"rail,long-distance rail",GTFS.DE / Deutsche Bahn long-distance rail,P1,Use as the first focused German rail feed for cross-source station deduplication with VBB and FlixTrain.
|
||||
VBB Berlin-Brandenburg GTFS,gtfs,https://www.vbb.de/vbbgtfs,DE,CC-BY attribution VBB Verkehrsverbund Berlin-Brandenburg GmbH,"rail,bus,tram,metro,ferry",VBB official Berlin Open Data GTFS,P5,Berlin/Brandenburg bootstrap feed for the regional model; updated twice weekly; pair with Geofabrik Berlin OSM PBF.
|
||||
DK Rejseplanen GTFS,gtfs,https://www.rejseplanen.info/labs/GTFS.zip,DK,verify Rejseplanen Labs terms,"train,bus",Rejseplanen Labs / European transport feeds,P1,May require account/terms review for production.
|
||||
FI national GTFS,gtfs,https://traffic.navici.com/tiedostot/gtfs.zip,FI,verify Fintraffic/FINAP terms,"rail,bus,tram,metro,ferry",European transport feeds / Fintraffic,P1,Check current endpoint and whether HSL/Waltti should be ingested separately.
|
||||
IE Transport for Ireland combined GTFS,gtfs,https://www.transportforireland.ie/transitData/google_transit_combined.zip,IE,verify NTA terms/fair use,"bus,rail,tram",Transport for Ireland / European transport feeds,P1,Pair with NTA GTFS-Realtime later.
|
||||
LT national GTFS,gtfs,https://www.visimarsrutai.lt/gtfs/gtfs_all.zip,LT,verify visimarsrutai terms,"bus,trolleybus,rail",NAP list / European transport feeds,P2,Validate freshness and calendar horizon.
|
||||
LU GTFS mirror,gtfs,https://scraped.data.public-transport.earth/lu/gtfs.zip,LU,verify data.public.lu terms,"bus,rail,tram",European transport feeds mirror,P1,Prefer data.public.lu NeTEx for production.
|
||||
NL OpenOV national GTFS,gtfs,http://gtfs.openov.nl/gtfs-rt/gtfs-openov-nl.zip,NL,verify OpenOV/NDOV terms,"rail,bus,tram,metro,ferry",European transport feeds / OpenOV,P0,Use NDOV/OVapi for production and realtime.
|
||||
NO Entur national aggregated GTFS,gtfs,https://storage.googleapis.com/marduk-production/outbound/gtfs/rb_norway-aggregated-gtfs.zip,NO,verify Entur terms/NLOD,"rail,bus,tram,metro,ferry",Entur,P0,GTFS is a subset; NeTEx is official/most complete.
|
||||
SE GTFS Sverige mirror,gtfs,https://scraped.data.public-transport.earth/se/gtfs.zip,SE,verify Trafiklab/Samtrafiken terms,"rail,bus,tram,metro,ferry",European transport feeds mirror,P0,Production source should use Trafiklab API key and official endpoint.
|
||||
GB BODS national GTFS,gtfs,https://data.bus-data.dft.gov.uk/timetable/download/gtfs-file/all/,GB,OGL/verify BODS terms,"bus,local light rail where in BODS",BODS / Mobility Database,P0,England/GB bus focus; heavy rail separate.
|
||||
FlixBus/FlixTrain Europe GTFS,gtfs,http://gtfs.gis.flix.tech/gtfs_generic_eu.zip,EU,verify Flix terms,"coach,rail",Transitland feed,P1,Important commercial long-distance network; verify reuse before production.
|
||||
|
91
docs/source_acquisition.md
Normal file
91
docs/source_acquisition.md
Normal file
@@ -0,0 +1,91 @@
|
||||
# Source acquisition and operator inventory
|
||||
|
||||
This repository now contains two hand-curated seed catalogues and three generated GTFS manifests:
|
||||
|
||||
- `docs/source_catalog_seed.csv` — broad discovery catalogue for official NAPs, feed registries, route-geometry evidence, realtime/disruption sources, rail/air registries and country notes.
|
||||
- `docs/ingestable_sources_seed.csv` — direct static feeds that the current prototype can import immediately.
|
||||
- `docs/generated/gtfs_feed_candidates.csv` — generated GTFS discovery manifest from Mobility Database, PTNA, the validator acceptance list, and curated local seeds.
|
||||
- `docs/generated/gtfs_ingestable_sources.csv` — generated direct GTFS source rows suitable for source-registry import after license/source review.
|
||||
- `docs/generated/gtfs_test_run_sources.csv` — generated focused feed set for the first multi-source harmonization/deduplication run.
|
||||
|
||||
Regenerate the GTFS discovery manifests:
|
||||
|
||||
```bash
|
||||
python -m app.cli discover-gtfs-sources --max-ptna-details 0 --test-limit 24
|
||||
```
|
||||
|
||||
Use `--countries ALL` for the broad global Mobility Database/acceptance-list pass. Use a positive `--max-ptna-details` when you want PTNA license and OSM crosswalk fields; the country-table scrape is fast, while detail pages can be slow.
|
||||
|
||||
Import the seed catalogues and the generated test-run set into the source registry:
|
||||
|
||||
```bash
|
||||
python -m app.cli import-source-catalog --csv docs/source_catalog_seed.csv
|
||||
python -m app.cli import-ingestable-sources --csv docs/ingestable_sources_seed.csv
|
||||
python -m app.cli import-ingestable-sources --csv docs/generated/gtfs_test_run_sources.csv
|
||||
python -m app.cli stats
|
||||
```
|
||||
|
||||
Queue the focused multi-source harmonization test run:
|
||||
|
||||
```bash
|
||||
python -m app.cli queue-source-imports-from-csv --csv docs/generated/gtfs_test_run_sources.csv
|
||||
```
|
||||
|
||||
That queues every listed source import with per-source matching disabled, then queues one route-matching job and one route-layer rebuild after the imports. This avoids rebuilding matches/layers after every individual feed.
|
||||
|
||||
Alternatively, run feeds one by one from the UI or CLI instead of queueing them:
|
||||
|
||||
```bash
|
||||
python -m app.cli run-source 1
|
||||
python -m app.cli run-match
|
||||
```
|
||||
|
||||
## Operator list strategy
|
||||
|
||||
There is no single complete European transport-operator list. Generate the operator table by unioning and reconciling:
|
||||
|
||||
1. GTFS `agency.txt` records from every imported static feed.
|
||||
2. NeTEx `Operator`, `Authority`, `Network`, and `Line` records once NeTEx ingestion is added.
|
||||
3. National Access Point dataset publishers and data-provider metadata.
|
||||
4. National stop registries and access-node systems, such as NaPTAN, NSR, and Swiss SLOID/DiDok/service-point datasets.
|
||||
5. Rail undertaking registries such as ERADIS Single Safety Certificates.
|
||||
6. Aviation registries such as EASA AOC/TCO lists, plus airport registries such as OurAirports.
|
||||
7. OSM `operator` and `network` tags as a gap-finding and alias-discovery layer, not as authority.
|
||||
8. Manual commercial/onboarding records for booking/API coverage.
|
||||
|
||||
Persist every operator row with provenance: source table, source URL, first_seen, last_seen, confidence, and whether it is an authority, data publisher, brand, legal operator, infrastructure manager, or booking partner.
|
||||
|
||||
## Geometry policy
|
||||
|
||||
For this workbench, the extracted OSM route layer is the authoritative visual layer for routes, networks and stop display. GTFS, NeTEx, official GIS data, infrastructure registries and historical vehicle traces are matching and QA inputs. They can propose corrections, flag missing or stale OSM route-layer geometry, and explain timetable deviations, but they do not override the canonical visual route layer automatically.
|
||||
|
||||
Use geometry evidence in this order (non-OSM sources first):
|
||||
|
||||
1. NeTEx journey-pattern/link-sequence geometry and GTFS `shapes.txt`.
|
||||
2. Official stop/station registries such as NaPTAN, NSR, and SLOID/DiDok.
|
||||
3. Official infrastructure registries such as ERA RINF and RailNetEurope DII.
|
||||
4. Official operator GIS route datasets where available.
|
||||
5. Historical realtime vehicle traces after QA.
|
||||
6. OSM route-layer gaps and conflicts for manual review.
|
||||
|
||||
## OSM PBF extraction
|
||||
|
||||
For large countries or Europe-wide experiments, pre-filter raw PBF files before importing them:
|
||||
|
||||
```bash
|
||||
scripts/osmium_transport_filter.sh europe-latest.osm.pbf europe-transport.osm.pbf
|
||||
```
|
||||
|
||||
This requires the external `osmium` CLI. The result is still an OSM transport extract and remains the input to the canonical visual route-layer extraction; it is not a separate geometry hierarchy.
|
||||
|
||||
## Temporary closures and disruption data
|
||||
|
||||
Structured temporary closure/disruption data usually comes from:
|
||||
|
||||
- GTFS-Realtime Service Alerts, TripUpdates and VehiclePositions.
|
||||
- SIRI-SX, SIRI-ET, SIRI-VM, and related national profiles.
|
||||
- DATEX II roadworks, closures, incidents, restrictions and weather for bus detours and access legs.
|
||||
- Rail-specific feeds such as National Rail Darwin or operator construction-work feeds.
|
||||
- Ferry and air operator/airport APIs where available, often commercial or auth-gated.
|
||||
|
||||
Model these as separate validity-windowed event tables rather than modifying the base static timetable.
|
||||
61
docs/source_catalog_seed.csv
Normal file
61
docs/source_catalog_seed.csv
Normal file
@@ -0,0 +1,61 @@
|
||||
Geography,Country code,Mode scope,Source name,Source category,Formats / APIs,Availability,Coverage notes,Supersedes OSM for,Disruptions / closures,Operator-list use,Access / licence notes,Priority,Source URL,Evidence URL,Next pipeline action
|
||||
Europe,EU/EEA/UK/CH,multimodal,NAPCORE National Access Points list,Official discovery / NAP index,catalog URLs; country NAPs; MMTIS/RTTI/SRTI/SSTP,public directory,Primary entry point for official national mobility data portals; not itself a feed.,No direct route geometry; points to authoritative national sources.,"Indirect: country NAPs may expose DATEX II, SIRI, GTFS-RT, road events.",Use NAP publishers/data providers as candidate authority/operator records.,Public; individual NAP terms vary.,P0,https://napcore.eu/description-naps/national-access-point/,https://napcore.eu/description-naps/national-access-point/,"Crawl country NAP metadata first; store source, formats, auth requirements, licence, last_seen."
|
||||
Europe,EU/EEA/UK/CH,multimodal,EU National Access Points PDF,Official NAP directory,PDF/list of NAP URLs by delegated act,public,"Official map of country NAPs for multimodal, real-time traffic, safety-related traffic and truck parking data.",No direct route geometry.,References traffic/disruption portals by country.,"Useful for country bootstrap, not operator enumeration.",Public.,P0,https://transport.ec.europa.eu/document/download/963c997d-efd9-40ae-a38b-5d4b935bdfcf_en?filename=its-national-access-points.pdf,https://transport.ec.europa.eu/document/download/963c997d-efd9-40ae-a38b-5d4b935bdfcf_en?filename=its-national-access-points.pdf,Seed country table; reconcile against NAPCORE monitoring tool.
|
||||
Europe,EU/EEA/UK/CH,land/public transport,European transport feeds,Community feed index,"GTFS, NeTEx",public stable redirect URLs; no SLA,Practical list of open European GTFS/NeTEx feeds; good bootstrap but not authoritative and not exhaustive.,GTFS shapes / NeTEx geometry may supersede OSM where present.,No live data focus.,Derive agencies/operators from imported feeds.,URLs may be mirrors/scraped; verify source licence.,P0,https://eu.data.public-transport.earth/,https://eu.data.public-transport.earth/,Import feed URLs into source registry with 'secondary discovery' flag; validate licences upstream.
|
||||
Europe,global,land/public transport + shared mobility,Mobility Database,Open catalog / feed registry,"GTFS, GTFS-RT, GBFS; validators/quality reports",public catalog; API/login for some functions,Large feed catalog with thousands of feeds across many countries; useful to find missing local/regional sources.,GTFS shapes can supersede OSM route geometry; feed metadata helps coverage.,"GTFS-RT feeds can include trip updates, vehicle positions, service alerts.",Feed agencies; not a complete legal operator registry.,Catalog is open; each feed has separate terms.,P0,https://mobilitydatabase.org/,https://mobilitydatabase.org/,Mirror metadata; schedule validator runs; link feed IDs to source registry.
|
||||
Europe,global,land/public transport,Transitland Atlas / Transitland,Feed and operator catalog / archives,"GTFS, GTFS-RT; operator records; historical snapshots; API",public/commercial tiers,"Useful crosswalk of source feeds, agencies, operators, routes and archived versions.",RouteStopPattern / GTFS shapes can improve route geometry and history.,GTFS-RT service alerts/trip updates where catalogued.,Transitland operator abstraction groups agencies across feeds; good candidate operator list seed.,Atlas is open; API/service terms vary.,P1,https://github.com/transitland/transitland-atlas,https://www.transit.land/documentation/concepts/agencies-and-operators/,Use as secondary crosswalk; do not let it override official NAP sources without evidence.
|
||||
Europe,global,land/public transport,Transitous sources,Open journey-planner source list / processed dataset,GTFS-derived datasets,public,Open-source routing project that compiles public feeds and can remove overlapping data when better sources exist.,"Processed shapes can aid QA, but use original feed as source of truth.",Not primary disruption source.,Operators via GTFS agencies.,Processed coverage may differ from original.,P2,https://github.com/transitous/transitous/blob/master/feeds/,https://transitous.org/,Use for gap discovery and regression comparison; ingest original sources where possible.
|
||||
Europe,global,all mapped modes,OpenStreetMap / Geofabrik extracts,Crowdsourced geometry baseline,OSM PBF; route relations; route_master; stops; terminals; infrastructure,open under ODbL,"Best general existence layer for mapped networks, stops, stations, ferry routes and infrastructure; not timetable truth.","Fallback only; superseded by official GTFS shapes, NeTEx geometry, national stop registries, rail infrastructure registries.",Mapped closures may be present but not reliable for temporary passenger disruption.,"operator/network tags are useful candidate operator names, but noisy.",ODbL share-alike requirements; use attribution.,P0 fallback,https://download.geofabrik.de/europe.html,https://wiki.openstreetmap.org/wiki/Public_transport,Load into PostGIS as 'known network'; match official feeds against it.
|
||||
Europe,EU,road/access legs/bus detours,DATEX II ecosystem / NAP road data,Road traffic/disruption standard,DATEX II,via national road NAPs; auth varies,"Reference European road traffic/travel information format; useful for bus detours, road closures, car/taxi/access legs.","Can supersede OSM for temporary road state, restrictions, closures.","Incidents, roadworks, closures, weather, restrictions, travel times.",Usually infrastructure/data-provider rather than transport-operator list.,Country-specific licences and access.,P1,https://datex2.eu/,https://napcore.eu/description-naps/national-access-point/,Model as road_event layer; spatially join with bus route shapes and access legs.
|
||||
Europe,EU,rail,ERA ERADIS Safety Certificates,Rail operator registry,database/web,public,European rail safety/interoperability database; Single Safety Certificates identify licensed railway undertakings.,No route geometry; operator existence/authority layer.,No passenger disruptions.,"Strong seed for rail operator registry, but not timetable coverage.",Public database; not timetable licence.,P1,https://eradis.era.europa.eu/,https://www.era.europa.eu/domains/applicants/applications-single-safety-certificates_en,Ingest as rail_operator_authority table; match names to GTFS/NeTEx agencies.
|
||||
Europe,EU,rail infrastructure,ERA RINF,Rail infrastructure registry,registry/web/API where available,public/official,Register of Infrastructure describes static characteristics of railway lines and operational points.,Can supersede OSM for official rail operational points/line sections and infrastructure characteristics.,No passenger disruption; static infrastructure only.,"Infrastructure managers and operational points, not passenger operators.",Access/API details vary.,P2,https://rinf.era.europa.eu/,https://www.era.europa.eu/domains/infrastructure/register-infrastructure-rinf_en,"Use later for rail topology QA, station/line code crosswalks, route compatibility."
|
||||
Europe,EU,rail infrastructure,RailNetEurope Digital Infrastructure Information,Rail infrastructure / network information,"RINF-related, Primary Location Codes, network overview",public/sector,Rail infrastructure metadata for international rail planning and location codes.,"Can supersede OSM for official rail locations/codes, not passenger timetable route paths.",No direct passenger disruption feed.,"Infrastructure managers, not passenger operators.",Terms vary.,P3,https://rne.eu/it/rne-applications/dii/,https://rne.eu/it/rne-applications/dii/,Use as enrichment after core passenger timetable coverage.
|
||||
Europe,global,air,EASA AOC / Air operators lists,Air operator registry,web/PDF lists,public,"Lists air operators holding EASA-issued AOCs and related authorisations; operator registry, not schedules.",No route geometry.,No passenger disruption.,Air operator existence only; combine with airports and flight schedule providers.,Public; not timetable data.,P3,https://www.easa.europa.eu/en/list-air-operators-holding-aoc-granted-easa,https://www.easa.europa.eu/en/list-air-operators-holding-aoc-granted-easa,Ingest as air_operator_registry; mark as non-schedule.
|
||||
Europe,global,air,EASA Third Country Operators list,Air operator registry,PDF/list,public,"Authorized non-EU air operators relevant to Europe; registry, not route schedules.",No route geometry.,No disruptions.,Air operator existence only.,Public; not timetable data.,P3,https://www.easa.europa.eu/en/domains/air-operations/third-country-operators-tco,https://www.easa.europa.eu/en/domains/air-operations/third-country-operators-tco,Ingest as TCO registry for air operator crosswalk.
|
||||
Europe,global,airports,OurAirports,Airport registry,CSV dumps,public,"Nightly-updated airport, region and country CSV files; useful airport/heliport existence layer.","Can supersede OSM for airport identifiers/metadata, not terminal geometry or schedules.",No disruptions.,"Airports, not airlines.",Public data; verify licence and attribution.,P2,https://ourairports.com/data/,https://ourairports.com/data/,Ingest airport points/codes; reconcile with OSM aeroway and IATA/ICAO.
|
||||
Europe,global,air,IATA SDEP / SSIM,Air schedule data exchange,SSIM/SSM/ASM/MCT,industry/commercial/participation,"Industry schedule ecosystem; better for comprehensive air schedules than open data, but not public/open.","Can supersede OSM entirely for flight path/schedule existence, but not freely reusable.",Schedule changes; not open disruption feed.,Airlines via participation.,Restricted/commercial; use only with agreements.,P4,https://www.iata.org/en/services/data/passenger-traffic/schedule-data-exchange-program/,https://www.iata.org/en/publications/manuals/standard-schedules-information/,Mark as commercial source candidate; do not include in open MVP.
|
||||
Europe,global,air,Cirium schedules,Commercial aviation schedules,"APIs, SSIM-style data",commercial,High-coverage commercial aviation schedules; useful if air timetable precision becomes required.,Supersedes OSM for air route/schedule graph.,Flight status may be separate commercial products.,Airline/operator data inside product.,Commercial licence.,P4,https://www.cirium.com/data/flight-schedules/schedules-and-connections-data/,https://www.cirium.com/data/flight-schedules/schedules-and-connections-data/,Commercial evaluation only; not open pipeline.
|
||||
Europe,EU,air operations,EUROCONTROL Network Manager B2B,Operational aviation data,B2B APIs,restricted to operational stakeholders,Operational network data; not a general open public timetable source.,Could supersede public air data for qualified stakeholders only.,"Operational updates, depending on access.","Operational participants, not open operator list.",Eligibility/usage conditions.,P4,https://www.eurocontrol.int/service/network-manager-business-business-b2b-web-services,https://www.eurocontrol.int/service/network-manager-business-business-b2b-web-services,Keep out of MVP unless eligibility/contract exists.
|
||||
Austria,AT,"rail, bus, tram, metro",mobilitydata.gv.at / MVO GTFS timetable data,National feed / NAP,GTFS; NeTEx via Austrian mobility associations where available,public; some registration may apply,National public-transport planned timetable data collected by Verkehrsverbünde / MVO.,GTFS shapes/stops supersede OSM for planned service geometry where present.,Not primary; check Austrian NAP for SIRI/road events.,GTFS agency.txt and MVO data providers.,Licence/auth must be checked per dataset.,P1,https://www.mobilitydata.gv.at/,https://www.mobilitydata.gv.at/daten/soll-fahrplandaten-gtfs/,Add national static feed; validate shape and calendar coverage.
|
||||
Belgium,BE,"rail, bus, tram, metro",transportdata.be / Belgian PT datasets,NAP / official datasets,"GTFS, NeTEx, GTFS-RT, SIRI/API",public; API subscription key may be required,"Belgian portal exposes De Lijn, STIB-MIVB, LETEC, SNCB/NMBS and multimodal datasets.",GTFS/NeTEx route and stop geometry supersede OSM for official service layer.,"De Lijn GTFS-RT includes disruptions, delays, cancellations; SMOP real-time integrates major operators.",Operators from feed agencies and Belgian PTO portal.,API keys and terms vary.,P1,https://www.transportdata.be/en/,https://portal.api.mobility.belgium.be/,"Implement provider-specific auth; ingest static first, RT second."
|
||||
Bulgaria,BG,"rail, bus, airports",Bulgaria NAP / Ministry transport page,NAP / official discovery,"links to BDZ timetable, train location, bus timetables, airports",public discovery; machine-readability variable,NAP lists official sources for multimodal information but uniform GTFS/NeTEx coverage appears less mature.,"Official source pages may supersede OSM for timetable existence, not necessarily geometry.",Train location/road NAP sources; structured SIRI/GTFS-RT maturity unclear.,Operators from source portals and GTFS if present.,Likely fragmented; verify terms.,P3,https://www.mtc.government.bg/en/category/294/national-access-points-transport-related-data,https://www.mtc.government.bg/en/category/294/national-access-points-transport-related-data,Crawl NAP links; classify machine-readable vs web-only.
|
||||
Croatia,HR,"road, ferry, bus, rail",promet-info.hr / Croatian NAP,NAP / traffic data,NAP catalog; road telematics; MMTIS discovery,public; machine access varies,Croatian NAP exists; public transport timetable feeds require additional discovery.,Official route/timetable feeds if found supersede OSM; otherwise OSM remains existence layer.,Road traffic management and incidents via NAP; passenger PT disruption coverage unclear.,Operators via datasets/OSM/web crawling.,Check portal terms.,P3,https://www.promet-info.hr/,https://hrvatske-ceste.hr/en/pages/traffic-and-safety/documents/76-national-access-point,"Start with NAP metadata, ferry/public transport operator crawl."
|
||||
Cyprus,CY,"bus, coach, shuttle",traffic4cyprus GTFS-RT,NAP / realtime feed,GTFS-RT,public web service,Public Works Department dataset provides real-time estimated timetable information from public-transport telematics.,No static route geometry in this row; pair with static GTFS if available.,"GTFS-RT realtime estimates, up to 1 minute frequency.",Operators from static GTFS or agency metadata if available.,Portal terms apply.,P2,https://www.traffic4cyprus.org.cy/dataset/publictransportrealtime_gtfs_rt,https://www.traffic4cyprus.org.cy/dataset/publictransportrealtime_gtfs_rt,Find matching static GTFS; add RT ingestion for trip updates.
|
||||
Czech Republic,CZ,"metro, tram, bus, rail, ferries",PID Open Data,Regional official feed,"GTFS, vehicle positions/API; stops; timetables",public,"Prague integrated transport provides timetables, online vehicle positions, stops and related data; GTFS includes metro/tram/trolleybus/bus/funicular/ferries/trains.",GTFS shapes/stops supersede OSM for PID services.,Online vehicle locations/delays and diversions where published.,PID feed agencies/operators; national sources for broader CZ.,Public licence details per dataset.,P1,https://pid.cz/o-systemu/opendata/,https://data.gov.cz/datové-sady,Ingest PID; discover national CIS JŘ / regional datasets.
|
||||
Denmark,DK,"train, bus",Rejseplanen / Dataudveksleren,National feed / NAP,"NeTEx, GTFS",public; login may be required for some data,National Rejseplanen data includes public transport schedules for trains and buses and geographic information.,NeTEx/GTFS shapes/stops supersede OSM for planned service geometry.,Realtime availability requires separate APIs/feeds.,GTFS agencies/NeTEx operators.,Some feeds may require account.,P1,https://nap.vd.dk/,https://www.rejseplanen.info/labs/GTFS.zip,Implement account/manual token handling; ingest static national feeds.
|
||||
Estonia,EE,"bus, rail, ferry where in register",Estonian public transport register / peatus.ee,National feed / registry,GTFS; national register extracts,public,National public transport register and peatus.ee data provide timetable/register data; feed URLs have changed historically.,Official feed stops/routes supersede OSM for planned services.,Realtime/disruption availability needs follow-up.,Operators from register and agency records.,Verify current feed URLs and licence.,P2,https://peatus.ee/,https://www.transpordiamet.ee/,Add URL-health checks because feed structure/URLs may change.
|
||||
Finland,FI,"rail, bus, tram, metro, ferry, bike",Fintraffic FINAP / national GTFS dump,National feed / NAP,GTFS; APIs; Digitransit; GTFS-RT for HSL/Waltti,public,National travel data for transport services; Digitransit/HSL/Waltti provide strong regional timetable and realtime APIs.,GTFS route shapes/stops supersede OSM for covered services; official ferry/public waterborne data where present.,HSL and Waltti GTFS-RT; road/maritime data via Fintraffic/Digitraffic.,Agencies/operators from GTFS and FINAP metadata.,Terms per API.,P1,https://www.fintraffic.fi/en/digitalservices/Traffic-and-Public-Transport-Data-Services/Finap-service/public-transport,https://digitransit.fi/en/developers/apis/1-routing-api/,Ingest national GTFS and HSL/Waltti RT; identify ferry coverage.
|
||||
France,FR,"rail, bus, tram, metro, ferry, coach, air metadata",transport.data.gouv.fr,National NAP/catalog,"GTFS, NeTEx, GTFS-RT, SIRI; SSIM reference for air",public catalog; per-feed terms/auth,"Large official catalog for public transport, road, shared vehicles, carpooling, etc.; public transit datasets describe networks, stops, routes and times.",GTFS/NeTEx shapes/stops supersede OSM; consolidated stop datasets help stop registry.,GTFS-RT and SIRI feeds including SNCF service alerts/trip updates where available.,Operators/publishers from dataset metadata and agencies.,Licence per dataset.,P0,https://transport.data.gouv.fr/,https://transport.data.gouv.fr/datasets?type=public-transit&locale=en,Crawl catalog API/search; import SNCF and regional feeds; add RT after static.
|
||||
France,FR,national rail,SNCF Open Data,Operator feed,"GTFS, NeTEx, SIRI Lite, GTFS-RT TripUpdates/ServiceAlerts",public; terms per dataset,SNCF publishes static and realtime passenger information datasets.,SNCF GTFS/NeTEx route and stop data supersede OSM for timetable layer.,GTFS-RT TripUpdates and ServiceAlerts; SIRI ET/SX Lite.,SNCF agency/operator records.,Terms per data.gouv dataset.,P0,https://ressources.data.sncf.com/,https://transport.data.gouv.fr/datasets?organization=sncf&locale=en,Create SNCF connector; add stale-feed and preview-window checks.
|
||||
Germany,DE,"rail, bus, tram, metro, ferry if in ÖPNV",DELFI / Mobilithek national NeTEx + GTFS.de,National feed / derived GTFS,"NeTEx, GTFS; GTFS-RT aggregate",public; official static via NAP; GTFS derived,National static timetable data published via Mobilithek/DELFI; GTFS.de offers daily GTFS covering DB long-distance/regional and local/urban transit.,Official NeTEx / GTFS shapes supersede OSM for planned services.,GTFS.de RT stream aggregates realtime where open/licensed: TripUpdates/ServiceAlerts.,Agencies/operators from feed; NAP publishers.,Static open; RT may depend on open licences/special agreements.,P0,https://gtfs.de/en/,https://gtfs.de/en/,"Use GTFS.de for bootstrap, plan NeTEx ingestion for higher fidelity."
|
||||
Germany / Berlin-Brandenburg,DE,"rail, bus, tram, metro, ferry",VBB Berlin-Brandenburg GTFS,Regional authority feed / official GTFS,GTFS,public; updated twice weekly,Official VBB timetable feed for Berlin and Brandenburg bus and rail services; useful regional bootstrap/demo source before national-scale ingestion.,GTFS stops/shapes/timetables supersede OSM for planned service verification where present.,Static timetable only in this row; VBB GTFS-RT is a separate realtime source.,Agencies/operators from agency.txt and VBB feed metadata.,CC-BY; attribution required: VBB Verkehrsverbund Berlin-Brandenburg GmbH.,P5,https://www.vbb.de/vbbgtfs,https://daten.berlin.de/datensaetze/vbb-fahrplandaten-via-gtfs,Use for Berlin model bootstrap; pair with Geofabrik Berlin OSM PBF and validate route geometry coverage.
|
||||
Germany,DE,rail disruptions,DB Baustellen / construction works,Operator disruption/planned works,web/API where available,public web; structured access variable,Major long-distance construction works and timetable changes; useful for planned closure enrichment.,"Can supersede OSM for temporary rail service restrictions, not geometry.","Planned works, timetable changes.",DB as operator/infrastructure-related source.,Terms to verify.,P2,https://bauinfos.deutschebahn.com/,https://bauinfos.deutschebahn.com/,Research structured endpoints; otherwise link as non-ingested evidence.
|
||||
Greece,GR,"metro, tram, bus, coach, ferry",Greece NAP + OASA telematics,NAP / operator portals,web/API; GTFS unknown/fragmented,public web; machine access uncertain,"Official NAP exists; Athens OASA publishes line schedules/stops and telematics app information, but open machine-readable coverage needs verification.",Official feeds if found supersede OSM; otherwise OSM remains strong existence layer.,Telematics app includes real-time arrivals/locations; open reuse unclear.,Operators from NAP/operator pages and OSM.,Likely access/licence work required.,P3,https://www.nap.gov.gr/,https://telematics.oasa.gr/,Investigate NAP catalog API and OASA endpoints/licence.
|
||||
Hungary,HU,"metro, tram, bus, trolleybus, suburban rail",BKK FUTÁR / BKK Open Data,Operator/regional feed,"GTFS, GTFS-RT",public/API key may apply,Budapest transport open data contains planned and realtime network data.,GTFS shapes/stops supersede OSM for Budapest-covered services.,GTFS-RT service/vehicle updates.,BKK operator/agency records.,Terms/API key to verify.,P2,https://bkk.hu/apps/docs/bkkfutar-static-gtfs/,https://opendata.bkk.hu/,Ingest Budapest as high-quality regional feed; search national/regional feeds.
|
||||
Ireland,IE,"bus, rail, tram",NTA / Transport for Ireland GTFS and GTFS-R,National feed,"GTFS, GTFS-Realtime; NaPTAN-like stop data",public; fair-use/API limits,"National static transit data and GTFS-Realtime covering disruptions, vehicle locations and arrivals; current RT covers major operators and v2 extends modes.",GTFS shapes/stops and official stop datasets supersede OSM for timetable layer.,"GTFS-R disruptions, vehicle locations, arrivals.",Operators from agency.txt and NTA metadata.,Fair use and terms apply.,P1,https://developer.nationaltransport.ie/,https://developer.nationaltransport.ie/,Ingest static national GTFS; implement GTFS-RT endpoints with rate limits.
|
||||
Italy,IT,"rail, bus, metro, tram, ferry",CCISS Italian NAP MMTIS,National NAP / NeTEx/SIRI aggregator,"NeTEx, SIRI",public catalog; data via regional access points/operators,Italian NAP receives NeTEx and SIRI from regional access points and operators including Trenitalia and several regions.,NeTEx route/stop/service data supersede OSM where present.,SIRI profiles for realtime/deviation where implemented.,Operators from NeTEx datasets and regional RAP metadata.,Access and profile conformity vary.,P1,https://www.cciss.it/nap/mmtis/public/en/static/multimodal,https://www.cciss.it/nap/mmtis/public/en/static/multimodal,Implement Italian NeTEx profile import; start with Trenitalia and regions.
|
||||
Italy,IT,rail,Trenitalia NeTEx,Operator feed,NeTEx L1 scheduling services,public via NAP/dataset,National railway operator scheduled services in NeTEx.,Trenitalia NeTEx supersedes OSM for scheduled train route/timetable layer.,Realtime via SIRI/other channels to investigate.,Trenitalia as operator; train brands/services in data.,Terms per NAP.,P1,https://www.cciss.it/nap/mmtis/public/en/static/multimodal,https://www.trenitalia.com/,Connector for Trenitalia static NeTEx; map to station codes.
|
||||
Latvia,LV,"bus, tram, trolleybus, rail",Rīgas satiksme GTFS + Latvian NAP,Operator feed / NAP,GTFS,public,Rīga publishes route/timetable schedules in GTFS; national NAP lists transport data sources.,GTFS shapes/stops supersede OSM for Rīga services.,Realtime/disruptions to investigate.,Rīgas satiksme agency; national providers from NAP.,Licence to verify.,P2,https://www.rigassatiksme.lv/en/for-media/open-data/,https://www.transportdata.gov.lv/,Ingest Rīga GTFS; discover national/regional/rail sources.
|
||||
Lithuania,LT,"bus, trolleybus, rail",visimarsrutai.lt GTFS,National feed / NAP reference,GTFS,public URL referenced by official NAP list/community,Countrywide GTFS at visimarsrutai; official NAP list references GTFS and road/event portals.,GTFS shapes/stops supersede OSM for covered services.,Road restrictions/weather via eismoinfo; PT realtime to verify.,Agencies/operators from feed.,Licence to verify.,P2,https://www.visimarsrutai.lt/gtfs/,https://maps.eismoinfo.lt/,Ingest GTFS; use eismoinfo as road_event source for bus/access disruptions.
|
||||
Luxembourg,LU,"bus, rail, tram",mobiliteit.lu / data.public.lu NeTEx and GTFS-RT,National feed,"NeTEx, GTFS, GTFS-RT",public,"Luxembourg publishes public-transport stops/timetables covering AVL, CFL, Luxtram, RGTR, TICE; realtime GTFS-RT integrates mobiliteit.lu and CFL data.",NeTEx/GTFS route/stops supersede OSM for national PT.,GTFS-RT realtime data.,Operators from NeTEx/agency records.,Open data terms per data.public.lu.,P1,https://data.public.lu/en/datasets/horaires-et-arrets-des-transport-publics-netex/,https://data.public.lu/en/datasets/gtfs-realtime/,Ingest NeTEx static; add GTFS-RT connector.
|
||||
Malta,MT,"bus, ferry, road",Transport Malta NAP / geoservices,NAP / geospatial portal,geoservices; dataset catalog,public discovery; GTFS not confirmed,Malta NAP exists via Transport Malta geoservices; clear public machine-readable PT timetable feeds need follow-up.,"Official geoservices may supersede OSM for infrastructure, not necessarily timetable routes.",Road/traffic datasets may exist; PT disruptions unclear.,Operators from NAP and Malta Public Transport datasets/web.,Terms to verify.,P3,https://geoservices.transport.gov.mt/egis,https://www.transport.gov.mt/sustainable-mobility/transport-governance-and-policy-5761,Catalog geoservices layers; search for static bus/ferry feeds.
|
||||
Netherlands,NL,"rail, bus, tram, metro, ferry",NDOV Loket / OVapi / gtfs.ovapi.nl,National feed hub,"GTFS, GTFS-RT, NeTEx, KV1, realtime",public; terms vary,"National PT data hub provides GTFS/GTFS-RT/NeTEx/trains/stops data; OVapi aggregate covers bus, rail, tram, subway and ferry.",Official GTFS/NeTEx/KV1 data supersedes OSM for planned service geometry/stops.,GTFS-RT and other realtime feeds.,Operators from feeds and NDOV metadata.,Terms per dataset; some legacy formats.,P0,https://gtfs.ovapi.nl/,https://ndovloket.nl/,Ingest national GTFS; add realtime and NeTEx/KV1 crosswalk.
|
||||
Norway,NO,"rail, bus, tram, metro, ferry, air access",Entur national NeTEx/GTFS and NSR,National feed + stop registry,"NeTEx, GTFS, GTFS-RT, SIRI; NSR stop registry",public/free,National timetable and stop data dumps updated daily; NeTEx is official and most complete; NSR is master stop registry.,NeTEx and NSR supersede OSM for official stops/timetable/service geometry; OSM can be QA layer.,SIRI/GTFS-RT and SIRI-SX situations where available.,Operators from NeTEx and Entur source metadata.,Norwegian open data licence for NSR; check individual feed terms.,P0,https://developer.entur.org/stops-and-timetable-data/,https://developer.entur.org/pages-nsr-nsr/,Use as reference implementation for NeTEx-first ingestion.
|
||||
Poland,PL,"bus, tram, rail, metro",dane.gov.pl NAP + local GTFS/GTFS-RT,NAP / fragmented local feeds,"GTFS, GTFS-RT for some cities/operators",public; fragmented,National NAP exists; dynamic data declarations are local/operator specific; major cities often publish their own feeds.,Local GTFS shapes supersede OSM per city; national coverage uneven.,GTFS-RT in selected locations; road disruptions via GDDKiA/NAP.,Operators from local feeds and NAP datasets.,Licence varies by municipality/operator.,P3,"https://dane.gov.pl/en/dataset/1739,NAP",https://kpd.gddkia.gov.pl/index.php/en/homepage/,Discover city-by-city; start with Warsaw/Kraków/Poznań/Gdańsk and rail.
|
||||
Portugal,PT,"bus, tram, rail, ferry",NAP Portugal / IMT,NAP / national discovery,catalog; GTFS where available,public; feed maturity variable,"Portugal NAP is official discovery source; earlier pilots converted bus/tram/train/ferry routes, shapes, stops and calendars to GTFS.",Official GTFS/route/shapes where available supersede OSM.,Road/traffic info via NAP; PT realtime variable.,Operators from NAP datasets and feeds.,Terms vary.,P3,https://nap-portugal.imt-ip.pt/nap/home,https://nap-portugal.imt-ip.pt/nap/home,Crawl catalog; discover Lisbon/Porto/regional/ferry feeds.
|
||||
Romania,RO,"bus, tram, trolleybus, metro, rail",Romanian Open Transit Initiative + city feeds,Community registry / local official feeds,"GTFS, GTFS-RT, APIs",mixed; official/reverse-engineered/converted,ROTI collects static/realtime transit resources; Cluj and Bucharest have city-level transit platforms/open data.,Official GTFS/API shapes supersede OSM; reverse-engineered data is lower trust.,City realtime data in selected systems.,Operators from local feeds/city agencies.,Check licence; avoid unlicensed reverse-engineered sources for production.,P3,https://github.com/roti-opendata,https://tpbi.ro/,Use only licence-clear official sources; mark community sources as discovery.
|
||||
Slovakia,SK,"rail, bus, tram, trolleybus",ZSR rail GTFS + IDS BK,National rail / regional feed,GTFS; JDF; maps of closures,public,Slovak rail GTFS is available; Bratislava region publishes GTFS/JDF open data.,Official GTFS supersedes OSM for covered rail/regional services.,ZSR closures map; road traffic portals via NAP.,Operators from feeds; rail undertaking registry via ERADIS.,Terms to verify.,P2,https://www.zsr.sk/files/pre-cestujucich/cestovny-poriadok/gtfs/gtfs.zip,https://aplikacie.zsr.sk/MapaVylukZsr/index.aspx,Add rail static; investigate structured closure endpoints.
|
||||
Slovenia,SI,"bus, rail, urban bus",NAP Slovenia / IJPP + LPP,NAP / national multimodal system,"GTFS, GTFS-RT planned; APIs",public; maturity evolving,NAP/IJPP moving toward realtime GTFS-RT for planned PT routes; Ljubljana LPP GTFS exists.,Official GTFS/IJPP data supersedes OSM for covered services.,GTFS-RT realization/realtime planned/published by NAP where available.,Operators from IJPP and feeds.,Terms to verify.,P2,https://www.nap.si/,https://data.lpp.si/api/gtfs/feed.zip,Start with LPP/IJPP static; add NAP dynamic when stable.
|
||||
Spain,ES,"rail, bus, metro, tram, ferry",Spanish NAP / Ministerio de Transportes,National NAP/catalog,"GTFS-ZIP, GTFS-RT, SIRI, NeTEx",public catalog; per-feed terms,"Spanish NAP lists many datasets; search result showed 161 GTFS-ZIP, 35 GTFS-RT, 27 SIRI and 1 NeTEx, including RENFE, regional rail, buses and Fred.Olsen ferry.",Official GTFS/NeTEx shapes/stops supersede OSM for covered operators.,GTFS-RT/SIRI for selected operators; road data via DGT.,Operators/providers from dataset metadata and agency records.,Licence per dataset.,P1,https://nap.transportes.gob.es/Files/List,https://nap.transportes.gob.es/Files/List,"Crawl catalog pages/API; import RENFE, CRTM, FGC, Metro Bilbao, ferry datasets."
|
||||
Spain,ES,Barcelona metro/bus/tram/regional,TMB / ATM / AMB Open Data,Regional/operator feeds,"GTFS, GTFS-RT/API",public; API registration for live,TMB provides Barcelona bus and metro information in GTFS and realtime API; ATM planned offer dataset removes duplicate lines and keeps versions with shapes.,TMB/ATM GTFS shapes supersede OSM for Barcelona services.,TMB realtime services; AMB GTFS-RT every 30 seconds for buses outside TMB.,Operators from agencies and regional authority metadata.,Registration/terms for realtime APIs.,P1,https://www.tmb.cat/en/tmb-app-and-other-apps/tools-for-developers,https://t-mobilitat.atm.cat/en/web/t-mobilitat/open-data/sheduled-information,Implement Barcelona bundle; use ATM simplified feed for cleaner display layer.
|
||||
Sweden,SE,"rail, bus, tram, metro, ferry",Trafiklab / Samtrafiken GTFS Sverige,National feed / API hub,"GTFS Sverige 2/3, NeTEx, SIRI, GTFS-RT regional",public but API key required,Trafiklab provides official open data for all Swedish public transport; GTFS Sverige contains planned national PT data; realtime split by region/operator.,GTFS/NeTEx shapes/stops supersede OSM for planned services.,GTFS-RT/SIRI realtime and deviations for operators/regions.,Operators from Samtrafiken/Trafiklab metadata and agencies.,API key and terms apply.,P0,https://www.trafiklab.se/api/trafiklab-apis/gtfs-sverige-2/,https://www.trafiklab.se/api/,Implement API-key secrets handling; import static national feed first.
|
||||
Switzerland,CH,"rail, bus, tram, metro, ferry, cableways",opentransportdata.swiss GTFS/NeTEx/RT + SLOID/DiDok,National feed + stop/operator registry,"GTFS, GTFS-RT, SIRI-SX/VDV736, NeTEx-like stop/service datasets",public,National timetable GTFS covers Swiss public transport; agency.txt lists transport companies; stable SLOID service point datasets exist.,Official GTFS/service point data supersedes OSM for timetable/stop layer; OSM as QA/fallback.,GTFS-RT TripUpdates/ServiceAlerts; SIRI-SX/VDV736 incident information; actual journey data.,agency.txt transport companies and official service point/company datasets.,Terms per OTD dataset.,P0,https://opentransportdata.swiss/en/cookbook/timetable-cookbook/gtfs/,https://opentransportdata.swiss/en/dataset/googletansit,Use as high-quality reference country; implement SLOID-based stop matching.
|
||||
United Kingdom / Great Britain,GB/UK,"bus, coach, tram, metro, ferry, rail stops",BODS + NaPTAN,National bus feeds + stop registry,"TransXChange, GTFS, SIRI-VM, GTFS-RT, NeTEx fares; NaPTAN CSV/XML/API",public; API keys for some services,"England's BODS provides local bus timetable, vehicle-location and fares data; NaPTAN uniquely identifies GB public transport access points including bus stops, ferry terminals, airports, rail stations, coaches, trams, underground/metro.",TransXChange/GTFS route/timetable data and NaPTAN stops supersede OSM for covered bus/stops.,SIRI-VM/GTFS-RT live locations; disruptions through operator/TfL/National Rail sources.,BODS operators/NaPTAN admin areas; agency records.,Open data terms and API keys; GB not Northern Ireland for NaPTAN.,P0,https://www.bus-data.dft.gov.uk/,https://beta-naptan.dft.gov.uk/download,Ingest BODS national GTFS/TXC and NaPTAN; normalize NaPTAN as stop authority.
|
||||
United Kingdom,UK,rail,National Rail Darwin / Network Rail feeds,Rail realtime/disruption/source feeds,Darwin SOAP/PUSH; rail data feeds,registration/free limits; terms,"Darwin provides real-time arrival/departure predictions, platform numbers, delay estimates, schedule changes and cancellations.",Supersedes OSM for realtime rail service state; not route geometry.,"Realtime predictions, platform changes, cancellations, schedule changes.",Rail operators from schedules and ORR/ERADIS-equivalent registries.,Registration and limits.,P1,https://opendata.nationalrail.co.uk/,https://wiki.openraildata.com/index.php?title=Darwin,Implement rail RT later; static train planning may require separate timetable data.
|
||||
Iceland,IS,bus,Strætó Open Data,Operator/national bus feed,GTFS; possible GTFS-RT via service providers,public for static,"Strætó publishes bus GTFS with stops, locations, route lines and arrival/departure data.",GTFS shapes/stops supersede OSM for Strætó bus network.,"GTFS-RT implementation exists via third-party/service, confirm licence.",Strætó agency/operator.,Licence and RT access to verify.,P2,https://www.straeto.is/en/open-data,https://www.straeto.is/en/open-data,Ingest static GTFS; verify realtime terms.
|
||||
Europe,EU/UK,long-distance coach + rail,FlixBus / FlixTrain Europe GTFS,Commercial operator GTFS,GTFS,public URL via Transitland/Mobility catalogs; licence verify,Important pan-European coach/rail operator feed; not always present in national NAP coverage.,GTFS shapes supersede OSM for Flix services where feed license permits use.,Realtime not generally open in feed; booking via commercial APIs/site.,FlixBus/FlixTrain operator/agency.,Use only if licence and commercial use allowed.,P1,http://gtfs.gis.flix.tech/gtfs_generic_eu.zip,https://www.transit.land/feeds/f-u-flixbus~flixtrain,Ingest as separate commercial long-distance layer after licence check.
|
||||
Spain / Canary Islands,ES,ferry,Fred.Olsen ferry on Spanish NAP,Ferry operator GTFS,GTFS-ZIP,public via NAP,Spanish NAP lists a Fred.Olsen ferry dataset with stops/routes/trips; useful example of sea data in GTFS.,GTFS supersedes OSM ferry relation for timetable/service coverage.,GTFS-RT unknown.,Fred.Olsen as operator.,Terms per Spanish NAP.,P2,https://nap.transportes.gob.es/Files/List,https://nap.transportes.gob.es/Files/List,Add ferry-mode ingestion and route_type handling.
|
||||
Western Balkans,AL/BA/ME/MK/RS/XK,multimodal,Transport Community / WB NAP assessment,Regional policy/discovery,NAP planning/standards; country portals vary,fragmented,Useful for future expansion; public machine-readable PT feeds are generally less centralized than EU/EEA strongest cases.,Official feeds if discovered supersede OSM; OSM remains baseline.,Road/PT disruptions vary by country.,"Operators from national ministries, OSM, operator websites, registries.",Country-specific.,P4,https://www.transport-community.org/,https://www.transport-community.org/,Not MVP; keep source discovery backlog.
|
||||
United Kingdom / London,GB,"metro, rail, bus, tram, river",Transport for London Open Data,Operator/regional API,"Unified API, GTFS, line status, disruptions, arrivals",public API with terms,TfL open data provides London transport feeds and recommends Unified API for live feeds.,TfL data supersedes OSM for London service state and official stops/routes.,"Line status, disruptions, arrivals, live feeds.",TfL operators/networks; mode-specific lines.,Terms/branding rules apply.,P1,https://tfl.gov.uk/info-for/open-data-users/our-open-data,https://tfl.gov.uk/info-for/open-data-users/our-open-data,Add as high-value city connector after GB national layer.
|
||||
France / Île-de-France,FR,"metro, RER, tram, bus",Île-de-France Mobilités Open Data,Regional authority feed,"GTFS, GTFS-RT, NeTEx/SIRI in catalog",public; auth may apply,Major Paris-region authority data; often more detailed than OSM for official network and service state.,Official shapes/stops/service data supersede OSM.,Realtime disruptions and arrivals where published.,IDFM network/operators metadata.,Terms/API keys vary.,P1,https://data.iledefrance-mobilites.fr/,https://transport.data.gouv.fr/,Add dedicated IDFM connector because of scale/importance.
|
||||
Germany / Bavaria,DE,"rail, bus, tram, metro",DEFAS Bayern / BayernInfo,Regional aggregator,"SIRI, GTFS/NeTEx via portals",public/contracted depending endpoint,Bavarian regional system aggregates timetable and realtime data from many operators; useful richer regional source.,Official regional data supersedes OSM for Bavarian services.,Realtime from participating companies; access may require agreements.,Operators/associations in DEFAS metadata.,Some access may be contract-gated.,P2,https://www.bayerninfo.de/en/about-bayerninfo-1/data-offer/public-transport-data,https://www.bayerninfo.de/en/about-bayerninfo-1/data-offer/public-transport-data,Use after national DE layer for realtime enrichment.
|
||||
|
24
pyproject.toml
Normal file
24
pyproject.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[project]
|
||||
name = "mobility-workbench"
|
||||
version = "0.1.0"
|
||||
description = "Prototype workbench for ingesting, matching, and visualising public transport network data."
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"fastapi>=0.128",
|
||||
"uvicorn[standard]>=0.48",
|
||||
"SQLAlchemy>=2.0",
|
||||
"psycopg[binary]>=3.1",
|
||||
"pydantic>=2.0",
|
||||
"pydantic-settings>=2.0",
|
||||
"requests>=2.31",
|
||||
"shapely>=2.0",
|
||||
"python-multipart>=0.0.9",
|
||||
"Jinja2>=3.1",
|
||||
"typer>=0.12",
|
||||
"pytest>=8",
|
||||
"httpx2>=2.5",
|
||||
"osmium>=4.3",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
pythonpath = ["."]
|
||||
14
requirements.txt
Normal file
14
requirements.txt
Normal file
@@ -0,0 +1,14 @@
|
||||
fastapi>=0.128
|
||||
uvicorn[standard]>=0.48
|
||||
SQLAlchemy>=2.0
|
||||
psycopg[binary]>=3.1
|
||||
pydantic>=2.0
|
||||
pydantic-settings>=2.0
|
||||
requests>=2.31
|
||||
shapely>=2.0
|
||||
python-multipart>=0.0.9
|
||||
Jinja2>=3.1
|
||||
typer>=0.12
|
||||
pytest>=8
|
||||
httpx2>=2.5
|
||||
osmium>=4.3
|
||||
47
scripts/discover_gtfs_sources.py
Normal file
47
scripts/discover_gtfs_sources.py
Normal file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build GTFS source discovery manifests from Mobility Database, PTNA, and local seeds."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from app.feed_discovery import build_gtfs_discovery_manifests, default_generated_dir # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
    """Parse command-line options, build the GTFS discovery manifests, and print the result as JSON."""
    cli = argparse.ArgumentParser(description="Build GTFS discovery and ingestable-source CSV manifests.")
    cli.add_argument("--output-dir", default=str(default_generated_dir()), help="Directory for generated CSV files")
    cli.add_argument(
        "--countries",
        default="DE,AT,CH,NL,DK,FR,BE,LU,NO,SE,FI,IE,GB",
        help="Comma-separated country codes, or ALL for all countries exposed by the upstream catalogs",
    )
    # The three opt-out switches share the same shape, so register them from a table.
    skip_flags = (
        ("--no-mobility-database", "Skip Mobility Database feeds_v2.csv"),
        ("--no-acceptance-test-list", "Skip MobilityData validator acceptance-test feed list"),
        ("--no-ptna", "Skip PTNA GTFS analysis pages"),
    )
    for flag, help_text in skip_flags:
        cli.add_argument(flag, action="store_true", help=help_text)
    cli.add_argument("--max-ptna-details", type=int, default=80, help="Maximum PTNA detail pages to fetch")
    cli.add_argument("--test-limit", type=int, default=24, help="Rows written to the focused test-run CSV")
    cli.add_argument("--check-urls", action="store_true", help="Run HEAD/range checks for ingestable feed URLs")
    options = cli.parse_args()

    # Split the comma-separated list and drop empty entries such as a trailing comma.
    stripped_codes = (piece.strip() for piece in options.countries.split(","))
    country_codes = [code for code in stripped_codes if code]

    summary = build_gtfs_discovery_manifests(
        output_dir=Path(options.output_dir),
        countries=country_codes,
        include_mobility_database=not options.no_mobility_database,
        include_acceptance_test_list=not options.no_acceptance_test_list,
        include_ptna=not options.no_ptna,
        max_ptna_details=options.max_ptna_details,
        test_limit=options.test_limit,
        check_urls=options.check_urls,
    )
    print(json.dumps(summary, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
44
scripts/example_sources.json
Normal file
44
scripts/example_sources.json
Normal file
@@ -0,0 +1,44 @@
|
||||
[
|
||||
{
|
||||
"name": "Local GTFS file",
|
||||
"kind": "gtfs",
|
||||
"url": "./data/my-feed.zip",
|
||||
"country": "DE",
|
||||
"license": "unknown"
|
||||
},
|
||||
{
|
||||
"name": "VBB Online GTFS",
|
||||
"kind": "gtfs",
|
||||
"url": "https://unternehmen.vbb.de/fileadmin/user_upload/VBB/Dokumente/API-Datensaetze/gtfs-mastscharf/GTFS.zip",
|
||||
"country": "DE",
|
||||
"license": "CC BY 4.0"
|
||||
},
|
||||
{
|
||||
"name": "DB Long-distance Rail GTFS.DE",
|
||||
"kind": "gtfs",
|
||||
"url": "https://download.gtfs.de/germany/fv_free/latest.zip",
|
||||
"country": "DE",
|
||||
"license": "Creative Commons 4.0"
|
||||
},
|
||||
{
|
||||
"name": "Germany Regional Rail GTFS.DE",
|
||||
"kind": "gtfs",
|
||||
"url": "https://download.gtfs.de/germany/rv_free/latest.zip",
|
||||
"country": "DE",
|
||||
"license": "Creative Commons 4.0"
|
||||
},
|
||||
{
|
||||
"name": "Berlin OSM PBF",
|
||||
"kind": "osm_pbf",
|
||||
"url": "https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf",
|
||||
"country": "DE",
|
||||
"license": "ODbL"
|
||||
},
|
||||
{
|
||||
"name": "Local OSM transport GeoJSON",
|
||||
"kind": "osm_geojson",
|
||||
"url": "./data/transport.geojson",
|
||||
"country": "DE",
|
||||
"license": "ODbL"
|
||||
}
|
||||
]
|
||||
38
scripts/finalize_routing_layer.py
Normal file
38
scripts/finalize_routing_layer.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from app.db import init_db, session_scope # noqa: E402
|
||||
from app.pipeline.routing_layer import finalize_routing_layer # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
    """Finalize an already imported routing graph and print whatever the pipeline returns."""
    cli = argparse.ArgumentParser(description="Finalize an already imported routing graph.")
    cli.add_argument(
        "--dataset-id",
        type=int,
        default=None,
        help="Raw OSM PBF dataset id. Defaults to the active routing dataset.",
    )
    options = cli.parse_args()

    init_db()
    with session_scope() as db_session:
        outcome = finalize_routing_layer(
            db_session,
            dataset_id=options.dataset_id,
            progress_callback=_progress,
        )
    print(outcome)
|
||||
|
||||
|
||||
def _progress(event_type: str, message: str, current: int | None, total: int | None, metadata: dict[str, object] | None) -> None:
|
||||
if current is None and total is None:
|
||||
progress = ""
|
||||
elif total:
|
||||
progress = f" [{current}/{total}]"
|
||||
else:
|
||||
progress = f" [{current}]"
|
||||
print(f"{event_type}{progress}: {message} {metadata or {}}", flush=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
22
scripts/host_tool.sh
Normal file
22
scripts/host_tool.sh
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env bash
# Run TOOL with the given arguments. When TOOL is not visible in the current
# environment but we are inside a Flatpak sandbox, run it on the host via
# flatpak-spawn instead.
set -euo pipefail

if [ "$#" -eq 0 ]; then
  echo "usage: scripts/host_tool.sh TOOL [ARG...]" >&2
  exit 64
fi

tool="$1"
shift

# Prefer a tool that is directly reachable from this environment.
command -v "$tool" >/dev/null 2>&1 && exec "$tool" "$@"

# Otherwise fall back to the host when running inside a Flatpak sandbox.
if [ -f /.flatpak-info ]; then
  if command -v flatpak-spawn >/dev/null 2>&1; then
    exec flatpak-spawn --host "$tool" "$@"
  fi
fi

{
  echo "required tool not found: $tool"
  echo "Install it on the host, or run from a terminal where $tool is on PATH."
} >&2
exit 127
|
||||
34
scripts/import_ingestable_sources.py
Normal file
34
scripts/import_ingestable_sources.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Import seed feed sources into the Mobility Workbench source registry."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from app.db import init_db, session_scope # noqa: E402
|
||||
from app.source_catalog import default_ingestable_sources_path, import_ingestable_sources # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
    """Import the seed CSV of ingestable sources into the source registry and print a JSON summary."""
    cli = argparse.ArgumentParser(description="Import seed ingestable sources into the source registry.")
    cli.add_argument(
        "--csv",
        dest="csv_path",
        default=str(default_ingestable_sources_path()),
        help="CSV path relative to repo root or absolute path",
    )
    cli.add_argument(
        "--no-update",
        action="store_true",
        help="Skip rows that already exist instead of updating them",
    )
    options = cli.parse_args()

    # Relative paths are resolved against the repository root, not the current directory.
    requested = Path(options.csv_path)
    csv_file = requested if requested.is_absolute() else ROOT / requested

    init_db()
    with session_scope() as db_session:
        summary = import_ingestable_sources(db_session, csv_file, update_existing=not options.no_update)
    print(json.dumps(summary, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
48
scripts/import_routing_layer.py
Normal file
48
scripts/import_routing_layer.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from app.db import init_db, session_scope # noqa: E402
|
||||
from app.pipeline.routing_layer import rebuild_routing_layer # noqa: E402
|
||||
|
||||
|
||||
def main() -> None:
    """Rebuild the routing layer from an OSM PBF dataset and print whatever the pipeline returns."""
    cli = argparse.ArgumentParser(description="Import a routable OSM graph for walking/driving first/last-mile routing.")
    cli.add_argument(
        "--dataset-id",
        type=int,
        default=None,
        help="Raw OSM PBF dataset id. Defaults to the raw dataset behind the active OSM import.",
    )
    cli.add_argument("--input-path", type=Path, default=None, help="Override the PBF path.")
    cli.add_argument("--batch-size", type=int, default=5000, help="Insert batch size.")
    cli.add_argument(
        "--append",
        action="store_true",
        help="Append instead of clearing existing graph rows for the dataset.",
    )
    options = cli.parse_args()

    # --append is the inverse of the pipeline's ``reset`` switch.
    clear_existing = not options.append

    init_db()
    with session_scope() as db_session:
        outcome = rebuild_routing_layer(
            db_session,
            dataset_id=options.dataset_id,
            input_path=options.input_path,
            reset=clear_existing,
            batch_size=options.batch_size,
            progress_callback=_progress,
        )
    print(outcome)
|
||||
|
||||
|
||||
def _progress(event_type: str, message: str, current: int | None, total: int | None, metadata: dict[str, object] | None) -> None:
|
||||
if current is None and total is None:
|
||||
progress = ""
|
||||
elif total:
|
||||
progress = f" [{current}/{total}]"
|
||||
else:
|
||||
progress = f" [{current}]"
|
||||
print(f"{event_type}{progress}: {message} {metadata or {}}", flush=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
134
scripts/launch-dev.sh
Normal file
134
scripts/launch-dev.sh
Normal file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env bash
# Development launcher: prepares the database, starts the uvicorn server in the
# background, waits until it answers, and optionally opens the web UI.
set -euo pipefail

# Default to the repository that contains this script so the launcher works from
# any checkout location; MOBILITY_WORKBENCH_ROOT still overrides it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="${MOBILITY_WORKBENCH_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}"
PYTHON="${PYTHON:-$ROOT/.venv/bin/python}"
HOST="${MOBILITY_HOST:-127.0.0.1}"
PORT="${MOBILITY_PORT:-8000}"
OPEN_BROWSER="${OPEN_BROWSER:-1}"
SAMPLE_MODE="${MOBILITY_SAMPLE_MODE:-missing}" # missing, always, never

LOG_DIR="$ROOT/data/dev-launcher"
SERVER_LOG="$LOG_DIR/server.log"
URL="http://$HOST:$PORT"

# PID of the background server; stays empty until the server has been started.
server_pid=""
|
||||
|
||||
# Print an error message prefixed with the launcher name to stderr and abort.
fail() {
  printf '%s\n' "launch-dev: $*" >&2
  exit 1
}
|
||||
|
||||
# Succeed when HOST ($1) / PORT ($2) can be bound by a fresh IPv4 TCP socket,
# fail (exit status 1) when the bind raises OSError.
port_is_free() {
  local probe_host="$1" probe_port="$2"
  "$PYTHON" - "$probe_host" "$probe_port" <<'PY'
import socket
import sys

host, port = sys.argv[1], int(sys.argv[2])
probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
with probe:
    probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        probe.bind((host, port))
    except OSError:
        sys.exit(1)
PY
}
|
||||
|
||||
# Poll URL ($1) once per second until the server answers, for at most 60 seconds.
# Any HTTP status below 500 counts as "reachable". On timeout the last error is
# printed to stderr and the function fails with status 1.
wait_for_url() {
  "$PYTHON" - "$1" <<'PY'
import sys
import time
import urllib.error
import urllib.request

url = sys.argv[1]
deadline = time.monotonic() + 60
last_error = None
while time.monotonic() < deadline:
    try:
        with urllib.request.urlopen(url, timeout=2) as response:
            if 200 <= response.status < 500:
                raise SystemExit(0)
    except urllib.error.HTTPError as exc:
        # urlopen raises HTTPError for 4xx/5xx responses instead of returning
        # them, so a 4xx (e.g. 404 on "/") must be accepted here: it still
        # proves the server is up and answering requests.
        if exc.code < 500:
            raise SystemExit(0)
        last_error = exc
    except Exception as exc:  # noqa: BLE001 - printed only on timeout.
        last_error = exc
    time.sleep(1)
print(f"Timed out waiting for {url}: {last_error}", file=sys.stderr)
raise SystemExit(1)
PY
}
|
||||
|
||||
# Print "<kind><TAB><database url>" for the database configured in app.config,
# where kind is sqlite, postgresql, or other. Must run with the repository root
# importable so that "app" resolves.
configured_database() {
  "$PYTHON" - <<'PY'
from app.config import settings

if settings.is_sqlite_database:
    kind = "sqlite"
elif settings.is_postgresql_database:
    kind = "postgresql"
else:
    kind = "other"
print(f"{kind}\t{settings.database_url}")
PY
}
|
||||
|
||||
# Stop the background server if it was started and is still alive.
cleanup() {
  [ -n "${server_pid:-}" ] || return 0
  if kill -0 "$server_pid" 2>/dev/null; then
    kill "$server_pid" 2>/dev/null || true
  fi
}
# Run the cleanup on normal exit as well as on Ctrl+C / termination.
trap cleanup EXIT INT TERM
|
||||
|
||||
# Refuse to continue without an executable interpreter and tell the user how to create it.
[ -x "$PYTHON" ] || fail "Python virtualenv not found at $PYTHON. Run: cd $ROOT && python -m venv .venv && . .venv/bin/activate && pip install -r requirements.txt"

# Start every run with an empty server log.
mkdir -p "$LOG_DIR"
: > "$SERVER_LOG"

port_is_free "$HOST" "$PORT" || fail "$URL is already in use"

# Everything below imports the "app" package, so run from the repository root.
cd "$ROOT"
# configured_database prints "<kind>\t<url>"; split the two fields on the tab.
db_info="$(configured_database)"
db_kind="$(printf '%s' "$db_info" | cut -f1)"
db_url="$(printf '%s' "$db_info" | cut -f2-)"
case "$SAMPLE_MODE" in
  always)
    printf 'Loading sample project. This clears project data in the configured database.\n'
    "$PYTHON" -m app.cli load-sample
    ;;
  missing)
    # Only seed the sample project when the default SQLite file is absent or empty;
    # any other database is just initialised.
    if [ "$db_kind" = "sqlite" ] && [ "$db_url" = "sqlite:///./data/workbench.sqlite" ] && [ ! -s "$ROOT/data/workbench.sqlite" ]; then
      printf 'Default SQLite database is missing. Loading sample project.\n'
      "$PYTHON" -m app.cli load-sample
    else
      "$PYTHON" -m app.cli init-db
    fi
    ;;
  never)
    "$PYTHON" -m app.cli init-db
    ;;
  *)
    fail "MOBILITY_SAMPLE_MODE must be missing, always, or never"
    ;;
esac

printf 'Starting Mobility Workbench at %s\n' "$URL"
# Run the server in the background with stdout and stderr captured in the log file.
"$PYTHON" -m uvicorn app.main:app --host "$HOST" --port "$PORT" --reload >"$SERVER_LOG" 2>&1 &
server_pid="$!"

printf 'Waiting for %s\n' "$URL"
# On failure show the tail of the server log so the cause is visible in the terminal.
wait_for_url "$URL" || {
  tail -n 80 "$SERVER_LOG" >&2 || true
  fail "server did not become reachable"
}

# Opening the browser is best-effort: skipped without xdg-open, and its failure is ignored.
if [ "$OPEN_BROWSER" = "1" ] && command -v xdg-open >/dev/null 2>&1; then
  xdg-open "$URL" >/dev/null 2>&1 || true
fi

cat <<EOF

Mobility Workbench is running.
Web UI: $URL
API: $URL/api

Log:
$SERVER_LOG

Press Ctrl+C to stop the server.
EOF

# Block until the server exits; the EXIT/INT/TERM trap stops it on interruption.
wait "$server_pid"
|
||||
452
scripts/migrate_sqlite_to_postgres.py
Normal file
452
scripts/migrate_sqlite_to_postgres.py
Normal file
@@ -0,0 +1,452 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
TABLE_ORDER = [
|
||||
"source_catalog_entries",
|
||||
"sources",
|
||||
"datasets",
|
||||
"source_update_checks",
|
||||
"osm_diff_states",
|
||||
"jobs",
|
||||
"job_events",
|
||||
"pipeline_runs",
|
||||
"gtfs_agencies",
|
||||
"gtfs_stops",
|
||||
"gtfs_routes",
|
||||
"gtfs_trips",
|
||||
"gtfs_calendars",
|
||||
"gtfs_calendar_dates",
|
||||
"gtfs_shapes",
|
||||
"gtfs_stop_times",
|
||||
"osm_features",
|
||||
"canonical_stops",
|
||||
"canonical_stop_links",
|
||||
"route_matches",
|
||||
"match_rules",
|
||||
"route_patterns",
|
||||
"route_pattern_stops",
|
||||
"gtfs_route_pattern_links",
|
||||
"gtfs_trip_route_pattern_links",
|
||||
"travel_requests",
|
||||
"itineraries",
|
||||
"itinerary_legs",
|
||||
]
|
||||
|
||||
|
||||
def main() -> int:
    """Copy a Mobility Workbench SQLite database into PostgreSQL/PostGIS.

    Steps, in order: resolve and validate the target URL, prepare the target
    schema, copy every table listed in ``TABLE_ORDER`` that exists on both
    sides, reset sequences, copy sidecar data, then refresh PostGIS geometries,
    create the runtime indexes, and analyze the copied tables.

    Returns:
        ``0`` on success. Invalid arguments terminate via ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="Migrate a Mobility Workbench SQLite database to PostgreSQL/PostGIS.")
    parser.add_argument("--postgres-url", default=os.environ.get("POSTGRES_DATABASE_URL") or os.environ.get("DATABASE_URL"))
    parser.add_argument("--sqlite-path", default="data/workbench.sqlite")
    parser.add_argument("--reset", action="store_true", help="Drop and recreate the target PostgreSQL schema before copying.")
    parser.add_argument("--batch-size", type=int, default=100_000)
    parser.add_argument("--strict-sidecars", action="store_true", help="Fail when a referenced sidecar file is missing.")
    args = parser.parse_args()

    # Last resort: fall back to the application settings, but only when they
    # already point at PostgreSQL.
    if not args.postgres_url:
        from app.config import settings as parsed_settings

        if parsed_settings.is_postgresql_database:
            args.postgres_url = parsed_settings.database_url

    if not args.postgres_url:
        parser.error("--postgres-url or POSTGRES_DATABASE_URL is required")
    if not str(args.postgres_url).startswith(("postgresql://", "postgresql+psycopg://")):
        parser.error("--postgres-url must be a PostgreSQL SQLAlchemy URL")

    # NOTE: a relative --sqlite-path is resolved against the current working directory.
    sqlite_path = Path(args.sqlite_path)
    if not sqlite_path.exists():
        parser.error(f"SQLite database does not exist: {sqlite_path}")

    # Set the target URL before the project modules below are imported.
    # NOTE(review): presumably app.db builds its engine from DATABASE_URL at import time - confirm.
    os.environ["DATABASE_URL"] = str(args.postgres_url)

    from app import models  # noqa: F401
    from app.db import Base, SessionLocal, _ensure_database_extensions, _ensure_runtime_columns, _ensure_runtime_indexes, engine, init_db
    from app.gtfs_storage import GTFS_STORAGE_METADATA_KEY, GTFS_STORAGE_MAIN, GTFS_STOP_TIME_COLUMNS
    from app.osm_storage import OSM_FEATURE_COLUMNS, OSM_STORAGE_MAIN, OSM_STORAGE_METADATA_KEY
    from app.spatial import analyze_postgresql_tables, refresh_postgis_geometries

    if args.reset:
        # Recreate the tables but postpone the runtime indexes until after the
        # bulk copy (they are created by _ensure_runtime_indexes further down).
        print("Resetting PostgreSQL schema without secondary indexes...")
        _ensure_database_extensions()
        Base.metadata.drop_all(bind=engine)
        Base.metadata.create_all(bind=engine)
        _ensure_runtime_columns()
    else:
        print("Initializing PostgreSQL schema...")
        init_db()

    source = sqlite3.connect(sqlite_path)
    source.row_factory = sqlite3.Row
    try:
        source_tables = _sqlite_tables(source)
        # Column names and boolean columns per target table, taken from the ORM metadata.
        target_columns = {name: list(table.c.keys()) for name, table in Base.metadata.tables.items()}
        bool_columns = {
            name: _boolean_columns(table)
            for name, table in Base.metadata.tables.items()
        }

        import psycopg

        with psycopg.connect(_psycopg_url(str(args.postgres_url))) as pg:
            copied_tables: list[str] = []
            for table_name in TABLE_ORDER:
                # Skip tables that do not exist on both sides.
                if table_name not in source_tables or table_name not in target_columns:
                    continue
                copied = _copy_sqlite_table(
                    source,
                    pg,
                    table_name=table_name,
                    target_columns=target_columns[table_name],
                    bool_columns=bool_columns.get(table_name, set()),
                    # Batches are never smaller than 1,000 rows, whatever was requested.
                    batch_size=max(1_000, int(args.batch_size)),
                )
                copied_tables.append(table_name)
                print(f"Copied {copied:,} rows from {table_name}.")
                # Commit after each table.
                pg.commit()

            _reset_sequences(pg, target_columns)
            pg.commit()

            sidecar_results = _copy_sidecars(
                source,
                pg,
                sqlite_base_dir=sqlite_path.parent,
                batch_size=max(1_000, int(args.batch_size)),
                strict=args.strict_sidecars,
                osm_columns=OSM_FEATURE_COLUMNS,
                gtfs_stop_time_columns=GTFS_STOP_TIME_COLUMNS,
                gtfs_storage_key=GTFS_STORAGE_METADATA_KEY,
                osm_storage_key=OSM_STORAGE_METADATA_KEY,
                gtfs_main_mode=GTFS_STORAGE_MAIN,
                osm_main_mode=OSM_STORAGE_MAIN,
            )
            # Sequences are reset a second time once the sidecar rows are in.
            _reset_sequences(pg, target_columns)
            pg.commit()

        print("Refreshing PostGIS geometries and indexes...")
        with SessionLocal() as session:
            refresh_postgis_geometries(session, only_missing=False)
            session.commit()
        _ensure_runtime_indexes()
        with SessionLocal() as session:
            # The sidecar target tables are always analyzed, in addition to the copied tables.
            analyze_postgresql_tables(session, copied_tables + ["osm_features", "gtfs_stop_times"])
            session.commit()

        print("Migration complete.")
        for message in sidecar_results:
            print(message)
        return 0
    finally:
        # Close the SQLite connection on both the success and the failure path.
        source.close()
|
||||
|
||||
|
||||
def _copy_sqlite_table(
    source: sqlite3.Connection,
    pg,
    *,
    table_name: str,
    target_columns: list[str],
    bool_columns: set[str],
    batch_size: int,
) -> int:
    """Stream one SQLite table into the PostgreSQL table of the same name.

    Only columns present on both sides are copied. Rows are read in batches of
    ``batch_size`` and handed to ``_copy_rows`` after conversion by
    ``_row_values``.

    Returns:
        The number of rows read from SQLite, or ``0`` when the two tables
        share no column.
    """
    shared_columns = [name for name in _sqlite_columns(source, table_name) if name in target_columns]
    if not shared_columns:
        return 0

    column_list = ", ".join(_quote_sqlite(name) for name in shared_columns)
    cursor = source.execute(f"SELECT {column_list} FROM {_quote_sqlite(table_name)}")
    row_count = 0
    try:
        # fetchmany returns an empty list once the cursor is exhausted.
        while batch := cursor.fetchmany(batch_size):
            converted = (_row_values(record, shared_columns, bool_columns) for record in batch)
            _copy_rows(
                pg,
                table_name=table_name,
                columns=shared_columns,
                rows=converted,
            )
            row_count += len(batch)
    finally:
        cursor.close()
    return row_count
|
||||
|
||||
|
||||
def _copy_sidecars(
    source: sqlite3.Connection,
    pg,
    *,
    sqlite_base_dir: Path,
    batch_size: int,
    strict: bool,
    osm_columns: list[str],
    gtfs_stop_time_columns: list[str],
    gtfs_storage_key: str,
    osm_storage_key: str,
    gtfs_main_mode: str,
    osm_main_mode: str,
) -> list[str]:
    """Copy sidecar-stored GTFS stop_times and OSM features into PostgreSQL.

    Walks every row of the SQLite ``datasets`` table, reads the storage
    descriptors from its ``metadata_json``, and for each descriptor that
    ``_storage_uses_sidecar`` accepts copies the sidecar file into the main
    PostgreSQL table. After a successful copy the dataset metadata is updated
    through ``_mark_storage_main`` / ``_update_dataset_metadata`` and committed.

    Args:
        source: Open connection to the main SQLite database.
        pg: Open PostgreSQL connection (committed after each copied sidecar).
        sqlite_base_dir: Directory passed to ``_resolve_sidecar_path`` together
            with the stored ``sidecar_path``.
        batch_size: Rows per batch for the sidecar copy helpers.
        strict: Raise instead of reporting when a sidecar file is missing.
        osm_columns: Column list forwarded to ``_copy_osm_sidecar``.
        gtfs_stop_time_columns: Column list forwarded to ``_copy_gtfs_sidecar``.
        gtfs_storage_key: Metadata key holding the GTFS storage descriptor.
        osm_storage_key: Metadata key holding the OSM storage descriptor.
        gtfs_main_mode: Storage mode forwarded to ``_mark_storage_main`` for GTFS.
        osm_main_mode: Storage mode forwarded to ``_mark_storage_main`` for OSM.

    Returns:
        Human-readable status messages, one per missing, skipped, or copied sidecar.

    Raises:
        FileNotFoundError: If ``strict`` is true and a referenced sidecar is missing.
    """
    messages: list[str] = []
    # NOTE: ``kind`` is selected but not read anywhere in this function.
    dataset_rows = source.execute("SELECT id, kind, metadata_json FROM datasets ORDER BY id").fetchall()
    for row in dataset_rows:
        dataset_id = int(row["id"])
        metadata = _json_dict(row["metadata_json"])

        gtfs_storage = metadata.get(gtfs_storage_key)
        if isinstance(gtfs_storage, dict) and _storage_uses_sidecar(gtfs_storage, "gtfs_stop_times"):
            path = _resolve_sidecar_path(gtfs_storage.get("sidecar_path"), sqlite_base_dir)
            if path is None or not path.exists():
                message = f"Missing GTFS sidecar for dataset #{dataset_id}: {path}"
                if strict:
                    raise FileNotFoundError(message)
                messages.append(message)
            else:
                # Skip the copy when the target already has stop_times rows for this dataset.
                existing = _pg_scalar(pg, "SELECT COUNT(*) FROM gtfs_stop_times WHERE dataset_id = %s", [dataset_id])
                if int(existing or 0) > 0:
                    messages.append(f"Skipped GTFS sidecar for dataset #{dataset_id}; target already has stop_times rows.")
                else:
                    copied = _copy_gtfs_sidecar(pg, dataset_id, path, gtfs_stop_time_columns, batch_size)
                    _mark_storage_main(metadata, gtfs_storage_key, "gtfs_stop_times", gtfs_main_mode, path)
                    _update_dataset_metadata(pg, dataset_id, metadata)
                    pg.commit()
                    messages.append(f"Copied {copied:,} GTFS stop_times rows from {path}.")

        osm_storage = metadata.get(osm_storage_key)
        if isinstance(osm_storage, dict) and _storage_uses_sidecar(osm_storage, "osm_features"):
            path = _resolve_sidecar_path(osm_storage.get("sidecar_path"), sqlite_base_dir)
            if path is None or not path.exists():
                message = f"Missing OSM sidecar for dataset #{dataset_id}: {path}"
                if strict:
                    raise FileNotFoundError(message)
                messages.append(message)
            else:
                # Unlike the GTFS branch there is no "already populated" check here; the
                # helper reports both the rows read and the rows newly inserted.
                copied, inserted = _copy_osm_sidecar(pg, dataset_id, path, osm_columns, batch_size)
                _mark_storage_main(metadata, osm_storage_key, "osm_features", osm_main_mode, path)
                _update_dataset_metadata(pg, dataset_id, metadata)
                pg.commit()
                messages.append(f"Copied {copied:,} OSM sidecar rows from {path}; inserted {inserted:,} new main rows.")
    return messages
|
||||
|
||||
|
||||
def _copy_gtfs_sidecar(pg, dataset_id: int, path: Path, columns: list[str], batch_size: int) -> int:
    """Stream ``gtfs_stop_times`` rows from a SQLite sidecar into PostgreSQL.

    Columns listed in ``columns`` that the sidecar table does not have are
    selected as NULL. Rows are read ``batch_size`` at a time and passed to
    ``_copy_rows`` with ``dataset_id`` prepended to every row.

    Returns:
        The number of rows read from the sidecar and handed to ``_copy_rows``.
    """
    sidecar = sqlite3.connect(path)
    sidecar.row_factory = sqlite3.Row
    try:
        present = _sqlite_columns(sidecar, "gtfs_stop_times")
        projections = []
        for name in columns:
            quoted = _quote_sqlite(name)
            projections.append(quoted if name in present else f"NULL AS {quoted}")
        target_columns = ["dataset_id", *columns]
        copied = 0
        reader = sidecar.execute(f"SELECT {', '.join(projections)} FROM gtfs_stop_times")
        try:
            # fetchmany() hands back an empty list once the result set is exhausted.
            for batch in iter(lambda: reader.fetchmany(batch_size), []):
                _copy_rows(
                    pg,
                    table_name="gtfs_stop_times",
                    columns=target_columns,
                    rows=([dataset_id, *[record[name] for name in columns]] for record in batch),
                )
                copied += len(batch)
        finally:
            reader.close()
        return copied
    finally:
        sidecar.close()
|
||||
|
||||
|
||||
def _copy_osm_sidecar(pg, dataset_id: int, path: Path, columns: list[str], batch_size: int) -> tuple[int, int]:
    """Merge ``osm_features`` rows from a SQLite sidecar into PostgreSQL.

    Sidecar rows are first COPY-ed into a transaction-scoped temp table and
    then inserted into ``osm_features``; rows that collide with the
    ``uq_osm_feature_dataset_type_id`` constraint are skipped.

    Args:
        pg: Open PostgreSQL connection (used via ``pg.cursor()``).
        dataset_id: Value written into ``dataset_id`` for every copied row.
        path: Location of the SQLite sidecar file.
        columns: Target columns to populate. ``dataset_id`` may appear at any
            position (or be absent); it is always supplied from ``dataset_id``.
        batch_size: Number of sidecar rows fetched per COPY batch.

    Returns:
        ``(copied, inserted)``: rows read from the sidecar, and rows actually
        inserted into ``osm_features`` as reported by the cursor's rowcount.
    """
    source = sqlite3.connect(path)
    source.row_factory = sqlite3.Row
    temp_table = "tmp_osm_sidecar_features"
    quoted_temp = _quote_pg(temp_table)
    try:
        available = set(_sqlite_columns(source, "osm_features"))
        payload_columns = [column for column in columns if column != "dataset_id"]
        # Row values are always emitted as [dataset_id, *payload]. Build the
        # target column list in that same order so values and columns line up
        # even when ``dataset_id`` is not the first entry of ``columns``.
        target_columns = ["dataset_id", *payload_columns]
        select_columns = [
            (_quote_sqlite(column) if column in available else f"NULL AS {_quote_sqlite(column)}")
            for column in payload_columns
        ]
        with pg.cursor() as cur:
            cur.execute(f"DROP TABLE IF EXISTS pg_temp.{quoted_temp}")
            cur.execute(f"CREATE TEMP TABLE {quoted_temp} (LIKE osm_features INCLUDING DEFAULTS) ON COMMIT DROP")
        copied = 0
        cursor = source.execute(f"SELECT {', '.join(select_columns)} FROM osm_features")
        try:
            while True:
                rows = cursor.fetchmany(batch_size)
                if not rows:
                    break
                _copy_rows(
                    pg,
                    table_name=temp_table,
                    columns=target_columns,
                    rows=([dataset_id, *[row[column] for column in payload_columns]] for row in rows),
                )
                copied += len(rows)
        finally:
            cursor.close()
        with pg.cursor() as cur:
            column_sql = ", ".join(_quote_pg(column) for column in target_columns)
            cur.execute(
                f"""
                INSERT INTO osm_features ({column_sql})
                SELECT {column_sql}
                FROM {quoted_temp}
                ON CONFLICT ON CONSTRAINT uq_osm_feature_dataset_type_id DO NOTHING
                """
            )
            inserted = int(cur.rowcount or 0)
            # Drop eagerly; ON COMMIT DROP only fires when the caller commits.
            cur.execute(f"DROP TABLE IF EXISTS pg_temp.{quoted_temp}")
        return copied, inserted
    finally:
        source.close()
|
||||
|
||||
|
||||
def _copy_rows(pg, *, table_name: str, columns: list[str], rows: Iterable[Iterable[Any]]) -> None:
    """Write ``rows`` into ``table_name`` through a ``COPY ... FROM STDIN``.

    Table and column identifiers are quoted with ``_quote_pg``; each row is
    materialised as a list before being passed to ``write_row``.
    """
    quoted_columns = ", ".join(map(_quote_pg, columns))
    statement = f"COPY {_quote_pg(table_name)} ({quoted_columns}) FROM STDIN"
    with pg.cursor() as cur, cur.copy(statement) as writer:
        for record in rows:
            writer.write_row(list(record))
|
||||
|
||||
|
||||
def _reset_sequences(pg, target_columns: dict[str, list[str]]) -> None:
    """Re-align the ``id`` sequence of every copied table with its data.

    Tables without an ``id`` column, or whose ``id`` has no sequence
    according to ``pg_get_serial_sequence``, are left alone. For the rest the
    sequence is set to ``MAX(id)``; for an empty table it is set to 1 with
    ``is_called`` false.
    """
    setval_sql = """
        SELECT setval(
            %s,
            COALESCE((SELECT MAX(id) FROM {table}), 1),
            (SELECT MAX(id) IS NOT NULL FROM {table})
        )
        """
    tables_with_id = [name for name, names in target_columns.items() if "id" in names]
    with pg.cursor() as cur:
        for name in tables_with_id:
            cur.execute("SELECT pg_get_serial_sequence(%s, 'id')", [name])
            found = cur.fetchone()
            sequence = found[0] if found else None
            if sequence:
                cur.execute(setval_sql.format(table=_quote_pg(name)), [sequence])
|
||||
|
||||
|
||||
def _mark_storage_main(metadata: dict[str, Any], key: str, table_name: str, mode: str, sidecar_path: Path) -> None:
|
||||
storage = metadata.setdefault(key, {})
|
||||
if not isinstance(storage, dict):
|
||||
storage = {}
|
||||
metadata[key] = storage
|
||||
storage["mode"] = mode
|
||||
storage["tables"] = {table_name: "main"}
|
||||
storage["storage_status"] = "ready"
|
||||
storage["legacy_sidecar_path"] = str(sidecar_path)
|
||||
storage.pop("sidecar_path", None)
|
||||
storage.pop("sidecar_status", None)
|
||||
|
||||
|
||||
def _update_dataset_metadata(pg, dataset_id: int, metadata: dict[str, Any]) -> None:
    """Store ``metadata`` as compact JSON in ``datasets.metadata_json`` for one dataset.

    The statement is executed on ``pg`` but not committed here.
    """
    encoded = json.dumps(metadata, separators=(",", ":"))
    statement = "UPDATE datasets SET metadata_json = %s WHERE id = %s"
    with pg.cursor() as cur:
        cur.execute(statement, [encoded, dataset_id])
|
||||
|
||||
|
||||
def _pg_scalar(pg, sql: str, params: list[Any]) -> Any:
    """Run ``sql`` with ``params`` and return the first column of the first row.

    Returns ``None`` when the query yields no row.
    """
    with pg.cursor() as cur:
        cur.execute(sql, params)
        first = cur.fetchone()
    if first:
        return first[0]
    return None
|
||||
|
||||
|
||||
def _sqlite_tables(connection: sqlite3.Connection) -> set[str]:
|
||||
return {
|
||||
str(row["name"])
|
||||
for row in connection.execute("SELECT name FROM sqlite_master WHERE type = 'table'").fetchall()
|
||||
}
|
||||
|
||||
|
||||
def _sqlite_columns(connection: sqlite3.Connection, table_name: str) -> list[str]:
    """Return the column names of ``table_name`` as reported by ``PRAGMA table_info``.

    Rows are read by column name, so the connection must use a row factory
    that supports name-based access (e.g. ``sqlite3.Row``).
    """
    pragma = f"PRAGMA table_info({_quote_sqlite(table_name)})"
    described = connection.execute(pragma).fetchall()
    return [str(info["name"]) for info in described]
|
||||
|
||||
|
||||
def _row_values(row: sqlite3.Row, columns: list[str], bool_columns: set[str]) -> list[Any]:
|
||||
values: list[Any] = []
|
||||
for column in columns:
|
||||
value = row[column]
|
||||
if column in bool_columns and value is not None:
|
||||
value = bool(value)
|
||||
values.append(value)
|
||||
return values
|
||||
|
||||
|
||||
def _boolean_columns(table) -> set[str]:
|
||||
columns: set[str] = set()
|
||||
for column in table.c:
|
||||
try:
|
||||
if column.type.python_type is bool:
|
||||
columns.add(str(column.name))
|
||||
except NotImplementedError:
|
||||
continue
|
||||
return columns
|
||||
|
||||
|
||||
def _storage_uses_sidecar(storage: dict[str, Any], table_name: str) -> bool:
|
||||
tables = storage.get("tables")
|
||||
if isinstance(tables, dict) and tables.get(table_name) == "sidecar":
|
||||
return True
|
||||
return str(storage.get("mode") or "").startswith("sidecar")
|
||||
|
||||
|
||||
def _resolve_sidecar_path(value: Any, base_dir: Path) -> Path | None:
|
||||
if not value:
|
||||
return None
|
||||
path = Path(str(value))
|
||||
if path.is_absolute():
|
||||
return path
|
||||
if path.exists():
|
||||
return path
|
||||
source_relative = base_dir / path
|
||||
if source_relative.exists():
|
||||
return source_relative
|
||||
repo_relative = base_dir.parent / path
|
||||
if repo_relative.exists():
|
||||
return repo_relative
|
||||
return path
|
||||
|
||||
|
||||
def _json_dict(value: str | None) -> dict[str, Any]:
|
||||
try:
|
||||
data = json.loads(value or "{}")
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
return data if isinstance(data, dict) else {}
|
||||
|
||||
|
||||
def _psycopg_url(value: str) -> str:
|
||||
return value.replace("postgresql+psycopg://", "postgresql://", 1)
|
||||
|
||||
|
||||
def _quote_pg(identifier: str) -> str:
|
||||
return '"' + identifier.replace('"', '""') + '"'
|
||||
|
||||
|
||||
def _quote_sqlite(identifier: str) -> str:
|
||||
return '"' + identifier.replace('"', '""') + '"'
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
26
scripts/osmium_transport_filter.sh
Normal file
26
scripts/osmium_transport_filter.sh
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
# Abort on errors, unset variables, and failures inside pipelines.
set -euo pipefail

# Usage:
# scripts/osmium_transport_filter.sh europe-latest.osm.pbf transport.osm.pbf
#
# This produces a transport-focused PBF that can then be converted to GeoJSON,
# imported via a future PBF importer, or used by tile-generation tooling.

# Both positional arguments are mandatory; the script exits with the given
# message when either one is missing.
INPUT=${1:?input .osm.pbf required}
OUTPUT=${2:?output .osm.pbf required}
# Directory containing this script, used to find the sibling host_tool.sh.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Run `osmium tags-filter` through host_tool.sh (wrapper semantics are defined
# in that script, not here). Filter expressions keep:
#   - route / route_master relations for the listed transit modes
#   - anything tagged public_transport
#   - stations, halts, stops, platforms, terminals and aerodromes
#   - rail-like ways and ferry route ways
# --overwrite allows an existing OUTPUT file to be replaced.
"$SCRIPT_DIR/host_tool.sh" osmium tags-filter "$INPUT" \
  r/route=train,railway,light_rail,subway,tram,bus,trolleybus,coach,ferry,monorail,funicular,aerialway \
  r/route_master=train,railway,light_rail,subway,tram,bus,trolleybus,coach,ferry,monorail,funicular,aerialway \
  nwr/public_transport \
  nwr/railway=station,halt,tram_stop,subway_entrance,platform \
  nwr/highway=bus_stop \
  nwr/amenity=bus_station,ferry_terminal \
  nwr/aerialway=station \
  nwr/aeroway=aerodrome,terminal \
  w/railway=rail,light_rail,subway,tram,monorail,funicular \
  w/route=ferry \
  --overwrite \
  -o "$OUTPUT"
|
||||
13
tests/conftest.py
Normal file
13
tests/conftest.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Scratch directory for everything the test run writes.
TEST_DATA_DIR = Path("./data/test-runtime")

# Start from a clean slate; a missing directory is not an error.
shutil.rmtree(TEST_DATA_DIR, ignore_errors=True)
# These variables are set at import time of conftest, i.e. before the test
# modules import the application (presumably the app reads them on import;
# verify against app.config).
os.environ["QUEUE_WORKER_AUTOSTART"] = "false"
os.environ["DATA_DIR"] = str(TEST_DATA_DIR)
os.environ["DATABASE_URL"] = f"sqlite:///{TEST_DATA_DIR / 'test_workbench.sqlite'}"
|
||||
33
tests/test_address_search.py
Normal file
33
tests/test_address_search.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.address_search import _folded_query_candidates, _numbered_query_candidates, coordinate_token, parse_coordinate_token
|
||||
|
||||
|
||||
def test_numbered_address_query_accepts_city_first_without_comma():
    """A city-first query without a comma yields the (street, number, city) candidate."""
    expected = ("alexanderplatz", "1", "berlin")
    candidates = _numbered_query_candidates("Berlin Alexanderplatz 1")
    assert expected in candidates
|
||||
|
||||
|
||||
def test_numbered_address_query_accepts_city_last_without_comma():
    """A city-last query without a comma yields the (street, number, city) candidate."""
    expected = ("alexanderplatz", "1", "berlin")
    candidates = _numbered_query_candidates("Alexanderplatz 1 Berlin")
    assert expected in candidates
|
||||
|
||||
|
||||
def test_numbered_address_query_prefers_comma_locality():
    """With a comma-separated locality, the matching candidate is ranked first."""
    candidates = _numbered_query_candidates("Berlin, Alexanderplatz 1")
    top_candidate = candidates[0]
    assert top_candidate == ("alexanderplatz", "1", "berlin")
|
||||
|
||||
|
||||
def test_folded_address_query_accepts_city_first_without_comma():
    """A city-first query without a comma yields the (street, city) candidate."""
    expected = ("alexanderplatz", "berlin")
    candidates = _folded_query_candidates("Berlin Alexanderplatz")
    assert expected in candidates
|
||||
|
||||
|
||||
def test_folded_address_query_accepts_city_last_without_comma():
    """A city-last query without a comma yields the (street, city) candidate."""
    expected = ("alexanderplatz", "berlin")
    candidates = _folded_query_candidates("Alexanderplatz Berlin")
    assert expected in candidates
|
||||
|
||||
|
||||
def test_folded_address_query_prefers_comma_locality():
    """With a comma-separated locality, the matching candidate is ranked first."""
    candidates = _folded_query_candidates("Berlin, Alexanderplatz")
    top_candidate = candidates[0]
    assert top_candidate == ("alexanderplatz", "berlin")
|
||||
|
||||
|
||||
def test_coordinate_token_round_trips():
    """Coordinates encode to a 7-decimal token and parse back to the rounded pair."""
    encoded = coordinate_token(49.404539659, 8.685940101)
    assert encoded == "coord:49.4045397:8.6859401"
    decoded = parse_coordinate_token(encoded)
    assert decoded == (49.4045397, 8.6859401)
|
||||
666
tests/test_api.py
Normal file
666
tests/test_api.py
Normal file
@@ -0,0 +1,666 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import select
|
||||
|
||||
import app.jobs as jobs_module
|
||||
import app.main as main_module
|
||||
from app.config import settings
|
||||
from app.db import init_db, session_scope
|
||||
from app.db_lock import DatabaseWriteBusy, database_write_lock
|
||||
from app.jobs import run_worker_once
|
||||
from app.main import app
|
||||
from app.models import Dataset, GtfsRoute, Job, Source
|
||||
from app.source_catalog import import_ingestable_sources
|
||||
|
||||
|
||||
def test_api_sample_and_geojson():
    """Smoke-test the index page, the sample reset, and the GeoJSON map endpoints."""
    client = TestClient(app)
    # The index page must render the expected UI labels and must not contain
    # the "journeySource" identifier.
    response = client.get("/")
    assert response.status_code == 200
    assert "Mobility Workbench" in response.text
    assert "GTFS Harmonization" in response.text
    assert "Mapping Data" in response.text
    assert "journeyTransitSnapshot" in response.text
    assert "journeySource" not in response.text

    # Everything below relies on the sample data loaded by this reset.
    response = client.post("/api/sample/reset")
    assert response.status_code == 200
    stats = client.get("/api/stats").json()
    assert stats["gtfs_routes"] == 6
    assert stats["osm_routes"] == 6
    geojson = client.get("/api/map/gtfs_routes.geojson").json()
    assert geojson["type"] == "FeatureCollection"
    assert len(geojson["features"]) == 6
    matched_geojson = client.get("/api/map/matched_gtfs_routes.geojson?status=matched").json()
    assert matched_geojson["features"]
    assert {feature["properties"]["visual_source"] for feature in matched_geojson["features"]} == {"osm"}
    # Kind, mode, bbox and source filters on the map endpoints.
    filtered = client.get("/api/map/osm_features.geojson?kind=route&mode=tram&bbox=13.3,52.4,13.5,52.6").json()
    assert filtered["type"] == "FeatureCollection"
    assert {feature["properties"]["ref"] for feature in filtered["features"]} == {"M5", "M10"}
    source_filtered_gtfs = client.get("/api/map/gtfs_routes.geojson?source_id=1").json()
    assert len(source_filtered_gtfs["features"]) == 6
    source_filtered_osm = client.get("/api/map/osm_features.geojson?source_id=2&kind=route&mode=tram").json()
    assert {feature["properties"]["ref"] for feature in source_filtered_osm["features"]} == {"M5", "M10"}
    # Building the route layer must be reflected in the stats and enable the
    # route_scope filters.
    route_layer = client.post("/api/route-layer/build").json()
    assert route_layer["route_patterns"] > 0
    assert client.get("/api/stats").json()["route_patterns"] == route_layer["route_patterns"]
    regional_osm = client.get("/api/map/osm_features.geojson?kind=route&mode=train&route_scope=regional").json()
    assert {feature["properties"]["ref"] for feature in regional_osm["features"]} == {"RE1"}
    regional_patterns = client.get("/api/map/route_patterns.geojson?mode=train&source_kind=osm&route_scope=regional").json()
    assert {feature["properties"]["ref"] for feature in regional_patterns["features"]} == {"RE1"}
    local_patterns = client.get("/api/map/route_patterns.geojson?mode=subway&source_kind=osm&route_scope=local").json()
    assert {feature["properties"]["ref"] for feature in local_patterns["features"]} == {"U2"}
    local_bus_patterns = client.get("/api/map/route_patterns.geojson?mode=bus&source_kind=osm&route_scope=local").json()
    assert {feature["properties"]["ref"] for feature in local_bus_patterns["features"]} == {"100"}
|
||||
|
||||
|
||||
def test_journey_demo_direct_and_one_transfer():
    """Journey search returns a direct trip and a one-transfer trip on the sample data."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    # Direct journey: zero transfers allowed.
    hbf = _first_stop(client, "Hauptbahnhof")
    alex = _first_stop(client, "Alexanderplatz")
    direct = client.get(f"/api/journey/search?from_stop_id={hbf['id']}&to_stop_id={alex['id']}&departure=08:00&max_transfers=0").json()
    assert direct["journeys"]
    assert direct["journeys"][0]["transfers"] == 0
    assert direct["journeys"][0]["legs"][0]["route_ref"] in {"RE1", "M5"}
    # The first feature's geometry must end at [13.4132, 52.5219] and must
    # not include [13.4344, 52.51].
    coords = direct["journeys"][0]["features"]["features"][0]["geometry"]["coordinates"]
    assert coords[-1] == [13.4132, 52.5219]
    assert [13.4344, 52.51] not in coords
    stop_roles = {
        feature["properties"]["role"]
        for feature in direct["journeys"][0]["features"]["features"]
        if feature["geometry"]["type"] == "Point"
    }
    assert {"start", "end", "passed"} <= stop_roles

    # One-transfer journey with zero transfer time.
    zoo = _first_stop(client, "Zoologischer")
    ost = _first_stop(client, "Ostbahnhof")
    transfer = client.get(
        f"/api/journey/search?from_stop_id={zoo['id']}&to_stop_id={ost['id']}&departure=08:00&max_transfers=1&transfer_seconds=0"
    ).json()
    assert transfer["journeys"]
    assert transfer["journeys"][0]["transfers"] == 1
    assert [leg["route_ref"] for leg in transfer["journeys"][0]["legs"]] == ["100", "RE1"]
|
||||
|
||||
|
||||
def test_route_layer_job_endpoint_completes():
    """A queued route-layer rebuild job is processed by one worker pass and completes."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    queued = client.post("/api/jobs/route-layer-build").json()
    assert queued["kind"] == "route_layer_rebuild"
    assert queued["status"] == "queued"
    assert queued["priority"] == 0

    # Run the worker synchronously so the job finishes within the test.
    worker = run_worker_once(worker_id="test-worker")
    assert worker["processed"] == 1
    job = client.get(f"/api/jobs/{queued['id']}").json()

    assert job["status"] == "completed"
    assert job["result"]["route_patterns"] > 0
    events = client.get(f"/api/jobs/{queued['id']}/events").json()
    assert [event["event_type"] for event in events["events"]][-1] == "completed"
|
||||
|
||||
|
||||
def test_route_matching_job_endpoint_completes():
    """A queued route-matching job completes and emits batch events before 'completed'."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    queued = client.post("/api/jobs/match-run").json()
    assert queued["kind"] == "route_matching"
    assert queued["status"] == "queued"

    # Run the worker synchronously so the job finishes within the test.
    worker = run_worker_once(worker_id="test-worker")
    assert worker["processed"] == 1
    job = client.get(f"/api/jobs/{queued['id']}").json()

    assert job["status"] == "completed"
    assert job["result"]["routes"] == 6
    assert job["result"]["matches"] > 0
    events = client.get(f"/api/jobs/{queued['id']}/events").json()
    event_types = [event["event_type"] for event in events["events"]]
    assert "route_matching_batch" in event_types
    assert event_types[-1] == "completed"
|
||||
|
||||
|
||||
def test_qa_summary_endpoint_exposes_harmonization_sections():
    """The QA summary lists the expected section ids and a 'Routes' item for GTFS validation."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    summary = client.get("/api/qa/summary").json()

    assert summary["decision"]["deployment"] == "same_workbench_for_now"
    section_ids = {section["id"] for section in summary["sections"]}
    # Subset check: additional sections are allowed.
    assert {
        "source_discovery",
        "import_health",
        "gtfs_validation",
        "deduplication",
        "route_quality",
        "publication_readiness",
    } <= section_ids
    gtfs_section = next(section for section in summary["sections"] if section["id"] == "gtfs_validation")
    assert any(item["label"] == "Routes" for item in gtfs_section["items"])
|
||||
|
||||
|
||||
def test_gtfs_harmonization_inventory_and_detail():
    """Exercise the GTFS harmonization inventory, per-source detail, and review update."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    # Inventory: the sample data provides exactly one active GTFS source.
    inventory = client.get("/api/harmonization/gtfs/inventory").json()
    assert inventory["summary"]["sources"] == 1
    assert inventory["summary"]["active_sources"] == 1
    feed = inventory["feeds"][0]
    assert feed["source"]["name"] == "Sample Berlin GTFS"
    assert feed["active_dataset"]["counts"]["routes"] == 6
    assert feed["validation"]["items"]
    assert feed["service"]["items"]

    # Detail view for that source.
    detail = client.get(f"/api/harmonization/gtfs/sources/{feed['source']['id']}").json()
    assert detail["source"]["id"] == feed["source"]["id"]
    assert {section["id"] for section in detail["sections"]} == {"validation", "service", "overlap", "license"}
    assert all({"id", "severity", "title", "detail"} <= set(issue) for issue in detail["issues"])
    assert detail["qa_status"] in {"ready", "needs_review", "blocked"}

    # Review PATCH: the submitted values must be echoed back on the source.
    reviewed = client.patch(
        f"/api/harmonization/gtfs/sources/{feed['source']['id']}/review",
        json={"license": "CC-BY-4.0", "review_status": "approved", "review_note": "Operator publication allowed.", "enabled": True},
    ).json()
    assert reviewed["source"]["license"] == "CC-BY-4.0"
    assert reviewed["source"]["qa_review"]["status"] == "approved"
    assert reviewed["source"]["qa_review"]["note"] == "Operator publication allowed."
    assert reviewed["source"]["enabled"] is True
|
||||
|
||||
|
||||
def test_terminal_jobs_can_be_dismissed_from_default_view():
    """A processed job can be dismissed; it is then hidden unless include_dismissed is set."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    queued = client.post("/api/jobs/route-layer-build").json()
    assert run_worker_once(worker_id="test-worker")["processed"] == 1

    # Before dismissal the job appears in the default listing.
    listed = client.get("/api/jobs").json()
    assert any(job["id"] == queued["id"] for job in listed["jobs"])

    dismissed = client.post(f"/api/jobs/{queued['id']}/dismiss").json()
    assert dismissed["dismissed_at"]

    # After dismissal it disappears from the default listing ...
    hidden = client.get("/api/jobs").json()
    assert all(job["id"] != queued["id"] for job in hidden["jobs"])

    # ... but is still returned when dismissed jobs are requested explicitly.
    visible = client.get("/api/jobs?include_dismissed=true").json()
    assert any(job["id"] == queued["id"] for job in visible["jobs"])
|
||||
|
||||
|
||||
def test_jobs_revision_endpoint_reports_changes():
    """The jobs revision endpoint reports 'changed' relative to a previously seen revision."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    # Without a 'since' parameter the endpoint reports changed=True.
    initial = client.get("/api/jobs/revision").json()
    assert initial["changed"] is True
    assert initial["revision"]
    assert initial["job_revision"]
    assert "workers" in initial

    # Queueing a job must change the revision.
    queued = client.post("/api/jobs/route-layer-build").json()
    changed = client.get("/api/jobs/revision", params={"since": initial["revision"]}).json()
    assert changed["changed"] is True
    assert changed["latest_job_id"] >= queued["id"]
    assert changed["job_count"] >= 1

    # Asking again with the latest revision must report no change.
    unchanged = client.get("/api/jobs/revision", params={"since": changed["revision"]}).json()
    assert unchanged["changed"] is False

    # The job listing carries the same revision value.
    listed = client.get("/api/jobs").json()
    assert listed["revision"] == unchanged["revision"]
    assert listed["jobs"]
|
||||
|
||||
|
||||
def test_nearest_location_skips_address_lookup_while_address_index_rebuilds(monkeypatch):
    """While an address_index_rebuild job is running, nearest-location must not call the address lookup."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200
    # Simulate an in-progress address index rebuild by inserting a running job.
    with session_scope() as session:
        session.add(Job(kind="address_index_rebuild", status="running", description="test address rebuild"))
        session.commit()

    def fail_address_lookup(**_kwargs):
        # Any call to the address lookup fails the test.
        raise AssertionError("address lookup should be skipped while address index rebuilds")

    monkeypatch.setattr(main_module, "address_at_point", fail_address_lookup)
    response = client.get("/api/journey/nearest-location?lat=0&lon=0")
    assert response.status_code == 200
    data = response.json()
    assert data["selection_kind"] == "coordinate"
    assert data["address_lookup_skipped"] is True
    assert "Address index rebuild" in data["message"]
|
||||
|
||||
|
||||
def test_job_queue_controls_for_queued_job():
    """Walk a queued job through priority change, pause, resume, stop, and retry."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    queued = client.post("/api/jobs/route-layer-build?priority=5").json()
    assert queued["status"] == "queued"
    assert queued["priority"] == 5

    priority = client.post(f"/api/jobs/{queued['id']}/priority", json={"priority": 20}).json()
    assert priority["priority"] == 20

    paused = client.post(f"/api/jobs/{queued['id']}/pause").json()
    assert paused["status"] == "paused"

    # A paused job must not be picked up by the worker.
    idle_worker = run_worker_once(worker_id="test-worker")
    assert idle_worker["processed"] == 0

    resumed = client.post(f"/api/jobs/{queued['id']}/resume").json()
    assert resumed["status"] == "queued"

    stopped = client.post(f"/api/jobs/{queued['id']}/stop").json()
    assert stopped["status"] == "cancelled"

    # Retrying a cancelled job re-queues it with the error cleared.
    retried = client.post(f"/api/jobs/{queued['id']}/retry").json()
    assert retried["status"] == "queued"
    assert retried["error"] is None
|
||||
|
||||
|
||||
def test_worker_once_returns_idle_when_claim_is_busy(monkeypatch):
    """If claiming a job raises DatabaseWriteBusy, run_worker_once reports zero processed jobs."""

    def busy_claim(_worker_id):
        # Stand-in for claim_next_job that always reports a busy write lock.
        raise DatabaseWriteBusy("job:claim", {"operation": "update source"})

    monkeypatch.setattr(jobs_module, "claim_next_job", busy_claim)

    assert jobs_module.run_worker_once(worker_id="test-worker") == {"worker_id": "test-worker", "processed": 0}
|
||||
|
||||
|
||||
def test_running_job_can_be_stopped_while_write_lock_is_held():
    """Stopping a running job succeeds even while the database write lock is held."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200
    queued = client.post("/api/jobs/route-layer-build").json()

    # Mark the job as running and leased directly in the database.
    with session_scope() as session:
        job = session.get(Job, queued["id"])
        job.status = "running"
        job.lease_owner = "test-worker"

    # Issue the stop request while this test holds the write lock.
    with database_write_lock("job:route_layer_rebuild:test"):
        response = client.post(f"/api/jobs/{queued['id']}/stop")

    assert response.status_code == 200
    stopped = response.json()
    assert stopped["id"] == queued["id"]
    assert stopped["requested_action"] == "cancel"
|
||||
|
||||
|
||||
def test_itinerary_generation_and_leg_locking():
    """Generate itineraries, save one, lock its first leg, and find it in the saved list."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200
    hbf = _first_stop(client, "Hauptbahnhof")
    alex = _first_stop(client, "Alexanderplatz")

    generated = client.post(
        "/api/itineraries/generate",
        json={
            "from_stop_id": hbf["id"],
            "to_stop_id": alex["id"],
            "departure": "08:00",
            "service_date": "2026-06-27",
            "max_transfers": 1,
            "transfer_seconds": 120,
            "limit": 2,
        },
    ).json()

    assert generated["request"]["service_date"] == "2026-06-27"
    # Both itinerary families must be present in the result.
    assert any(item["family"] == "public_transport" for item in generated["itineraries"])
    assert any(item["family"] == "flight_access" for item in generated["itineraries"])
    public = next(item for item in generated["itineraries"] if item["family"] == "public_transport")
    saved = client.post(f"/api/itineraries/{public['id']}/save", json={"saved": True}).json()
    assert saved["saved"] is True
    leg_id = saved["legs"][0]["id"]
    locked = client.post(f"/api/itinerary-legs/{leg_id}/lock", json={"locked": True}).json()
    assert locked["locked"] is True
    recent = client.get("/api/itineraries?saved_only=true").json()
    assert any(item["id"] == public["id"] for item in recent["itineraries"])
|
||||
|
||||
|
||||
def test_geofabrik_catalog_source_creation(monkeypatch):
    """With the Geofabrik catalog stubbed, the catalog lists the entry and a source can be created from it."""
    from app import main
    # NOTE(review): create_geofabrik_source is imported but not referenced in this test.
    from app.geofabrik import create_geofabrik_source

    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200
    fake_entry = {
        "id": "berlin",
        "name": "Berlin",
        "parent": "germany",
        "country_codes": ["DE"],
        "pbf_url": "https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf",
        "updates_url": "https://download.geofabrik.de/europe/germany/berlin-updates",
        "taginfo_url": "https://taginfo.geofabrik.de/europe:germany:berlin",
        "urls": {},
    }

    # Replace both catalog lookups so they return the fake entry.
    monkeypatch.setattr(main, "geofabrik_catalog", lambda q=None, limit=80: [fake_entry])
    monkeypatch.setattr("app.geofabrik.geofabrik_entry", lambda geofabrik_id: fake_entry if geofabrik_id == "berlin" else None)

    catalog = client.get("/api/geofabrik/catalog?q=berlin").json()
    assert catalog["entries"][0]["id"] == "berlin"
    # run_import=False: only the source record is created here.
    created = client.post(
        "/api/geofabrik/sources",
        json={"geofabrik_id": "berlin", "import_updates": True, "run_import": False},
    ).json()
    assert created["source"]["kind"] == "osm_pbf"
    assert "berlin-latest.osm.pbf" in created["source"]["url"]
|
||||
|
||||
|
||||
def test_source_management_and_match_candidates():
    """End-to-end pass over source stats, match candidates, dataset search, update checks, and deletion jobs."""
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    # Source and dataset statistics for the sample GTFS feed.
    stats = client.get("/api/stats").json()
    assert stats["match_summary"]["missing"] + stats["match_summary"]["weak"] >= 1
    sources = client.get("/api/sources").json()
    gtfs_source = next(source for source in sources if source["kind"] == "gtfs")
    assert gtfs_source["stats"]["routes"] == 6
    assert gtfs_source["datasets"][0]["stats"]["stop_times"] == 20

    # Candidate listing, preview GeoJSON, and accepting a candidate.
    match = client.get("/api/matches?limit=1").json()[0]
    candidates = client.get(f"/api/matches/{match['id']}/candidates").json()
    assert candidates["route"]["id"] == match["gtfs"]["id"]
    assert candidates["route"]["geometry"]["present"] is True
    assert candidates["candidates"]
    assert "score" in candidates["candidates"][0]
    assert candidates["candidates"][0]["osm"]["geometry"]["present"] is True
    assert candidates["preview"]["type"] == "FeatureCollection"
    preview_roles = {feature["properties"]["preview_role"] for feature in candidates["preview"]["features"]}
    assert {"gtfs_route", "candidate"} <= preview_roles
    candidate_preview = next(feature for feature in candidates["preview"]["features"] if feature["properties"]["preview_role"] == "candidate")
    assert "candidate_score" in candidate_preview["properties"]
    picked = candidates["candidates"][0]
    accepted = client.post(f"/api/matches/{match['id']}/candidates/{picked['osm']['id']}/accept").json()
    assert accepted["status"] == "accepted"
    assert accepted["match"]["osm"]["osm_type"] == picked["osm"]["osm_type"]
    assert accepted["match"]["osm"]["osm_id"] == picked["osm"]["osm_id"]

    # Dataset search and single-feature GeoJSON lookup.
    search = client.get("/api/datasets/search?q=M5&active_only=true").json()
    assert search["gtfs_routes"]
    assert search["osm_routes"]
    m5_route = next(item for item in search["gtfs_routes"] if item["route"]["ref"] == "M5")
    assert m5_route["timetable"]["stop_times"] > 0
    assert m5_route["geometry"]["present"] is True
    feature = client.get(f"/api/datasets/search/feature.geojson?type=gtfs_route&id={m5_route['route']['id']}").json()
    assert feature["features"]
    assert feature["features"][0]["properties"]["search_result_type"] == "gtfs_route"

    # Update check: no update is available, so the update itself is skipped.
    update_check = client.post(f"/api/sources/{gtfs_source['id']}/check-update").json()
    assert update_check["status"] == "checked"
    assert update_check["update_available"] is False
    update_result = client.post(f"/api/sources/{gtfs_source['id']}/update").json()
    assert update_result["status"] == "skipped"
    history = client.get(f"/api/sources/{gtfs_source['id']}/update-checks").json()
    assert history["checks"]

    # Source deletion is queued as a job; a repeated DELETE returns the same job.
    response = client.delete(f"/api/sources/{gtfs_source['id']}")
    assert response.status_code == 200
    delete_job = response.json()
    assert delete_job["kind"] == "source_delete"
    assert delete_job["status"] == "queued"
    duplicate = client.delete(f"/api/sources/{gtfs_source['id']}").json()
    assert duplicate["id"] == delete_job["id"]

    worker = run_worker_once(worker_id="test-worker")
    assert worker["processed"] == 1
    completed = client.get(f"/api/jobs/{delete_job['id']}").json()
    assert completed["status"] == "completed"
    assert completed["result"]["delete_result"]["deleted"] is True
    stats_after_delete = client.get("/api/stats").json()
    assert stats_after_delete["gtfs_routes"] == 0
    assert stats_after_delete["osm_routes"] == 6

    # Dataset deletion is queued too and shows up as the active job on both
    # the dataset and its source until the worker runs.
    osm_source = next(source for source in client.get("/api/sources").json() if source["kind"] == "osm_geojson")
    dataset_id = osm_source["datasets"][0]["id"]
    dataset_delete_job = client.delete(f"/api/datasets/{dataset_id}").json()
    assert dataset_delete_job["kind"] == "dataset_delete"
    assert dataset_delete_job["status"] == "queued"
    queued_source = next(source for source in client.get("/api/sources").json() if source["id"] == osm_source["id"])
    assert queued_source["datasets"][0]["active_job"]["id"] == dataset_delete_job["id"]
    assert queued_source["active_job"]["id"] == dataset_delete_job["id"]

    assert run_worker_once(worker_id="test-worker")["processed"] == 1
    completed_dataset_delete = client.get(f"/api/jobs/{dataset_delete_job['id']}").json()
    assert completed_dataset_delete["status"] == "completed"
    assert completed_dataset_delete["result"]["delete_result"]["deleted"] is True
    assert client.get("/api/stats").json()["osm_routes"] == 0
|
||||
|
||||
|
||||
def test_missing_gtfs_sidecar_queues_recovery_without_breaking_sources():
    """Listing sources with an unreachable GTFS sidecar queues one recovery import.

    The active GTFS dataset's metadata is repointed at a sidecar file named
    ``missing_gtfs_dataset_<id>.sqlite`` (assumed not to exist on disk).
    ``GET /api/sources`` must still answer 200, flag the dataset as
    ``missing_files`` and expose a ``source_import`` job; a second listing
    must leave exactly one queued ``source_import`` job.
    """
    api = TestClient(app)
    reset_response = api.post("/api/sample/reset")
    assert reset_response.status_code == 200

    with session_scope() as session:
        active_gtfs_query = select(Dataset).where(Dataset.kind == "gtfs", Dataset.is_active.is_(True))
        dataset = session.scalar(active_gtfs_query)
        assert dataset is not None
        source_id = dataset.source_id
        metadata = json.loads(dataset.metadata_json or "{}")
        missing_sidecar = settings.data_dir / "sidecars" / f"missing_gtfs_dataset_{dataset.id}.sqlite"
        metadata["gtfs_storage"]["sidecar_path"] = str(missing_sidecar)
        dataset.metadata_json = json.dumps(metadata)
        dataset_id = dataset.id

    first_listing = api.get("/api/sources")

    assert first_listing.status_code == 200
    source = next(entry for entry in first_listing.json() if entry["id"] == source_id)
    active_job = source["active_job"]
    assert active_job["kind"] == "source_import"
    assert "GTFS sidecar missing" in active_job["result"]["recovery_reason"]
    recovered_dataset = next(entry for entry in source["datasets"] if entry["id"] == dataset_id)
    assert recovered_dataset["status"] == "missing_files"
    dataset_stats = recovered_dataset["stats"]
    assert dataset_stats["missing_sidecar"] is True
    assert dataset_stats["stop_times"] == 0

    # A repeated listing must not enqueue a second recovery job.
    second_listing = api.get("/api/sources")
    assert second_listing.status_code == 200
    with session_scope() as session:
        queued_imports = select(Job).where(Job.kind == "source_import", Job.status == "queued")
        recovery_jobs = session.scalars(queued_imports).all()
        assert len(recovery_jobs) == 1
|
||||
|
||||
|
||||
def test_admin_maintenance_endpoints_are_guarded_and_callable():
    """Exercise the admin maintenance endpoints through the job queue.

    Each endpoint is expected to return a ``maintenance`` job that a single
    worker pass processes. Requests without the expected guard payload
    (non-dry-run pruning, vacuum and reset with an empty body) must be
    rejected with HTTP 400.
    """
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    def process_single_job() -> None:
        # One worker pass must pick up exactly the job queued just before.
        assert run_worker_once(worker_id="test-worker")["processed"] == 1

    def fetch_job(job: dict) -> dict:
        return client.get(f"/api/jobs/{job['id']}").json()

    init_job = client.post("/api/admin/init-db").json()
    assert init_job["kind"] == "maintenance"
    assert init_job["result"]["action"] == "init-db"
    process_single_job()
    init_completed = fetch_job(init_job)
    assert init_completed["status"] == "completed"
    assert init_completed["result"]["result"]["status"] == "initialized"

    backfill_job = client.post("/api/admin/backfill-gtfs-shapes", json={}).json()
    assert backfill_job["kind"] == "maintenance"
    process_single_job()
    backfill = fetch_job(backfill_job)
    assert "datasets" in backfill["result"]["result"]

    prune_cache_job = client.post("/api/admin/prune-cache", json={}).json()
    assert prune_cache_job["kind"] == "maintenance"
    process_single_job()
    prune_cache = fetch_job(prune_cache_job)["result"]["result"]
    assert prune_cache["dry_run"] is True
    assert "files" in prune_cache
    assert "bytes" in prune_cache

    prune_inactive_job = client.post("/api/admin/prune-inactive-datasets", json={}).json()
    assert prune_inactive_job["kind"] == "maintenance"
    process_single_job()
    prune_inactive = fetch_job(prune_inactive_job)["result"]["result"]
    assert prune_inactive["dry_run"] is True
    assert "would_delete" in prune_inactive

    sample_job = client.post("/api/jobs/sample-reset").json()
    assert sample_job["kind"] == "maintenance"
    assert sample_job["result"]["action"] == "sample-reset"
    process_single_job()
    sample_completed = fetch_job(sample_job)
    assert sample_completed["status"] == "completed"
    assert sample_completed["result"]["result"]["status"] == "ok"
    assert client.get("/api/stats").json()["gtfs_routes"] == 6

    # Requests the API is expected to refuse with HTTP 400.
    guarded_requests = (
        ("/api/admin/prune-cache", {"dry_run": False}),
        ("/api/admin/prune-inactive-datasets", {"dry_run": False}),
        ("/api/admin/vacuum-db", {}),
        ("/api/admin/reset-db", {}),
    )
    for path, payload in guarded_requests:
        assert client.post(path, json=payload).status_code == 400
|
||||
|
||||
|
||||
def test_source_catalog_import_and_ingestable_seed_metadata():
    """Import the source catalog, link a source to an entry, then import the seed list.

    Checks that catalog filtering returns DELFI for DE/P0, that a source
    created with ``catalog_entry_id`` inherits the entry's priority and is
    counted as linked, and that the ingestable seed import yields the Swiss
    and VBB GTFS sources with the expected metadata.
    """
    init_db()
    client = TestClient(app)

    import_report = client.post("/api/source-catalog/import").json()
    assert import_report["summary"]["catalog_entries"] >= 50

    german_p0 = client.get("/api/source-catalog?country=DE&priority=P0&limit=10").json()
    german_entries = german_p0["entries"]
    assert german_entries
    assert any("DELFI" in entry["source_name"] for entry in german_entries)
    assert "geometry_notes" in german_entries[0]

    geofabrik_query = "/api/source-catalog?q=Geofabrik&limit=5"
    osm_catalog = client.get(geofabrik_query).json()
    osm_entry = next(entry for entry in osm_catalog["entries"] if "Geofabrik" in entry["source_name"])
    source_payload = {
        "catalog_entry_id": osm_entry["id"],
        "name": "Berlin Geofabrik OSM PBF",
        "kind": "osm_pbf",
        "url": "https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf",
        "country": "DE",
    }
    created_source = client.post("/api/sources", json=source_payload).json()
    sources = client.get("/api/sources").json()
    linked_source = next(source for source in sources if source["id"] == created_source["id"])
    assert linked_source["catalog_entry_id"] == osm_entry["id"]
    assert linked_source["priority"] == osm_entry["priority"]
    linked_catalog = client.get(geofabrik_query).json()
    linked_entry = next(entry for entry in linked_catalog["entries"] if entry["id"] == osm_entry["id"])
    assert linked_entry["linked_source_count"] == 1

    seed_import = client.post("/api/source-catalog/import-ingestable").json()
    assert seed_import["created"] + seed_import["updated"] >= 10

    sources = client.get("/api/sources").json()
    swiss = next(source for source in sources if source["name"] == "CH Swiss national GTFS")
    assert swiss["kind"] == "gtfs"
    assert swiss["priority"] == "P0"
    assert "rail" in swiss["mode_scope"]
    assert swiss["notes"]

    vbb_name = "VBB Berlin-Brandenburg GTFS"
    vbb = next(source for source in sources if source["name"] == vbb_name)
    vbb_entries = client.get("/api/source-catalog?q=VBB&limit=5").json()["entries"]
    vbb_catalog = next(entry for entry in vbb_entries if entry["source_name"] == vbb_name)
    assert vbb["kind"] == "gtfs"
    assert vbb["priority"] == "P5"
    assert vbb["catalog_entry_id"] == vbb_catalog["id"]
|
||||
|
||||
|
||||
def test_ingestable_source_import_deduplicates_by_kind_and_url(tmp_path):
    """Importing a second CSV row with the same kind and URL updates the source.

    Two CSV files describe the same ``gtfs`` feed URL under different names
    and priorities. The first import must create one source; the second
    must update it in place rather than create a duplicate.
    """
    init_db()
    header = "name,kind,url,country,license,mode_scope,source_basis,priority,notes\n"
    feed_url = "https://example.test/feed.zip"

    first = tmp_path / "first.csv"
    first.write_text(
        header + "Original GTFS,gtfs,https://example.test/feed.zip,DE,CC0,bus,test,P1,first\n",
        encoding="utf-8",
    )
    second = tmp_path / "second.csv"
    second.write_text(
        header + "Renamed GTFS,gtfs,https://example.test/feed.zip,DE,CC0,bus,test,P0,second\n",
        encoding="utf-8",
    )

    with session_scope() as session:
        first_result = import_ingestable_sources(session, first)
        assert first_result["created"] == 1
    with session_scope() as session:
        result = import_ingestable_sources(session, second)
        assert result["created"] == 0
        assert result["updated"] == 1
        sources = session.scalars(select(Source).where(Source.url == feed_url)).all()
        assert len(sources) == 1
        survivor = sources[0]
        assert survivor.name == "Renamed GTFS"
        assert survivor.priority == "P0"
|
||||
|
||||
|
||||
def test_write_endpoint_returns_busy_when_another_write_is_active():
    """A write request issued while the write lock is held must answer 409.

    The configured lock timeout is shortened for the duration of the request
    and restored afterwards, even if the request raises.
    """
    init_db()
    client = TestClient(app)
    payload = {"name": "Busy test source", "kind": "gtfs", "url": "https://example.invalid/feed.zip"}

    original_timeout = settings.database_write_lock_timeout_seconds
    settings.database_write_lock_timeout_seconds = 0.05
    try:
        # Hold the write lock so the API request cannot acquire it in time.
        with database_write_lock("test long write", timeout=0.1):
            response = client.post("/api/sources", json=payload)
    finally:
        settings.database_write_lock_timeout_seconds = original_timeout

    assert response.status_code == 409
    detail = response.json()["detail"]
    assert "Database is busy" in detail
|
||||
|
||||
|
||||
def test_manual_match_rule_survives_new_gtfs_dataset_row():
    """An accepted match must be re-applied after the GTFS dataset is replaced.

    A matched route is accepted, its dataset is deactivated, and a new active
    dataset with a copy of the route is inserted. Re-running matching must
    report at least one manual match and list the route as accepted again.
    """
    client = TestClient(app)
    assert client.post("/api/sample/reset").status_code == 200

    matched = client.get("/api/matches?status=matched").json()
    match = next(item for item in matched if item["osm"])
    accepted = client.post(f"/api/matches/{match['id']}/accept").json()
    assert accepted["status"] == "accepted"

    # Route attributes carried over verbatim to the replacement dataset row.
    copied_fields = (
        "route_id",
        "agency_id",
        "short_name",
        "long_name",
        "route_type",
        "mode",
        "operator_name",
        "min_lon",
        "min_lat",
        "max_lon",
        "max_lat",
        "route_key",
        "operator_key",
    )

    with session_scope() as session:
        old_route = session.get(GtfsRoute, match["gtfs"]["id"])
        assert old_route is not None
        old_dataset = session.get(Dataset, old_route.dataset_id)
        assert old_dataset is not None
        old_dataset.is_active = False
        replacement_dataset = Dataset(
            source_id=old_dataset.source_id,
            kind="gtfs",
            local_path="./data/replacement.gtfs.zip",
            sha256="replacement",
            is_active=True,
            status="imported",
        )
        session.add(replacement_dataset)
        # Flush so the replacement dataset has an id before the route refers to it.
        session.flush()
        route_values = {field: getattr(old_route, field) for field in copied_fields}
        session.add(GtfsRoute(dataset_id=replacement_dataset.id, **route_values))

    rerun = client.post("/api/match/run").json()
    assert rerun["manual"] >= 1
    accepted_matches = client.get("/api/matches?status=accepted").json()
    expected_route_id = match["gtfs"]["route_id"]
    assert any(item["gtfs"]["route_id"] == expected_route_id for item in accepted_matches)
|
||||
|
||||
|
||||
def _first_stop(client: TestClient, query: str) -> dict:
    """Return the first stop the journey stop search yields for *query*.

    Asserts that the request succeeds and that at least one stop is returned.
    """
    result = client.get(f"/api/journey/stops?q={query}")
    assert result.status_code == 200
    found = result.json()["stops"]
    assert found
    first, *_ = found
    return first
|
||||
148
tests/test_feed_discovery.py
Normal file
148
tests/test_feed_discovery.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
|
||||
from app import feed_discovery
|
||||
from app.feed_discovery import (
|
||||
FeedCandidate,
|
||||
build_gtfs_discovery_manifests,
|
||||
enrich_ptna_candidate_from_details,
|
||||
parse_ptna_country_page,
|
||||
parse_ptna_detail_fields,
|
||||
select_test_run_candidates,
|
||||
)
|
||||
|
||||
|
||||
def test_parse_ptna_country_and_detail_pages():
    """Parse a PTNA country table row and a detail page, then enrich the candidate.

    The country page must yield one candidate with its feed id, release URL
    and an absolute details URL; the detail page must expose the license
    fields and, after enrichment, set the selected URL, license text and a
    ``network:guid`` note on the candidate.
    """
    details_url = "https://ptna.openstreetmap.de/en/gtfs-details.php?feed=DE-BE-VBB"
    country_html = """
<table>
<tr class="gtfs-tablerow">
<td><a href="routes.php?feed=DE-BE-VBB">DE-BE-VBB</a></td>
<td><a href="https://www.vbb.de">Verkehrsverbund Berlin-Brandenburg</a></td>
<td><a href="https://www.vbb.de">VBB Verkehrsverbund Berlin-Brandenburg GmbH</a></td>
<td>2026-01-01</td>
<td>2026-12-12</td>
<td>20260603</td>
<td><a href="https://www.vbb.de/vbb-services/api-open-data/datensaetze/">2026-06-03</a></td>
<td>2026-06-03</td>
<td><a href="/en/gtfs-details.php?feed=DE-BE-VBB">Details, ...</a></td>
</tr>
</table>
"""
    parsed = parse_ptna_country_page(
        country_html,
        country="DE",
        page_url="https://ptna.openstreetmap.de/gtfs/DE/index.php",
    )

    assert len(parsed) == 1
    candidate = parsed[0]
    assert candidate.ptna_feed_id == "DE-BE-VBB"
    assert candidate.country == "DE"
    assert candidate.original_release_url == "https://www.vbb.de/vbb-services/api-open-data/datensaetze/"
    assert candidate.details_url == details_url

    detail_html = """
<table>
<tr><td>Release Url</td><td><a href="https://example.test/gtfs.zip">https://example.test/gtfs.zip</a></td></tr>
<tr><td>Publisher's License</td><td><a href="https://example.test/license">CC BY 4.0</a></td></tr>
<tr><td>License given for use in OSM</td><td>Attribution on contributor page is sufficient.</td></tr>
<tr><td>"network:guid"</td><td>DE-BE-VBB</td></tr>
</table>
"""
    fields = parse_ptna_detail_fields(detail_html, details_url)
    assert fields["publisher's license"] == "CC BY 4.0"
    assert fields["publisher's license href"] == "https://example.test/license"

    enrich_ptna_candidate_from_details(candidate, detail_html, candidate.details_url)
    assert candidate.selected_url == "https://example.test/gtfs.zip"
    assert candidate.license_text == "CC BY 4.0"
    assert "network:guid=DE-BE-VBB" in candidate.notes
|
||||
|
||||
|
||||
def test_build_gtfs_discovery_manifests_from_stubbed_sources(tmp_path, monkeypatch):
    """Build discovery manifests from stubbed fetchers and check the merged output.

    The Mobility Database and PTNA stubs describe the same RNV feed URL, so
    the report must count two candidates (RNV and the Swiss seed) and the
    ingestable CSV must carry both URLs, with ``ptna`` recorded in the RNV
    row's ``source_basis``.
    """
    rnv_url = "https://example.test/rnv.zip"
    swiss_url = "https://example.test/ch.zip"
    mobility = [
        FeedCandidate(
            discovery_source="mobility_database",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            stable_id="mdb-rnv",
            status="active",
            is_official="True",
            selected_url="https://example.test/rnv.zip",
            direct_download_url="https://example.test/rnv.zip",
            license_url="https://example.test/license",
            features="Shapes|Feed Information",
            priority="P0",
        )
    ]
    ptna = [
        FeedCandidate(
            discovery_source="ptna",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            ptna_feed_id="DE-BW-RNV",
            selected_url="https://example.test/rnv.zip",
            original_release_url="https://example.test/rnv.zip",
            license_text="CC BY 4.0",
            priority="P2",
        )
    ]
    curated = [
        FeedCandidate(
            discovery_source="curated_seed",
            country="CH",
            provider="Swiss national",
            feed_name="CH Swiss national GTFS",
            selected_url="https://example.test/ch.zip",
            license_text="verify",
            features="rail,bus",
            priority="P0",
        )
    ]
    # Replace every network/file-backed fetcher with an in-memory stub.
    monkeypatch.setattr(feed_discovery, "fetch_mobility_database_candidates", lambda **_: mobility)
    monkeypatch.setattr(feed_discovery, "fetch_mobility_acceptance_candidates", lambda **_: [])
    monkeypatch.setattr(feed_discovery, "fetch_ptna_candidates", lambda **_: ptna)
    monkeypatch.setattr(feed_discovery, "load_curated_ingestable_seed", lambda **_: curated)

    report = build_gtfs_discovery_manifests(output_dir=tmp_path, countries=["DE", "CH"], test_limit=10)

    assert report["counts"]["candidates"] == 2
    assert report["counts"]["ingestable"] == 2
    # Read inside a context manager so the handle is closed deterministically;
    # newline="" is what the csv module documents for file objects it reads.
    manifest_path = tmp_path / "gtfs_ingestable_sources.csv"
    with manifest_path.open(encoding="utf-8", newline="") as manifest_file:
        ingestable_rows = list(csv.DictReader(manifest_file))
    assert {row["url"] for row in ingestable_rows} == {rnv_url, swiss_url}
    rnv_row = next(row for row in ingestable_rows if row["url"] == rnv_url)
    assert "ptna" in rnv_row["source_basis"]
|
||||
|
||||
|
||||
def test_select_test_run_candidates_keeps_overlapping_german_feeds():
    """Selecting three of three candidates must keep both German feeds.

    Two candidates are from DE (GTFS.DE long-distance rail and RNV) and one
    is from CH; with ``limit=3`` all of them must be selected.
    """
    german_long_distance = FeedCandidate(
        discovery_source="curated_seed",
        country="DE",
        provider="DB Long-distance Rail GTFS.DE",
        selected_url="https://download.gtfs.de/germany/fv_free/latest.zip",
        priority="P1",
    )
    german_regional = FeedCandidate(
        discovery_source="mobility_database",
        country="DE",
        provider="Rhein-Neckar-Verkehr",
        selected_url="https://gtfs-sandbox-dds.rnv-online.de/latest/gtfs.zip",
        priority="P0",
    )
    swiss_national = FeedCandidate(
        discovery_source="curated_seed",
        country="CH",
        provider="Swiss national",
        selected_url="https://gtfs.geops.ch/dl/gtfs_complete.zip",
        priority="P0",
    )
    candidates = [german_long_distance, german_regional, swiss_national]

    selected = select_test_run_candidates(candidates, limit=3)

    assert len(selected) == 3
    assert any("gtfs.de" in candidate.selected_url for candidate in selected)
    assert any("rnv" in candidate.selected_url for candidate in selected)
|
||||
72
tests/test_gtfs_import.py
Normal file
72
tests/test_gtfs_import.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import zipfile
|
||||
|
||||
from sqlalchemy import func, select
|
||||
|
||||
from app.db import reset_db, session_scope
|
||||
from app.gtfs_storage import sidecar_path, stop_time_count, stop_times_by_trip
|
||||
from app.journey import find_journeys, search_scheduled_stops
|
||||
from app.models import Dataset, GtfsCalendar, Source
|
||||
from app.pipeline.run import run_source
|
||||
|
||||
|
||||
def test_gtfs_import_uses_staging_bulk_loader_and_reports_chunks(tmp_path, monkeypatch):
    """Import a tiny GTFS zip and verify sidecar storage, journeys and progress events.

    The staging batch size is patched down to 2 so that the five stop-time
    rows are expected to be reported in several chunks.
    """
    reset_db()
    stop_time_rows = [
        "trip_id,arrival_time,departure_time,stop_id,stop_sequence",
        "t1,08:00:00,08:00:00,A,1",
        "t1,08:05:00,08:05:00,B,2",
        "t1,08:10:00,08:10:00,C,3",
        "t2,09:00:00,09:00:00,A,1",
        "t2,09:10:00,09:10:00,C,2",
    ]
    # Member name -> file content, written in this order.
    feed_members = {
        "agency.txt": "agency_id,agency_name,agency_url,agency_timezone\nA,Agency,https://example.invalid,Europe/Berlin\n",
        "stops.txt": "stop_id,stop_name,stop_lat,stop_lon\nA,Alpha,52.0,13.0\nB,Beta,52.1,13.1\nC,Gamma,52.2,13.2\n",
        "routes.txt": "route_id,agency_id,route_short_name,route_long_name,route_type\nR,A,R1,Alpha - Gamma,3\n",
        "trips.txt": "route_id,service_id,trip_id,shape_id\nR,daily,t1,s1\nR,daily,t2,s1\n",
        "calendar.txt": "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\ndaily,1,1,1,1,1,1,1,20260101,20261231\n",
        "stop_times.txt": "\n".join(stop_time_rows) + "\n",
        "shapes.txt": "shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence\ns1,52.0,13.0,1\ns1,52.2,13.2,2\n",
    }
    gtfs_path = tmp_path / "small.gtfs.zip"
    with zipfile.ZipFile(gtfs_path, "w") as archive:
        for member_name, content in feed_members.items():
            archive.writestr(member_name, content)

    monkeypatch.setattr("app.pipeline.gtfs.GTFS_STAGE_BATCH_SIZE", 2)
    events = []
    with session_scope() as session:
        source = Source(name="Small GTFS", kind="gtfs", url=str(gtfs_path))
        session.add(source)
        session.flush()
        dataset = run_source(session, source, progress_callback=lambda *args: events.append(args))

        metadata = json.loads(dataset.metadata_json or "{}")
        assert metadata["importer"] == "gtfs_import_v6_sidecar_stop_times"
        assert metadata["staging"] == "sqlite_promoted_to_sidecar"
        assert metadata["gtfs_storage"]["tables"]["gtfs_stop_times"] == "sidecar"
        assert metadata["stop_times_imported"] == 5
        assert sidecar_path(dataset) is not None
        assert sidecar_path(dataset).exists()
        assert stop_time_count(session, dataset.id) == 5
        assert len(stop_times_by_trip(session, dataset.id, ["t1"])["t1"]) == 3

        calendar_count = select(func.count()).select_from(GtfsCalendar).where(GtfsCalendar.dataset_id == dataset.id)
        assert session.scalar(calendar_count) == 1
        active_gtfs_count = (
            select(func.count()).select_from(Dataset).where(Dataset.kind == "gtfs", Dataset.is_active.is_(True))
        )
        assert session.scalar(active_gtfs_count) == 1

        alpha = search_scheduled_stops(session, "Alpha", limit=1)[0]
        gamma = search_scheduled_stops(session, "Gamma", limit=1)[0]
        journey = find_journeys(session, alpha["id"], gamma["id"], "08:00", limit=1)
        first_journey = journey["journeys"][0]
        assert first_journey["departure_time"] == "08:00:00"
        assert first_journey["arrival_time"] == "08:10:00"

    # The first positional argument of every progress callback is the event type.
    event_types = [event[0] for event in events]
    for expected_event in (
        "gtfs_staging_started",
        "gtfs_file_chunk",
        "gtfs_activation_sidecar_stop_times",
        "gtfs_activation_completed",
    ):
        assert expected_event in event_types
|
||||
282
tests/test_osm_pbf.py
Normal file
282
tests/test_osm_pbf.py
Normal file
@@ -0,0 +1,282 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
|
||||
from sqlalchemy import func, select
|
||||
|
||||
from app.config import settings
|
||||
from app.db import reset_db, session_scope
|
||||
from app.models import Dataset, PipelineRun, Source
|
||||
from app.osm_storage import features_are_sidecar, osm_feature_count, query_osm_features, sidecar_path
|
||||
from app.pipeline.osm_labeling import relabel_osm_features
|
||||
from app.pipeline.run import run_source
|
||||
|
||||
|
||||
def test_osm_pbf_source_commits_raw_and_extracts_route_geometry(tmp_path):
    """Run an ``osm_pbf`` source over a small OSM XML file and inspect the output.

    Expects one inactive raw dataset, one active ``osm_geojson`` dataset
    stored in a sidecar, a bus route whose geometry joins ways 10 and 11, a
    stop, a station and a ferry infrastructure feature. A second run must
    reuse the same derived dataset and not add a second raw dataset.
    """
    reset_db()
    osm_xml = """<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="mobility-workbench-test">
<node id="1" lat="52.5000" lon="13.4000" />
<node id="2" lat="52.5010" lon="13.4100" />
<node id="3" lat="52.5020" lon="13.4200">
<tag k="highway" v="bus_stop"/>
<tag k="name" v="Example Stop"/>
</node>
<node id="4" lat="52.5030" lon="13.4300" />
<node id="5" lat="52.5030" lon="13.4310" />
<node id="6" lat="52.5040" lon="13.4310" />
<node id="7" lat="52.5040" lon="13.4300" />
<node id="8" lat="52.5050" lon="13.4400" />
<node id="9" lat="52.5060" lon="13.4500" />
<way id="10">
<nd ref="1"/>
<nd ref="2"/>
<nd ref="3"/>
<tag k="highway" v="primary"/>
</way>
<way id="11">
<nd ref="4"/>
<nd ref="3"/>
<tag k="highway" v="primary"/>
</way>
<way id="20">
<nd ref="4"/>
<nd ref="5"/>
<nd ref="6"/>
<nd ref="7"/>
<nd ref="4"/>
<tag k="aerialway" v="station"/>
<tag k="name" v="Cable Station"/>
</way>
<way id="30">
<nd ref="8"/>
<nd ref="9"/>
<tag k="route" v="ferry"/>
<tag k="name" v="Ferry Waterway"/>
</way>
<relation id="100">
<member type="way" ref="10" role=""/>
<member type="way" ref="11" role=""/>
<tag k="type" v="route"/>
<tag k="route" v="bus"/>
<tag k="ref" v="100"/>
<tag k="name" v="Bus 100"/>
<tag k="operator" v="BVG"/>
</relation>
</osm>
"""
    osm_path = tmp_path / "transport.osm"
    osm_path.write_text(osm_xml, encoding="utf-8")

    def first_feature(session, dataset_id, kind, search):
        # First matching feature of the given kind, or None when nothing matches.
        return next(iter(query_osm_features(session, [dataset_id], kinds=[kind], search=search)), None)

    raw_count_query = select(func.count()).select_from(Dataset).where(Dataset.kind == "osm_pbf_raw")

    with session_scope() as session:
        source = Source(name="Test OSM", kind="osm_pbf", url=str(osm_path), country="DE")
        session.add(source)
        session.flush()

        dataset = run_source(session, source)

        raw_dataset = session.scalars(select(Dataset).where(Dataset.kind == "osm_pbf_raw")).one()
        assert raw_dataset.status == "extracted"
        assert raw_dataset.is_active is False
        assert dataset.kind == "osm_geojson"
        assert dataset.is_active is True
        assert features_are_sidecar(dataset)
        assert sidecar_path(dataset) is not None
        assert sidecar_path(dataset).exists()

        route = first_feature(session, dataset.id, "route", "100")
        assert route is not None
        assert route.osm_type == "relation"
        assert route.mode == "bus"
        expected_geometry = {
            "type": "LineString",
            "coordinates": [[13.4, 52.5], [13.41, 52.501], [13.42, 52.502], [13.43, 52.503]],
        }
        assert json.loads(route.geometry_geojson or "{}") == expected_geometry

        stop = first_feature(session, dataset.id, "stop", "Example Stop")
        assert stop is not None

        cable_station = first_feature(session, dataset.id, "station", "Cable Station")
        assert cable_station is not None

        ferry_infra = first_feature(session, dataset.id, "infra", "Ferry Waterway")
        assert ferry_infra is not None
        assert ferry_infra.mode == "ferry"

        second_dataset = run_source(session, source)
        assert second_dataset.id == dataset.id
        assert session.scalar(raw_count_query) == 1
|
||||
|
||||
|
||||
def test_osm_pbf_source_reuses_raw_and_filtered_transport_dataset(tmp_path):
    """Run an ``osm_pbf`` source with the prefilter enabled and check dataset lineage.

    The prefilter script is a shell stub that copies its input to its output.
    The raw, filtered and derived datasets must reference each other in their
    metadata, and a second run must reuse all three. Prefilter settings are
    restored afterwards.
    """
    reset_db()
    osm_xml = """<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="mobility-workbench-test">
<node id="1" lat="52.5000" lon="13.4000" />
<node id="2" lat="52.5010" lon="13.4100" />
<way id="10">
<nd ref="1"/>
<nd ref="2"/>
<tag k="highway" v="primary"/>
</way>
<relation id="100">
<member type="way" ref="10" role=""/>
<tag k="type" v="route"/>
<tag k="route" v="bus"/>
<tag k="ref" v="100"/>
</relation>
</osm>
"""
    osm_path = tmp_path / "transport.osm"
    osm_path.write_text(osm_xml, encoding="utf-8")
    filter_script = tmp_path / "copy_filter.sh"
    filter_script.write_text("#!/usr/bin/env sh\nset -eu\ncp \"$1\" \"$2\"\n", encoding="utf-8")
    filter_script.chmod(0o755)

    def count_datasets(session, kind):
        return session.scalar(select(func.count()).select_from(Dataset).where(Dataset.kind == kind))

    saved_enabled = settings.osm_pbf_prefilter_enabled
    saved_formats = settings.osm_pbf_prefilter_formats
    saved_script = settings.osm_pbf_prefilter_script
    settings.osm_pbf_prefilter_enabled = True
    settings.osm_pbf_prefilter_formats = "osm_xml"
    settings.osm_pbf_prefilter_script = filter_script
    try:
        with session_scope() as session:
            source = Source(name="Filtered OSM", kind="osm_pbf", url=str(osm_path), country="DE")
            session.add(source)
            session.flush()

            dataset = run_source(session, source)

            raw_dataset = session.scalars(select(Dataset).where(Dataset.kind == "osm_pbf_raw")).one()
            filtered_dataset = session.scalars(select(Dataset).where(Dataset.kind == "osm_pbf_transport")).one()
            raw_metadata = json.loads(raw_dataset.metadata_json or "{}")
            filtered_metadata = json.loads(filtered_dataset.metadata_json or "{}")
            derived_metadata = json.loads(dataset.metadata_json or "{}")

            # Raw stage.
            assert raw_dataset.status == "filtered"
            assert raw_dataset.is_active is False
            assert raw_metadata["filtered_dataset_id"] == filtered_dataset.id
            # Filtered stage.
            assert filtered_dataset.status == "extracted"
            assert filtered_dataset.is_active is False
            assert filtered_metadata["stage"] == "filtered_osm_transport_pbf"
            assert filtered_metadata["derived_from_dataset_id"] == raw_dataset.id
            assert filtered_metadata["filter"] == "osmium_transport_filter_v1"
            # Derived GeoJSON stage.
            assert dataset.kind == "osm_geojson"
            assert dataset.is_active is True
            assert derived_metadata["raw_dataset_id"] == raw_dataset.id
            assert derived_metadata["filtered_dataset_id"] == filtered_dataset.id
            assert derived_metadata["derived_from_dataset_id"] == filtered_dataset.id

            second_dataset = run_source(session, source)
            assert second_dataset.id == dataset.id
            assert count_datasets(session, "osm_pbf_raw") == 1
            assert count_datasets(session, "osm_pbf_transport") == 1
    finally:
        settings.osm_pbf_prefilter_enabled = saved_enabled
        settings.osm_pbf_prefilter_formats = saved_formats
        settings.osm_pbf_prefilter_script = saved_script
|
||||
|
||||
|
||||
def test_osm_geojson_import_deduplicates_duplicate_osm_identities(tmp_path):
    """Two GeoJSON features with the same OSM type and id must import as one.

    The second feature repeats ``relation/100`` without geometry; storage
    metadata must record one stored feature and one skipped duplicate, and
    the surviving route must keep ``ref`` ``100``.
    """
    reset_db()
    route_feature = {
        "type": "Feature",
        "properties": {"osm_type": "relation", "osm_id": "100", "type": "route", "route": "bus", "ref": "100"},
        "geometry": {
            "type": "LineString",
            "coordinates": [[13.4, 52.5], [13.41, 52.501]],
        },
    }
    duplicate_feature = {
        "type": "Feature",
        "properties": {"osm_type": "relation", "osm_id": "100", "name": "Duplicate without route geometry"},
        "geometry": None,
    }
    collection = {
        "type": "FeatureCollection",
        "features": [route_feature, duplicate_feature],
    }
    geojson_path = tmp_path / "duplicate-osm-identities.geojson"
    geojson_path.write_text(json.dumps(collection), encoding="utf-8")

    with session_scope() as session:
        source = Source(name="Duplicate OSM IDs", kind="osm_geojson", url=str(geojson_path), country="DE")
        session.add(source)
        session.flush()

        dataset = run_source(session, source)

        metadata = json.loads(dataset.metadata_json or "{}")
        storage = metadata["osm_storage"]
        assert dataset.status == "imported"
        assert storage["features"] == 1
        assert storage["duplicate_features_skipped"] == 1
        assert osm_feature_count(session, dataset.id) == 1
        routes = query_osm_features(session, [dataset.id], kinds=["route"])
        route = routes[0]
        assert route.osm_type == "relation"
        assert route.osm_id == "100"
        assert route.ref == "100"
|
||||
|
||||
|
||||
def test_osm_relabel_updates_sidecar_route_scope_without_reparse(tmp_path):
    """Relabeling must refresh a stale ``route_scope`` stored in the sidecar.

    After import the sidecar row is overwritten with ``route_scope='local'``.
    ``relabel_osm_features`` must report one change, restore
    ``long_distance``, record the label version in the dataset metadata and
    log one ``label_features`` pipeline run; a second call must be skipped.
    """
    # Local import: the module's import block does not provide ``closing``.
    from contextlib import closing

    reset_db()
    collection = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {
                    "osm_type": "relation",
                    "osm_id": "900",
                    "type": "route",
                    "route": "bus",
                    "name": "FlixBus Berlin Hamburg",
                    "ref": "N900",
                },
                "geometry": {"type": "LineString", "coordinates": [[13.4, 52.5], [10.0, 53.55]]},
            }
        ],
    }
    geojson_path = tmp_path / "scope.geojson"
    geojson_path.write_text(json.dumps(collection), encoding="utf-8")

    with session_scope() as session:
        source = Source(name="Scope OSM", kind="osm_geojson", url=str(geojson_path), country="DE")
        session.add(source)
        session.flush()
        dataset = run_source(session, source)
        path = sidecar_path(dataset)
        assert path is not None

        # sqlite3's own context manager only commits or rolls back; closing()
        # makes sure the connection to the sidecar file is actually released.
        with closing(sqlite3.connect(path)) as connection:
            connection.execute("UPDATE osm_features SET route_scope = 'local'")
            connection.commit()

        stale = query_osm_features(session, [dataset.id], kinds=["route"])[0]
        assert stale.route_scope == "local"

        result = relabel_osm_features(session, dataset_id=dataset.id, rebuild_indexes=False)
        assert result["changed"] == 1

        relabeled = query_osm_features(session, [dataset.id], kinds=["route"])[0]
        assert relabeled.route_scope == "long_distance"
        metadata = json.loads(session.get(Dataset, dataset.id).metadata_json or "{}")
        assert metadata["label_features"]["version"] == "route_scope_v2"
        label_runs = select(func.count()).select_from(PipelineRun).where(PipelineRun.stage == "label_features")
        assert session.scalar(label_runs) == 1

        skipped = relabel_osm_features(session, dataset_id=dataset.id)
        assert skipped["skipped"] == 1
|
||||
92
tests/test_osm_replication.py
Normal file
92
tests/test_osm_replication.py
Normal file
@@ -0,0 +1,92 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.db import reset_db, session_scope
|
||||
from app.models import Dataset, OsmDiffState, Source
|
||||
from app.pipeline.osm_pbf import _try_prepare_raw_from_diffs
|
||||
from app.pipeline.osm_replication import ReplicationState, diff_url_for_sequence, parse_replication_state_text
|
||||
|
||||
|
||||
def test_parse_replication_state_text_and_diff_url():
    """Check state-text parsing and the diff URL derived from a sequence number."""
    # Properties-style state text; the "\\:" sequences are escaped colons that
    # the parsed timestamp is expected to contain unescaped.
    state_text = """
#Sat Jun 27 21:21:03 UTC 2026
sequenceNumber=1234
timestamp=2026-06-27T21\\:21\\:02Z
"""
    parsed = parse_replication_state_text(state_text)

    assert parsed.sequence_number == 1234
    assert parsed.timestamp == "2026-06-27T21:21:02Z"

    diff_url = diff_url_for_sequence("https://download.geofabrik.de/europe/germany/berlin-updates", 1234)
    assert diff_url.endswith("/000/001/234.osc.gz")
|
||||
|
||||
|
||||
def test_osm_diff_application_records_new_raw_dataset_and_state(tmp_path, monkeypatch):
    """Preparing a raw dataset from diffs creates a new dataset and a new active state.

    Fetching the replication state, downloading diffs and applying them are
    all replaced by local fakes. The stored state starts at sequence 1 and the
    faked remote state reports sequence 3; the test expects two downloads.
    """
    reset_db()
    base_path = tmp_path / "base.osm.pbf"
    base_path.write_bytes(b"base")
    downloaded = []

    def fake_fetch(_updates_url, timeout=30):
        # Stand-in for the remote state lookup: always reports sequence 3.
        return ReplicationState(
            sequence_number=3,
            timestamp="2026-06-27T21:21:02Z",
            raw={"sequenceNumber": "3"},
        )

    def fake_download(_updates_url, sequence_number, output_dir, timeout=120):
        # Write a small placeholder diff and record it for the count assertion.
        target = output_dir / f"{sequence_number}.osc.gz"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(f"diff-{sequence_number}".encode())
        downloaded.append(target)
        return target

    def fake_apply(base, diffs, output, host_tool):
        # Join base and diff payloads with "+" instead of invoking a real tool.
        base_payload = base.read_bytes()
        diff_payload = b"+".join(diff.read_bytes() for diff in diffs)
        output.write_bytes(base_payload + b"+" + diff_payload)
        return subprocess.CompletedProcess(args=["osmium"], returncode=0, stdout="applied", stderr="")

    monkeypatch.setattr("app.pipeline.osm_pbf.fetch_replication_state", fake_fetch)
    monkeypatch.setattr("app.pipeline.osm_pbf.download_diff", fake_download)
    monkeypatch.setattr("app.pipeline.osm_pbf.apply_osm_changes", fake_apply)

    with session_scope() as session:
        source = Source(
            name="Berlin OSM",
            kind="osm_pbf",
            url="https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf",
            notes="geofabrik_id=berlin; updates_url=https://download.geofabrik.de/europe/germany/berlin-updates",
        )
        session.add(source)
        session.flush()

        base_dataset = Dataset(
            source_id=source.id,
            kind="osm_pbf_raw",
            local_path=str(base_path),
            sha256="b" * 64,
            is_active=False,
            status="committed",
        )
        session.add(base_dataset)
        session.flush()

        initial_state = OsmDiffState(
            source_id=source.id,
            raw_dataset_id=base_dataset.id,
            updates_url="https://download.geofabrik.de/europe/germany/berlin-updates",
            sequence_number=1,
            timestamp="2026-06-26T21:21:02Z",
            status="active",
        )
        session.add(initial_state)
        session.flush()

        new_dataset = _try_prepare_raw_from_diffs(session, source)

        assert new_dataset is not None
        assert new_dataset.id != base_dataset.id
        assert new_dataset.kind == "osm_pbf_raw"
        assert len(downloaded) == 2

        states_query = (
            select(OsmDiffState)
            .where(OsmDiffState.source_id == source.id)
            .order_by(OsmDiffState.sequence_number)
        )
        recorded = session.scalars(states_query).all()
        assert [entry.sequence_number for entry in recorded] == [1, 3]
        assert [entry.status for entry in recorded] == ["superseded", "active"]
|
||||
317
tests/test_pipeline.py
Normal file
317
tests/test_pipeline.py
Normal file
@@ -0,0 +1,317 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from shapely.geometry import LineString
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.db import init_db, session_scope
|
||||
from app.models import GtfsRoute, OsmFeature, RouteMatch, RoutePattern
|
||||
from app.osm_classification import infer_osm_route_scope
|
||||
from app.pipeline.gtfs import _gtfs_mode
|
||||
from app.pipeline.matcher import _build_osm_route_index, _candidate_osm_routes, route_match_scope, run_route_matching, score_route_pair
|
||||
from app.pipeline.osm_addresses import _address_area_geometry_geojson
|
||||
from app.pipeline.sample_data import load_sample_project
|
||||
from app.pipeline.route_layer import rebuild_route_layer
|
||||
from app.pipeline.utils import geometry_json_and_bbox
|
||||
|
||||
|
||||
def test_sample_pipeline_imports_and_matches():
    """Load the sample project and check imported rows and match statuses."""
    init_db()
    with session_scope() as session:
        summary = load_sample_project(session)
        assert summary["match_result"]["routes"] == 6

        # The RE1 line must exist on both the GTFS and the OSM side.
        gtfs_re1 = session.scalar(select(GtfsRoute).where(GtfsRoute.short_name == "RE1"))
        assert gtfs_re1 is not None
        osm_re1 = session.scalar(select(OsmFeature).where(OsmFeature.ref == "RE1"))
        assert osm_re1 is not None

        match_rows = session.execute(select(RouteMatch.status, RouteMatch.confidence))
        statuses = {status: confidence for status, confidence in match_rows}
        assert "matched" in statuses
        # At least one of the non-matched statuses must be present as well.
        assert not {"weak", "missing"}.isdisjoint(statuses)
|
||||
|
||||
|
||||
def test_route_matching_preserves_unchanged_match_rows():
    """Re-running route matching must leave match ids and timestamps as they were."""
    init_db()
    with session_scope() as session:
        load_sample_project(session)

        def snapshot():
            # Map each GTFS route id to its match row id and update timestamp.
            rows = session.execute(
                select(RouteMatch.gtfs_route_id, RouteMatch.id, RouteMatch.updated_at)
            )
            return {route_id: (match_id, updated_at) for route_id, match_id, updated_at in rows}

        before = snapshot()
        outcome = run_route_matching(session)
        after = snapshot()

        assert outcome["unchanged"] == outcome["routes"]
        assert outcome["created"] == 0
        assert outcome["updated"] == 0
        assert after == before
|
||||
|
||||
|
||||
def test_route_layer_reuses_unchanged_route_patterns():
    """Rebuilding the route layer must keep the same pattern key -> id mapping."""
    init_db()
    with session_scope() as session:
        load_sample_project(session)

        def pattern_ids():
            # Map every pattern key to the id of its stored row.
            rows = session.execute(select(RoutePattern.pattern_key, RoutePattern.id))
            return {pattern_key: pattern_id for pattern_key, pattern_id in rows}

        before = pattern_ids()
        outcome = rebuild_route_layer(session)
        after = pattern_ids()

        assert outcome["route_patterns_created"] == 0
        assert outcome["route_patterns_removed"] == 0
        assert outcome["route_patterns_reused"] == outcome["route_patterns"]
        assert after == before
|
||||
|
||||
|
||||
def test_extended_gtfs_route_types_are_mapped_to_modes():
    """Check the mode returned by `_gtfs_mode` for several three/four-digit route types."""
    expected_modes = {
        700: "bus",
        100: "train",
        109: "train",
        900: "tram",
        1000: "ferry",
    }
    for route_type, expected in expected_modes.items():
        assert _gtfs_mode(route_type) == expected
|
||||
|
||||
|
||||
def test_closed_address_way_is_stored_as_polygon_geometry():
    """Closed rings become Polygon GeoJSON; three-point inputs yield None."""
    corners = [
        (8.68590, 49.40435),
        (8.68600, 49.40435),
        (8.68600, 49.40445),
        (8.68590, 49.40445),
    ]

    # Explicitly closed ring: the first corner is repeated at the end.
    explicit_ring = [*corners, corners[0]]
    polygon_json = _address_area_geometry_geojson(explicit_ring)
    expected_polygon = {
        "type": "Polygon",
        "coordinates": [
            [
                [8.6859, 49.40435],
                [8.686, 49.40435],
                [8.686, 49.40445],
                [8.6859, 49.40445],
                [8.6859, 49.40435],
            ]
        ],
    }
    assert json.loads(polygon_json or "{}") == expected_polygon

    # Open coordinate list flagged as closed: the result ring must end on the
    # first corner.
    polygon_json = _address_area_geometry_geojson(list(corners), closed=True)
    outer_ring = json.loads(polygon_json or "{}")["coordinates"][0]
    assert outer_ring[-1] == [8.6859, 49.40435]

    # Three points produce no geometry, with or without closed=False.
    triangle = [(0, 0), (1, 0), (1, 1)]
    assert _address_area_geometry_geojson(list(triangle)) is None
    assert _address_area_geometry_geojson(list(triangle), closed=False) is None
|
||||
|
||||
|
||||
def test_osm_route_scope_classifier_distinguishes_train_service_classes():
    """Check `infer_osm_route_scope` results for a table of mode/ref/tag inputs."""
    cases = [
        ({"mode": "train", "ref": "ICE 28"}, "long_distance"),
        ({"mode": "train", "ref": "RE1"}, "regional"),
        ({"mode": "train", "ref": "S5", "network": "S-Bahn Berlin"}, "local"),
        ({"mode": "subway", "ref": "U5"}, "local"),
        ({"mode": "coach", "ref": "FLX"}, "long_distance"),
        ({"mode": "bus", "ref": "100"}, "local"),
        ({"mode": "bus", "ref": "800", "tags": {"bus": "regional"}}, "regional"),
        ({"mode": "bus", "name": "FlixBus Berlin Hamburg"}, "long_distance"),
    ]
    for arguments, expected_scope in cases:
        assert infer_osm_route_scope(**arguments) == expected_scope
|
||||
|
||||
|
||||
def test_exact_line_ref_with_overlapping_geometry_scores_as_match_candidate():
    """Same ref and mode with overlapping bounding boxes must score at least 85."""
    gtfs_bbox = {"min_lon": 13.29, "min_lat": 52.42, "max_lon": 13.33, "max_lat": 52.45}
    osm_bbox = {"min_lon": 13.28, "min_lat": 52.40, "max_lon": 13.51, "max_lat": 52.46}

    gtfs_route = GtfsRoute(
        route_id="17441_700",
        short_name="M11",
        long_name=None,
        mode="bus",
        operator_name="Verkehrsverbund Berlin-Brandenburg",
        route_key="m11",
        **gtfs_bbox,
    )
    osm_route = OsmFeature(
        osm_type="relation",
        osm_id="123",
        kind="route",
        mode="bus",
        ref="M11",
        name="Bus M11: U Dahlem-Dorf => S Schöneweide/Sterndamm",
        operator="Berliner Verkehrsbetriebe",
        network="Verkehrsverbund Berlin-Brandenburg",
        route_key="m11",
        **osm_bbox,
    )

    score, reasons = score_route_pair(gtfs_route, osm_route)

    assert score >= 85
    assert reasons["line_identity"] == "exact_ref_mode_bbox_overlap"
|
||||
|
||||
|
||||
def test_exact_line_ref_with_bbox_overlap_is_strong_without_name_or_operator_match():
    """Same ref/mode plus bbox overlap scores >= 88 with blank name/operator fields."""
    gtfs_bbox = {"min_lon": 13.29, "min_lat": 52.42, "max_lon": 13.33, "max_lat": 52.45}
    osm_bbox = {"min_lon": 13.30, "min_lat": 52.43, "max_lon": 13.35, "max_lat": 52.46}

    # All descriptive text fields are empty strings on both sides.
    gtfs_route = GtfsRoute(
        route_id="route-1",
        short_name="M11",
        long_name="",
        mode="bus",
        operator_name="",
        route_key="m11",
        **gtfs_bbox,
    )
    osm_route = OsmFeature(
        osm_type="relation",
        osm_id="456",
        kind="route",
        mode="bus",
        ref="M11",
        name="",
        operator="",
        network="",
        route_key="m11",
        **osm_bbox,
    )

    score, reasons = score_route_pair(gtfs_route, osm_route)

    assert score >= 88
    assert reasons["strong_identity"] == "exact_ref_mode_bbox_overlap"
|
||||
|
||||
|
||||
def test_common_short_ref_candidates_are_spatially_ranked():
    """Of two candidates sharing ref "2", the one overlapping the route comes first."""

    def bus_relation(pk, osm_id, min_lon, min_lat, max_lon, max_lat):
        # Both candidates differ only in primary key, OSM id and bounding box.
        return OsmFeature(
            id=pk,
            osm_type="relation",
            osm_id=osm_id,
            kind="route",
            mode="bus",
            ref="2",
            min_lon=min_lon,
            min_lat=min_lat,
            max_lon=max_lon,
            max_lat=max_lat,
            route_key="2",
        )

    gtfs_route = GtfsRoute(
        route_id="bus-2-berlin",
        short_name="2",
        mode="bus",
        min_lon=13.30,
        min_lat=52.40,
        max_lon=13.40,
        max_lat=52.50,
        route_key="2",
    )
    far = bus_relation(1, "far", 7.0, 50.0, 7.1, 50.1)
    near = bus_relation(2, "near", 13.31, 52.41, 13.39, 52.49)

    # The distant feature is listed first so input order cannot explain the result.
    osm_index = _build_osm_route_index([far, near])
    ranked = _candidate_osm_routes(gtfs_route, osm_index)

    assert ranked[0].osm_id == "near"
|
||||
|
||||
|
||||
def test_exact_ref_far_away_is_not_promoted_without_spatial_or_geometry_evidence():
    """Matching ref, mode and operator must stay below 65 when the bboxes are far apart."""
    gtfs_bbox = {"min_lon": 13.30, "min_lat": 52.40, "max_lon": 13.40, "max_lat": 52.50}
    osm_bbox = {"min_lon": 6.9, "min_lat": 50.9, "max_lon": 7.1, "max_lat": 51.0}

    gtfs_route = GtfsRoute(
        route_id="bus-2-berlin",
        short_name="2",
        mode="bus",
        operator_name="Example Operator",
        route_key="2",
        **gtfs_bbox,
    )
    osm_route = OsmFeature(
        osm_type="relation",
        osm_id="2-cologne",
        kind="route",
        mode="bus",
        ref="2",
        operator="Example Operator",
        route_key="2",
        **osm_bbox,
    )

    score, reasons = score_route_pair(gtfs_route, osm_route)

    assert score < 65
    # Both the penalty and the cap must be reported in the reasons.
    assert reasons["spatial_penalty"] == "exact_ref_far_bbox_center"
    assert reasons["spatial_cap"] == "exact_ref_far_without_geometry_overlap"
|
||||
|
||||
|
||||
def test_geometry_overlap_can_confirm_exact_ref_match():
    """Two nearly coincident line geometries with the same ref must score >= 90."""

    def bbox_fields(bbox):
        # Spread a (min_lon, min_lat, max_lon, max_lat) sequence into model kwargs.
        return {
            "min_lon": bbox[0],
            "min_lat": bbox[1],
            "max_lon": bbox[2],
            "max_lat": bbox[3],
        }

    gtfs_line = LineString([(13.30, 52.40), (13.35, 52.45), (13.40, 52.50)])
    # The OSM line is the GTFS line shifted by 0.0005 degrees on both axes.
    osm_line = LineString([(13.3005, 52.4005), (13.3505, 52.4505), (13.4005, 52.5005)])
    gtfs_geometry, gtfs_bbox = geometry_json_and_bbox(gtfs_line)
    osm_geometry, osm_bbox = geometry_json_and_bbox(osm_line)

    gtfs_route = GtfsRoute(
        route_id="bus-2-berlin",
        short_name="2",
        mode="bus",
        route_key="2",
        geometry_geojson=gtfs_geometry,
        **bbox_fields(gtfs_bbox),
    )
    osm_route = OsmFeature(
        osm_type="relation",
        osm_id="2-berlin",
        kind="route",
        mode="bus",
        ref="2",
        route_key="2",
        geometry_geojson=osm_geometry,
        **bbox_fields(osm_bbox),
    )

    score, reasons = score_route_pair(gtfs_route, osm_route)

    assert score >= 90
    assert reasons["strong_identity"] == "exact_ref_mode_geometry_overlap"
    assert reasons["geometry"]["gtfs_on_osm_ratio"] >= 0.9
|
||||
|
||||
|
||||
def test_route_match_scope_distinguishes_outside_loaded_osm_area():
    """A route is in scope for an enclosing area and out of scope for a disjoint one."""
    gtfs_route = GtfsRoute(min_lon=13.3, min_lat=52.4, max_lon=13.4, max_lat=52.5)
    enclosing_area = (13.0, 52.3, 13.8, 52.7)
    disjoint_area = (6.0, 50.0, 7.0, 51.0)

    assert route_match_scope(gtfs_route, enclosing_area) == "in_osm_scope"
    assert route_match_scope(gtfs_route, disjoint_area) == "outside_osm_scope"
|
||||
1004
tests/test_route_layer.py
Normal file
1004
tests/test_route_layer.py
Normal file
File diff suppressed because it is too large
Load Diff
22
tests/test_source_updates.py
Normal file
22
tests/test_source_updates.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.models import Source
|
||||
from app.source_updates import _recover_missing_managed_cache_url
|
||||
|
||||
|
||||
def test_missing_managed_cache_source_recovers_seed_url_for_online_update():
    """A source pointing at a local cache path gets its download URL restored.

    The helper is expected both to return the old and new URL and to update
    `source.url` in place.
    """
    cached_path = "data/sources/source_3/1782478365.osm.pbf"
    seed_url = "https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf"
    source = Source(
        id=3,
        name="Geofabrik Berlin OSM PBF",
        kind="osm_pbf",
        url=cached_path,
        country="DE",
    )

    recovery = _recover_missing_managed_cache_url(source)

    assert recovery == {"previous_url": cached_path, "url": seed_url}
    assert source.url == seed_url
|
||||
Reference in New Issue
Block a user