Files
meubility-workbench/scripts/discover_gtfs_sources.py
2026-07-01 23:29:51 +02:00

48 lines
2.1 KiB
Python

#!/usr/bin/env python3
"""Build GTFS source discovery manifests from Mobility Database, PTNA, and local seeds."""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT at the front of the import path (only once) before the `app.*`
# import that follows; presumably the `app` package lives directly under
# ROOT -- confirm against the repository layout.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from app.feed_discovery import build_gtfs_discovery_manifests, default_generated_dir # noqa: E402
def main(argv: list[str] | None = None) -> None:
    """Parse command-line options, build the GTFS discovery manifests, and print the result.

    The parsed options are forwarded to ``build_gtfs_discovery_manifests`` and
    whatever it returns is written to stdout as indented JSON.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so a plain ``main()`` call behaves as before.
    """
    parser = argparse.ArgumentParser(description="Build GTFS discovery and ingestable-source CSV manifests.")
    parser.add_argument("--output-dir", default=str(default_generated_dir()), help="Directory for generated CSV files")
    parser.add_argument(
        "--countries",
        default="DE,AT,CH,NL,DK,FR,BE,LU,NO,SE,FI,IE,GB",
        help="Comma-separated country codes, or ALL for all countries exposed by the upstream catalogs",
    )
    parser.add_argument("--no-mobility-database", action="store_true", help="Skip Mobility Database feeds_v2.csv")
    parser.add_argument("--no-acceptance-test-list", action="store_true", help="Skip MobilityData validator acceptance-test feed list")
    parser.add_argument("--no-ptna", action="store_true", help="Skip PTNA GTFS analysis pages")
    parser.add_argument("--max-ptna-details", type=int, default=80, help="Maximum PTNA detail pages to fetch")
    parser.add_argument("--test-limit", type=int, default=24, help="Rows written to the focused test-run CSV")
    parser.add_argument("--check-urls", action="store_true", help="Run HEAD/range checks for ingestable feed URLs")
    args = parser.parse_args(argv)

    # Split the comma-separated value and drop empty entries left behind by
    # stray commas or surrounding whitespace.
    countries = [part.strip() for part in args.countries.split(",") if part.strip()]

    result = build_gtfs_discovery_manifests(
        output_dir=Path(args.output_dir),
        countries=countries,
        # The CLI exposes opt-out flags; the builder takes opt-in booleans.
        include_mobility_database=not args.no_mobility_database,
        include_acceptance_test_list=not args.no_acceptance_test_list,
        include_ptna=not args.no_ptna,
        max_ptna_details=args.max_ptna_details,
        test_limit=args.test_limit,
        check_urls=args.check_urls,
    )
    # ensure_ascii=False keeps non-ASCII characters unescaped in the output.
    print(json.dumps(result, indent=2, ensure_ascii=False))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()