"""Tests for the GTFS feed discovery helpers in ``app.feed_discovery``."""

from __future__ import annotations

import csv

from app import feed_discovery
from app.feed_discovery import (
    FeedCandidate,
    build_gtfs_discovery_manifests,
    enrich_ptna_candidate_from_details,
    parse_ptna_country_page,
    parse_ptna_detail_fields,
    select_test_run_candidates,
)


def test_parse_ptna_country_and_detail_pages():
    """Parse a PTNA country page and a details page, then enrich the candidate.

    The country page is expected to yield exactly one candidate
    (``DE-BE-VBB``); the details page is expected to expose the publisher's
    license and to fill in the candidate's download URL, license text and
    notes.
    """
    details_url = "https://ptna.openstreetmap.de/en/gtfs-details.php?feed=DE-BE-VBB"

    # NOTE(review): this fixture contains no markup or link targets, yet the
    # assertions below expect URLs extracted from it -- the HTML tags look
    # like they were stripped. Confirm against the original fixture.
    country_html = """
DE-BE-VBB Verkehrsverbund Berlin-Brandenburg VBB Verkehrsverbund Berlin-Brandenburg GmbH 2026-01-01 2026-12-12 20260603 2026-06-03 2026-06-03 Details, ...
"""
    candidates = parse_ptna_country_page(
        country_html,
        country="DE",
        page_url="https://ptna.openstreetmap.de/gtfs/DE/index.php",
    )

    assert len(candidates) == 1
    candidate = candidates[0]
    assert candidate.ptna_feed_id == "DE-BE-VBB"
    assert candidate.country == "DE"
    assert candidate.original_release_url == "https://www.vbb.de/vbb-services/api-open-data/datensaetze/"
    assert candidate.details_url == details_url

    # NOTE(review): same concern as above -- the license href asserted below
    # does not appear anywhere in this text, so the markup is presumably
    # missing. Confirm against the original fixture.
    detail_html = """
Release Urlhttps://example.test/gtfs.zip
Publisher's LicenseCC BY 4.0
License given for use in OSMAttribution on contributor page is sufficient.
"network:guid"DE-BE-VBB
"""
    fields = parse_ptna_detail_fields(detail_html, details_url)
    assert fields["publisher's license"] == "CC BY 4.0"
    assert fields["publisher's license href"] == "https://example.test/license"

    # Enrichment mutates the candidate in place; the assertions read it back.
    enrich_ptna_candidate_from_details(candidate, detail_html, candidate.details_url)
    assert candidate.selected_url == "https://example.test/gtfs.zip"
    assert candidate.license_text == "CC BY 4.0"
    assert "network:guid=DE-BE-VBB" in candidate.notes


def test_build_gtfs_discovery_manifests_from_stubbed_sources(tmp_path, monkeypatch):
    """Build the discovery manifests with every candidate source stubbed out.

    Three stubbed candidates are supplied, two of which (Mobility Database
    and PTNA) share one download URL. The report is expected to count two
    candidates and two ingestable feeds, and the ingestable CSV row for the
    shared URL is expected to mention ``ptna`` in its ``source_basis``.
    """
    mobility = [
        FeedCandidate(
            discovery_source="mobility_database",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            stable_id="mdb-rnv",
            status="active",
            is_official="True",
            selected_url="https://example.test/rnv.zip",
            direct_download_url="https://example.test/rnv.zip",
            license_url="https://example.test/license",
            features="Shapes|Feed Information",
            priority="P0",
        )
    ]
    ptna = [
        FeedCandidate(
            discovery_source="ptna",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            ptna_feed_id="DE-BW-RNV",
            selected_url="https://example.test/rnv.zip",
            original_release_url="https://example.test/rnv.zip",
            license_text="CC BY 4.0",
            priority="P2",
        )
    ]
    curated = [
        FeedCandidate(
            discovery_source="curated_seed",
            country="CH",
            provider="Swiss national",
            feed_name="CH Swiss national GTFS",
            selected_url="https://example.test/ch.zip",
            license_text="verify",
            features="rail,bus",
            priority="P0",
        )
    ]

    # Replace every fetcher/loader on the module with a canned-result stub.
    monkeypatch.setattr(feed_discovery, "fetch_mobility_database_candidates", lambda **_: mobility)
    monkeypatch.setattr(feed_discovery, "fetch_mobility_acceptance_candidates", lambda **_: [])
    monkeypatch.setattr(feed_discovery, "fetch_ptna_candidates", lambda **_: ptna)
    monkeypatch.setattr(feed_discovery, "load_curated_ingestable_seed", lambda **_: curated)

    report = build_gtfs_discovery_manifests(output_dir=tmp_path, countries=["DE", "CH"], test_limit=10)

    assert report["counts"]["candidates"] == 2
    assert report["counts"]["ingestable"] == 2

    # Read the manifest inside a context manager so the handle is closed
    # deterministically; newline="" is what the csv module expects.
    ingestable_path = tmp_path / "gtfs_ingestable_sources.csv"
    with ingestable_path.open(encoding="utf-8", newline="") as handle:
        ingestable_rows = list(csv.DictReader(handle))

    assert {row["url"] for row in ingestable_rows} == {
        "https://example.test/rnv.zip",
        "https://example.test/ch.zip",
    }
    rnv_row = next(row for row in ingestable_rows if row["url"] == "https://example.test/rnv.zip")
    assert "ptna" in rnv_row["source_basis"]


def test_select_test_run_candidates_keeps_overlapping_german_feeds():
    """Select a test run from two German feeds and one Swiss feed.

    With ``limit=3`` all three candidates are expected to be selected,
    including both German feeds (the gtfs.de one and the RNV one).
    """
    candidates = [
        FeedCandidate(
            discovery_source="curated_seed",
            country="DE",
            provider="DB Long-distance Rail GTFS.DE",
            selected_url="https://download.gtfs.de/germany/fv_free/latest.zip",
            priority="P1",
        ),
        FeedCandidate(
            discovery_source="mobility_database",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            selected_url="https://gtfs-sandbox-dds.rnv-online.de/latest/gtfs.zip",
            priority="P0",
        ),
        FeedCandidate(
            discovery_source="curated_seed",
            country="CH",
            provider="Swiss national",
            selected_url="https://gtfs.geops.ch/dl/gtfs_complete.zip",
            priority="P0",
        ),
    ]

    selected = select_test_run_candidates(candidates, limit=3)

    assert len(selected) == 3
    assert any("gtfs.de" in candidate.selected_url for candidate in selected)
    assert any("rnv" in candidate.selected_url for candidate in selected)