Files
meubility-workbench/tests/test_feed_discovery.py
2026-07-01 23:29:51 +02:00

149 lines
5.8 KiB
Python

from __future__ import annotations
import csv
from app import feed_discovery
from app.feed_discovery import (
FeedCandidate,
build_gtfs_discovery_manifests,
enrich_ptna_candidate_from_details,
parse_ptna_country_page,
parse_ptna_detail_fields,
select_test_run_candidates,
)
def test_parse_ptna_country_and_detail_pages():
    """Parse a one-row PTNA country listing plus a detail page and check the results.

    The first half feeds a single ``gtfs-tablerow`` to ``parse_ptna_country_page``
    and checks the resulting candidate; the second half feeds a detail table to
    ``parse_ptna_detail_fields`` and ``enrich_ptna_candidate_from_details``.
    """
    details_url = "https://ptna.openstreetmap.de/en/gtfs-details.php?feed=DE-BE-VBB"

    listing_html = """
<table>
<tr class="gtfs-tablerow">
<td><a href="routes.php?feed=DE-BE-VBB">DE-BE-VBB</a></td>
<td><a href="https://www.vbb.de">Verkehrsverbund Berlin-Brandenburg</a></td>
<td><a href="https://www.vbb.de">VBB Verkehrsverbund Berlin-Brandenburg GmbH</a></td>
<td>2026-01-01</td>
<td>2026-12-12</td>
<td>20260603</td>
<td><a href="https://www.vbb.de/vbb-services/api-open-data/datensaetze/">2026-06-03</a></td>
<td>2026-06-03</td>
<td><a href="/en/gtfs-details.php?feed=DE-BE-VBB">Details, ...</a></td>
</tr>
</table>
"""
    parsed = parse_ptna_country_page(
        listing_html,
        country="DE",
        page_url="https://ptna.openstreetmap.de/gtfs/DE/index.php",
    )

    assert len(parsed) == 1
    vbb = parsed[0]
    assert vbb.ptna_feed_id == "DE-BE-VBB"
    assert vbb.country == "DE"
    assert (
        vbb.original_release_url
        == "https://www.vbb.de/vbb-services/api-open-data/datensaetze/"
    )
    # The row's root-relative "Details" href is expected back as an absolute URL
    # on the same host as page_url.
    assert vbb.details_url == details_url

    details_html = """
<table>
<tr><td>Release Url</td><td><a href="https://example.test/gtfs.zip">https://example.test/gtfs.zip</a></td></tr>
<tr><td>Publisher's License</td><td><a href="https://example.test/license">CC BY 4.0</a></td></tr>
<tr><td>License given for use in OSM</td><td>Attribution on contributor page is sufficient.</td></tr>
<tr><td>"network:guid"</td><td>DE-BE-VBB</td></tr>
</table>
"""
    # Field labels are looked up lower-cased; a link target is expected under
    # the same key with an " href" suffix.
    detail_fields = parse_ptna_detail_fields(details_html, details_url)
    assert detail_fields["publisher's license"] == "CC BY 4.0"
    assert detail_fields["publisher's license href"] == "https://example.test/license"

    # Enrichment mutates the candidate in place, so it runs only after the
    # listing-level assertions above.
    enrich_ptna_candidate_from_details(vbb, details_html, vbb.details_url)
    assert vbb.selected_url == "https://example.test/gtfs.zip"
    assert vbb.license_text == "CC BY 4.0"
    assert "network:guid=DE-BE-VBB" in vbb.notes
def test_build_gtfs_discovery_manifests_from_stubbed_sources(tmp_path, monkeypatch):
    """Build discovery manifests from stubbed sources and inspect the ingestable CSV.

    The four loader functions on ``feed_discovery`` are replaced with lambdas
    returning fixed candidate lists, so the test does not depend on what the
    real loaders do. The Mobility Database and PTNA stubs both describe the RNV
    feed under the same URL; the test expects two candidates / two ingestable
    rows overall, and expects the RNV row's ``source_basis`` to mention ``ptna``.

    Args:
        tmp_path: pytest fixture; used as the manifest output directory.
        monkeypatch: pytest fixture; used to stub the source loaders.
    """
    rnv_url = "https://example.test/rnv.zip"
    ch_url = "https://example.test/ch.zip"

    mobility = [
        FeedCandidate(
            discovery_source="mobility_database",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            stable_id="mdb-rnv",
            status="active",
            is_official="True",
            selected_url=rnv_url,
            direct_download_url=rnv_url,
            license_url="https://example.test/license",
            features="Shapes|Feed Information",
            priority="P0",
        )
    ]
    ptna = [
        FeedCandidate(
            discovery_source="ptna",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            ptna_feed_id="DE-BW-RNV",
            selected_url=rnv_url,
            original_release_url=rnv_url,
            license_text="CC BY 4.0",
            priority="P2",
        )
    ]
    curated = [
        FeedCandidate(
            discovery_source="curated_seed",
            country="CH",
            provider="Swiss national",
            feed_name="CH Swiss national GTFS",
            selected_url=ch_url,
            license_text="verify",
            features="rail,bus",
            priority="P0",
        )
    ]

    # The stubs accept keyword arguments only and ignore them.
    monkeypatch.setattr(feed_discovery, "fetch_mobility_database_candidates", lambda **_: mobility)
    monkeypatch.setattr(feed_discovery, "fetch_mobility_acceptance_candidates", lambda **_: [])
    monkeypatch.setattr(feed_discovery, "fetch_ptna_candidates", lambda **_: ptna)
    monkeypatch.setattr(feed_discovery, "load_curated_ingestable_seed", lambda **_: curated)

    report = build_gtfs_discovery_manifests(
        output_dir=tmp_path,
        countries=["DE", "CH"],
        test_limit=10,
    )
    assert report["counts"]["candidates"] == 2
    assert report["counts"]["ingestable"] == 2

    # Read the manifest inside a context manager so the handle is closed
    # deterministically; newline="" lets the csv module do its own newline
    # handling, as its documentation recommends.
    manifest_path = tmp_path / "gtfs_ingestable_sources.csv"
    with manifest_path.open(encoding="utf-8", newline="") as handle:
        ingestable_rows = list(csv.DictReader(handle))

    assert {row["url"] for row in ingestable_rows} == {rnv_url, ch_url}
    rnv_row = next(row for row in ingestable_rows if row["url"] == rnv_url)
    assert "ptna" in rnv_row["source_basis"]
def test_select_test_run_candidates_keeps_overlapping_german_feeds():
    """Select from two DE candidates and one CH candidate with ``limit=3``.

    All three are expected back, and both German feeds (the gtfs.de
    long-distance feed and the RNV feed) must be among the selected URLs.
    """
    db_long_distance = FeedCandidate(
        discovery_source="curated_seed",
        country="DE",
        provider="DB Long-distance Rail GTFS.DE",
        selected_url="https://download.gtfs.de/germany/fv_free/latest.zip",
        priority="P1",
    )
    rnv = FeedCandidate(
        discovery_source="mobility_database",
        country="DE",
        provider="Rhein-Neckar-Verkehr",
        selected_url="https://gtfs-sandbox-dds.rnv-online.de/latest/gtfs.zip",
        priority="P0",
    )
    swiss_national = FeedCandidate(
        discovery_source="curated_seed",
        country="CH",
        provider="Swiss national",
        selected_url="https://gtfs.geops.ch/dl/gtfs_complete.zip",
        priority="P0",
    )

    picked = select_test_run_candidates(
        [db_long_distance, rnv, swiss_national],
        limit=3,
    )

    assert len(picked) == 3
    picked_urls = [entry.selected_url for entry in picked]
    assert any("gtfs.de" in url for url in picked_urls)
    assert any("rnv" in url for url in picked_urls)