149 lines
5.8 KiB
Python
149 lines
5.8 KiB
Python
from __future__ import annotations
|
|
|
|
import csv
|
|
|
|
from app import feed_discovery
|
|
from app.feed_discovery import (
|
|
FeedCandidate,
|
|
build_gtfs_discovery_manifests,
|
|
enrich_ptna_candidate_from_details,
|
|
parse_ptna_country_page,
|
|
parse_ptna_detail_fields,
|
|
select_test_run_candidates,
|
|
)
|
|
|
|
|
|
def test_parse_ptna_country_and_detail_pages():
    """Check PTNA country-page parsing, detail-field parsing and enrichment.

    A single-row country listing is parsed into one candidate; a detail page
    is then parsed into a field mapping and applied to that same candidate
    through ``enrich_ptna_candidate_from_details``.
    """
    # Absolute form of the relative "Details" link in the listing row below.
    details_page_url = "https://ptna.openstreetmap.de/en/gtfs-details.php?feed=DE-BE-VBB"

    listing_markup = """
    <table>
      <tr class="gtfs-tablerow">
        <td><a href="routes.php?feed=DE-BE-VBB">DE-BE-VBB</a></td>
        <td><a href="https://www.vbb.de">Verkehrsverbund Berlin-Brandenburg</a></td>
        <td><a href="https://www.vbb.de">VBB Verkehrsverbund Berlin-Brandenburg GmbH</a></td>
        <td>2026-01-01</td>
        <td>2026-12-12</td>
        <td>20260603</td>
        <td><a href="https://www.vbb.de/vbb-services/api-open-data/datensaetze/">2026-06-03</a></td>
        <td>2026-06-03</td>
        <td><a href="/en/gtfs-details.php?feed=DE-BE-VBB">Details, ...</a></td>
      </tr>
    </table>
    """
    detail_markup = """
    <table>
      <tr><td>Release Url</td><td><a href="https://example.test/gtfs.zip">https://example.test/gtfs.zip</a></td></tr>
      <tr><td>Publisher's License</td><td><a href="https://example.test/license">CC BY 4.0</a></td></tr>
      <tr><td>License given for use in OSM</td><td>Attribution on contributor page is sufficient.</td></tr>
      <tr><td>"network:guid"</td><td>DE-BE-VBB</td></tr>
    </table>
    """

    # --- country listing -> candidates -------------------------------------
    parsed = parse_ptna_country_page(
        listing_markup,
        country="DE",
        page_url="https://ptna.openstreetmap.de/gtfs/DE/index.php",
    )

    assert len(parsed) == 1
    vbb = parsed[0]
    assert vbb.ptna_feed_id == "DE-BE-VBB"
    assert vbb.country == "DE"
    assert vbb.original_release_url == "https://www.vbb.de/vbb-services/api-open-data/datensaetze/"
    assert vbb.details_url == details_page_url

    # --- detail page -> field mapping --------------------------------------
    # The assertions expect lower-cased row labels as keys, plus a separate
    # "<label> href" key carrying the link target of the value cell.
    detail_fields = parse_ptna_detail_fields(detail_markup, details_page_url)
    assert detail_fields["publisher's license"] == "CC BY 4.0"
    assert detail_fields["publisher's license href"] == "https://example.test/license"

    # --- detail page -> candidate enrichment -------------------------------
    enrich_ptna_candidate_from_details(vbb, detail_markup, vbb.details_url)
    assert vbb.selected_url == "https://example.test/gtfs.zip"
    assert vbb.license_text == "CC BY 4.0"
    assert "network:guid=DE-BE-VBB" in vbb.notes
|
|
|
|
|
|
def test_build_gtfs_discovery_manifests_from_stubbed_sources(tmp_path, monkeypatch):
    """Build discovery manifests from stubbed sources and check the CSV output.

    All four source loaders on ``feed_discovery`` are replaced with lambdas
    returning fixed candidate lists, so no network access is involved. The
    Mobility Database and PTNA stubs point at the same download URL; the
    assertions expect the report to count two candidates overall and the
    ingestable row for that URL to mention ``ptna`` in ``source_basis``.

    Args:
        tmp_path: pytest fixture; used as the manifest output directory.
        monkeypatch: pytest fixture; used to stub the source loaders.
    """
    mobility = [
        FeedCandidate(
            discovery_source="mobility_database",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            stable_id="mdb-rnv",
            status="active",
            is_official="True",
            selected_url="https://example.test/rnv.zip",
            direct_download_url="https://example.test/rnv.zip",
            license_url="https://example.test/license",
            features="Shapes|Feed Information",
            priority="P0",
        )
    ]
    ptna = [
        FeedCandidate(
            discovery_source="ptna",
            country="DE",
            provider="Rhein-Neckar-Verkehr",
            feed_name="RNV",
            ptna_feed_id="DE-BW-RNV",
            selected_url="https://example.test/rnv.zip",
            original_release_url="https://example.test/rnv.zip",
            license_text="CC BY 4.0",
            priority="P2",
        )
    ]
    curated = [
        FeedCandidate(
            discovery_source="curated_seed",
            country="CH",
            provider="Swiss national",
            feed_name="CH Swiss national GTFS",
            selected_url="https://example.test/ch.zip",
            license_text="verify",
            features="rail,bus",
            priority="P0",
        )
    ]

    # Each stub accepts and ignores arbitrary keyword arguments.
    monkeypatch.setattr(feed_discovery, "fetch_mobility_database_candidates", lambda **_: mobility)
    monkeypatch.setattr(feed_discovery, "fetch_mobility_acceptance_candidates", lambda **_: [])
    monkeypatch.setattr(feed_discovery, "fetch_ptna_candidates", lambda **_: ptna)
    monkeypatch.setattr(feed_discovery, "load_curated_ingestable_seed", lambda **_: curated)

    report = build_gtfs_discovery_manifests(output_dir=tmp_path, countries=["DE", "CH"], test_limit=10)

    assert report["counts"]["candidates"] == 2
    assert report["counts"]["ingestable"] == 2

    # Read the manifest inside a context manager so the handle is closed
    # deterministically; newline="" is what the csv module expects for files.
    manifest_path = tmp_path / "gtfs_ingestable_sources.csv"
    with manifest_path.open(encoding="utf-8", newline="") as manifest_file:
        ingestable_rows = list(csv.DictReader(manifest_file))

    assert {row["url"] for row in ingestable_rows} == {"https://example.test/rnv.zip", "https://example.test/ch.zip"}
    assert "ptna" in next(row for row in ingestable_rows if row["url"] == "https://example.test/rnv.zip")["source_basis"]
|
|
|
|
|
|
def test_select_test_run_candidates_keeps_overlapping_german_feeds():
    """Check that both German feeds are kept when selecting test-run candidates.

    Two ``DE`` candidates and one ``CH`` candidate are offered with a limit of
    three; all three are expected back, including both German download URLs.
    """
    german_long_distance = FeedCandidate(
        discovery_source="curated_seed",
        country="DE",
        provider="DB Long-distance Rail GTFS.DE",
        selected_url="https://download.gtfs.de/germany/fv_free/latest.zip",
        priority="P1",
    )
    german_regional = FeedCandidate(
        discovery_source="mobility_database",
        country="DE",
        provider="Rhein-Neckar-Verkehr",
        selected_url="https://gtfs-sandbox-dds.rnv-online.de/latest/gtfs.zip",
        priority="P0",
    )
    swiss_national = FeedCandidate(
        discovery_source="curated_seed",
        country="CH",
        provider="Swiss national",
        selected_url="https://gtfs.geops.ch/dl/gtfs_complete.zip",
        priority="P0",
    )
    pool = [german_long_distance, german_regional, swiss_national]

    chosen = select_test_run_candidates(pool, limit=3)

    assert len(chosen) == 3
    chosen_urls = [entry.selected_url for entry in chosen]
    assert any("gtfs.de" in url for url in chosen_urls)
    assert any("rnv" in url for url in chosen_urls)
|