# File: PlanetaryTime/scripts/refresh_data.py
# (file-viewer listing: 340 lines, 11 KiB, Python)

"""refresh_data.py — fetch planetary/moon data from Wikidata and regenerate _data.py.
Usage:
python scripts/refresh_data.py [--dry-run]
Writes:
src/planetarytime/_data.py
Requires only the Python standard library (urllib).
"""
from __future__ import annotations
import argparse
import json
import sys
import urllib.parse
import urllib.request
from dataclasses import dataclass
from datetime import date, datetime
from pathlib import Path
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Parent of the directory that holds this script (resolved to an absolute path).
REPO_ROOT = Path(__file__).resolve().parent.parent
# The generated module that main() overwrites unless --dry-run is given.
DATA_FILE = REPO_ROOT / "src" / "planetarytime" / "_data.py"
# ---------------------------------------------------------------------------
# Wikidata SPARQL helpers
# ---------------------------------------------------------------------------
# Base URL that sparql_query() appends its encoded query string to.
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
# Sent as the User-Agent header on every request made by sparql_query().
USER_AGENT = "planetarytime-refresh/1.0 (https://github.com/jan-doubravsky/planetarytime)"
def sparql_query(query: str) -> list[dict]:
    """Run a SPARQL SELECT against the Wikidata endpoint and return its bindings.

    Args:
        query: The SPARQL query text; it is URL-encoded into the request.

    Returns:
        The ``results.bindings`` list from the JSON response.

    Errors from the HTTP request, from JSON decoding, or from a response
    lacking the expected keys are not caught here and propagate to the caller.
    """
    query_string = urllib.parse.urlencode({"query": query, "format": "json"})
    request = urllib.request.Request(
        f"{SPARQL_ENDPOINT}?{query_string}",
        headers={
            "User-Agent": USER_AGENT,
            "Accept": "application/sparql-results+json",
        },
    )
    # A 30-second timeout keeps a stalled endpoint from hanging the script.
    with urllib.request.urlopen(request, timeout=30) as response:
        body = response.read()
    payload = json.loads(body.decode())
    return payload["results"]["bindings"]
def _float(binding: dict, key: str) -> float | None:
v = binding.get(key, {}).get("value")
return float(v) if v is not None else None
def _date(binding: dict, key: str) -> date | None:
v = binding.get(key, {}).get("value")
if v is None:
return None
v = v.lstrip("+")
try:
return datetime.fromisoformat(v.replace("Z", "+00:00")).date()
except ValueError:
return None
# ---------------------------------------------------------------------------
# Planet data
# ---------------------------------------------------------------------------
# SPARQL text run by fetch_planets().  For the seven items listed in VALUES it
# selects the English label (?name) and, each in its own OPTIONAL group, a
# ?rotationHours value, an ?orbitalDays value and a ?discoveryDate.  The two
# quantities are only bound when the statement's unit equals the unit item
# named in the query.
# The "# Mercury"-style annotations are inside the string, so they are sent to
# the endpoint as SPARQL comments.
# NOTE(review): the property and unit IDs (P2386, P2257, Q7727, Q573) cannot be
# verified from this file — confirm on Wikidata that they denote rotation
# period, orbital period, hour and day respectively.
PLANET_QUERY = """\
SELECT ?name ?rotationHours ?orbitalDays ?discoveryDate WHERE {
VALUES ?item {
wd:Q308 # Mercury
wd:Q313 # Venus
wd:Q111 # Mars
wd:Q319 # Jupiter
wd:Q193 # Saturn
wd:Q324 # Uranus
wd:Q332 # Neptune
}
?item rdfs:label ?name FILTER(LANG(?name) = "en").
OPTIONAL {
?item p:P2386 ?rotStmt.
?rotStmt ps:P2386 ?rotationHours.
?rotStmt psv:P2386/wikibase:quantityUnit wd:Q7727.
}
OPTIONAL {
?item p:P2257 ?orbStmt.
?orbStmt ps:P2257 ?orbitalDays.
?orbStmt psv:P2257/wikibase:quantityUnit wd:Q573.
}
OPTIONAL { ?item wdt:P575 ?discoveryDate. }
}
"""
# SPARQL text run by fetch_planets() after the main planet query.  It pairs
# three probe items with three planet items and returns the planet's English
# label (?name) together with the probe's P619 value as ?contactDate.
# NOTE(review): confirm that P619 is the intended "contact" property — it may
# be the probe's launch date rather than its arrival/landing date; the
# fallback table in this file carries later dates (e.g. Venus 1970-12-15).
# NOTE(review): the probe QIDs are not verifiable from this file.
CONTACT_QUERY = """\
SELECT ?name ?contactDate WHERE {
VALUES (?probe ?planet) {
(wd:Q1573 wd:Q308) # MESSENGER — Mercury
(wd:Q170 wd:Q313) # Venera 7 — Venus
(wd:Q160102 wd:Q111) # Viking 1 — Mars
}
?planet rdfs:label ?name FILTER(LANG(?name) = "en").
?probe wdt:P619 ?contactDate.
}
"""
@dataclass
class PlanetData:
    """One planet row, as emitted into ``PLANET_ROWS`` of the generated module."""

    # English label; also the key used to match Wikidata rows to fallback rows.
    name: str
    # Rotation value in hours (the fetch code stores its absolute value).
    rotation_hours: float
    # Orbital value in hours (Wikidata days multiplied by 24).
    orbital_hours: float
    discovery_date: date
    # Optional; left as None when no contact date is known.
    contact_date: date | None = None
# Built-in planet rows that fetch_planets() uses when the Wikidata query fails
# or a planet has no complete row.  Positional fields: name, rotation_hours,
# orbital_hours (written as days * 24), discovery_date, contact_date.
_FALLBACK_PLANETS: list[PlanetData] = [
PlanetData("Mercury", 1407.6, 87.97 * 24, date(1631, 11, 7), date(2011, 3, 18)),
PlanetData("Venus", 5832.5, 224.70 * 24, date(1610, 1, 1), date(1970, 12, 15)),
PlanetData("Mars", 24.6, 686.97 * 24, date(1610, 1, 1), date(1976, 7, 20)),
PlanetData("Jupiter", 9.9, 4332.59 * 24, date(1610, 1, 7), None),
PlanetData("Saturn", 10.7, 10759.22 * 24, date(1610, 7, 25), None),
PlanetData("Uranus", 17.2, 30688.50 * 24, date(1781, 3, 13), None),
PlanetData("Neptune", 16.1, 60182.00 * 24, date(1846, 9, 23), None),
]
# Names in output order; fetch_planets() emits exactly one row per entry.
_PLANET_ORDER = [p.name for p in _FALLBACK_PLANETS]
def fetch_planets() -> list[PlanetData]:
    """Fetch planet data from Wikidata, substituting built-in values where needed.

    Runs ``PLANET_QUERY`` and keeps only rows that carry a rotation value, an
    orbital value and a parseable discovery date; when several rows share a
    label, the last one wins.  ``CONTACT_QUERY`` is then run on a best-effort
    basis to attach contact dates.

    Fallback rules:
      * the planet query raises: a copy of ``_FALLBACK_PLANETS`` is returned;
      * a planet has no complete row: its ``_FALLBACK_PLANETS`` entry is used;
      * a fetched planet ends up without a contact date (contact query failed
        or returned nothing for it): the contact date of its fallback entry
        is used, so known data is not silently replaced by ``None``.

    Returns:
        One ``PlanetData`` per name in ``_PLANET_ORDER``, in that order.
    """
    print("Fetching planet data from Wikidata…")
    try:
        rows = sparql_query(PLANET_QUERY)
    except Exception as exc:  # deliberate best-effort: any failure means fallback
        print(f" WARNING: planet query failed ({exc}), using fallback data.", file=sys.stderr)
        # Return a copy so callers cannot reorder or extend the module-level table.
        return list(_FALLBACK_PLANETS)

    by_name: dict[str, PlanetData] = {}
    for row in rows:
        name = row["name"]["value"]
        rot = _float(row, "rotationHours")
        orb_days = _float(row, "orbitalDays")
        disc = _date(row, "discoveryDate")
        if rot is None or orb_days is None or disc is None:
            continue
        # The sign of the rotation value is discarded; orbital days become hours.
        by_name[name] = PlanetData(name, abs(rot), orb_days * 24, disc)

    try:
        for row in sparql_query(CONTACT_QUERY):
            name = row["name"]["value"]
            contact = _date(row, "contactDate")
            if name in by_name and contact is not None:
                by_name[name].contact_date = contact
    except Exception as exc:  # contact dates are optional; keep what we have
        print(f" WARNING: contact query failed ({exc}).", file=sys.stderr)

    fallback_map = {p.name: p for p in _FALLBACK_PLANETS}
    result: list[PlanetData] = []
    for name in _PLANET_ORDER:
        fallback = fallback_map[name]
        planet = by_name.get(name)
        if planet is None:
            print(f" WARNING: {name} missing from Wikidata, using fallback.", file=sys.stderr)
            planet = fallback
        elif planet.contact_date is None:
            # Only freshly built rows are modified here, never the fallback entries.
            planet.contact_date = fallback.contact_date
        result.append(planet)

    print(f" Planets: {', '.join(p.name for p in result)}")
    return result
# ---------------------------------------------------------------------------
# Moon data
# ---------------------------------------------------------------------------
# SPARQL text run by fetch_moons().  For the fourteen items listed in VALUES
# it selects the English label (?moonLabel) and, each in its own OPTIONAL
# group, a ?rotationHours value, an ?orbitalHours value (the ?orbDays quantity
# multiplied by 24.0 inside the query), a ?discoveryDate and a ?contactDate
# read from P619 on the moon item itself.
# The "# Phobos"-style annotations are inside the string, so they are sent to
# the endpoint as SPARQL comments.
# NOTE(review): the QIDs are not verifiable from this file.  In particular
# wd:Q40 and wd:Q39 are believed to be country items (Austria / Switzerland)
# on Wikidata rather than Phobos / Deimos — verify every ID before relying on
# live data.
# NOTE(review): confirm P2386 / P2257 / Q7727 / Q573 denote rotation period,
# orbital period, hour and day, and that P619 is meaningful on a moon item.
MOON_QUERY = """\
SELECT ?moonLabel ?rotationHours ?orbitalHours ?discoveryDate ?contactDate WHERE {
VALUES ?moon {
wd:Q40 # Phobos
wd:Q39 # Deimos
wd:Q36236 # Io
wd:Q36712 # Europa
wd:Q44537 # Ganymede
wd:Q44523 # Callisto
wd:Q2565 # Titan
wd:Q3532 # Enceladus
wd:Q3552 # Miranda
wd:Q3551 # Ariel
wd:Q3543 # Umbriel
wd:Q3555 # Titania
wd:Q3547 # Oberon
wd:Q3561 # Triton
}
?moon rdfs:label ?moonLabel FILTER(LANG(?moonLabel) = "en").
OPTIONAL {
?moon p:P2386 ?rotStmt.
?rotStmt ps:P2386 ?rotationHours.
?rotStmt psv:P2386/wikibase:quantityUnit wd:Q7727.
}
OPTIONAL {
?moon p:P2257 ?orbStmt.
?orbStmt ps:P2257 ?orbDays.
?orbStmt psv:P2257/wikibase:quantityUnit wd:Q573.
BIND(?orbDays * 24.0 AS ?orbitalHours)
}
OPTIONAL { ?moon wdt:P575 ?discoveryDate. }
OPTIONAL { ?moon wdt:P619 ?contactDate. }
}
"""
@dataclass
class MoonData:
    """One moon row, as emitted into ``MOON_ROWS`` of the generated module."""

    # English label; also the key used to match Wikidata rows to fallback rows.
    name: str
    # Rotation and orbital values, both in hours.
    rotation_hours: float
    orbital_hours: float
    # True when rotation and orbital values are treated as equal.
    is_tidally_locked: bool
    discovery_date: date
    # Optional; left as None when no contact date is known.
    contact_date: date | None = None
# Built-in moon rows that fetch_moons() uses when the Wikidata query fails or
# a moon has no complete row.  Positional fields: name, rotation_hours,
# orbital_hours, is_tidally_locked, discovery_date, optional contact_date.
# Every entry lists the same value for rotation and orbit and is marked as
# tidally locked; only Titan carries a contact date.
_FALLBACK_MOONS: list[MoonData] = [
MoonData("Phobos", 7.653, 7.653, True, date(1877, 8, 18)),
MoonData("Deimos", 30.312, 30.312, True, date(1877, 8, 12)),
MoonData("Io", 42.456, 42.456, True, date(1610, 1, 8)),
MoonData("Europa", 85.228, 85.228, True, date(1610, 1, 8)),
MoonData("Ganymede", 171.709, 171.709, True, date(1610, 1, 7)),
MoonData("Callisto", 400.535, 400.535, True, date(1610, 1, 7)),
MoonData("Titan", 382.690, 382.690, True, date(1655, 3, 25), date(2005, 1, 14)),
MoonData("Enceladus", 32.923, 32.923, True, date(1789, 8, 28)),
MoonData("Miranda", 33.923, 33.923, True, date(1948, 2, 16)),
MoonData("Ariel", 60.489, 60.489, True, date(1851, 10, 24)),
MoonData("Umbriel", 99.460, 99.460, True, date(1851, 10, 24)),
MoonData("Titania", 208.940, 208.940, True, date(1787, 1, 11)),
MoonData("Oberon", 323.117, 323.117, True, date(1787, 1, 11)),
MoonData("Triton", 141.045, 141.045, True, date(1846, 10, 10)),
]
# Names in output order; fetch_moons() emits exactly one row per entry.
_MOON_ORDER = [m.name for m in _FALLBACK_MOONS]
# Relative difference between rotation and orbital values below which
# fetch_moons() flags a moon as tidally locked (0.01 = 1 %).
_TIDAL_THRESHOLD = 0.01
def fetch_moons() -> list[MoonData]:
    """Fetch moon data from Wikidata, substituting built-in values where needed.

    Runs ``MOON_QUERY`` and keeps only rows that carry a rotation value, an
    orbital value and a parseable discovery date; when several rows share a
    label, the last one wins.  A moon is flagged as tidally locked when the
    relative difference between its two periods is below ``_TIDAL_THRESHOLD``.

    Fallback rules:
      * the query raises: a copy of ``_FALLBACK_MOONS`` is returned;
      * a moon has no usable row (incomplete, or both periods are zero): its
        ``_FALLBACK_MOONS`` entry is used;
      * a fetched moon has no contact date: the contact date of its fallback
        entry is used, so known data is not silently replaced by ``None``.

    Returns:
        One ``MoonData`` per name in ``_MOON_ORDER``, in that order.
    """
    print("Fetching moon data from Wikidata…")
    try:
        rows = sparql_query(MOON_QUERY)
    except Exception as exc:  # deliberate best-effort: any failure means fallback
        print(f" WARNING: moon query failed ({exc}), using fallback data.", file=sys.stderr)
        # Return a copy so callers cannot reorder or extend the module-level table.
        return list(_FALLBACK_MOONS)

    by_name: dict[str, MoonData] = {}
    for row in rows:
        name = row["moonLabel"]["value"]
        rot = _float(row, "rotationHours")
        orb = _float(row, "orbitalHours")
        disc = _date(row, "discoveryDate")
        if rot is None or orb is None or disc is None:
            continue
        rot, orb = abs(rot), abs(orb)
        longest = max(rot, orb)
        if longest == 0:
            # Both periods are zero: the ratio below would divide by zero, so
            # treat the row as unusable and let the fallback entry apply.
            continue
        locked = abs(rot - orb) / longest < _TIDAL_THRESHOLD
        by_name[name] = MoonData(name, rot, orb, locked, disc, _date(row, "contactDate"))

    fallback_map = {m.name: m for m in _FALLBACK_MOONS}
    result: list[MoonData] = []
    for name in _MOON_ORDER:
        fallback = fallback_map[name]
        moon = by_name.get(name)
        if moon is None:
            print(f" WARNING: {name} missing from Wikidata, using fallback.", file=sys.stderr)
            moon = fallback
        elif moon.contact_date is None:
            # Only freshly built rows are modified here, never the fallback entries.
            moon.contact_date = fallback.contact_date
        result.append(moon)

    print(f" Moons: {', '.join(m.name for m in result)}")
    return result
# ---------------------------------------------------------------------------
# Code generation — writes only _data.py
# ---------------------------------------------------------------------------
def _dr(d: date) -> str:
return f"date({d.year}, {d.month:2d}, {d.day:2d})"
def _dr_opt(d: date | None) -> str:
return "None" if d is None else _dr(d)
def generate_data_py(planets: list[PlanetData], moons: list[MoonData]) -> str:
    """Build the source text of the generated ``_data.py`` module.

    Args:
        planets: Rows emitted, in order, into ``PLANET_ROWS``.
        moons: Rows emitted, in order, into ``MOON_ROWS``.

    Returns:
        The module text: a header comment, the ``date`` import and the two
        annotated list literals, joined with newlines and ending in a newline.
    """
    planet_header = [
        "# AUTO-GENERATED by scripts/refresh_data.py — do not edit by hand.",
        "from datetime import date",
        "",
        "# (name, rotation_hours, orbital_hours, discovery_date, contact_date | None)",
        "PLANET_ROWS: list[tuple[str, float, float, date, date | None]] = [",
    ]
    # Fixed-width format specs keep the columns of the emitted tuples aligned.
    planet_rows = [
        f" ({planet.name!r:12s}, {planet.rotation_hours:10.3f}, {planet.orbital_hours:12.4f},"
        f" {_dr(planet.discovery_date)}, {_dr_opt(planet.contact_date)}),"
        for planet in planets
    ]
    moon_header = [
        "]",
        "",
        "# (name, rotation_hours, orbital_hours, is_tidally_locked, discovery_date, contact_date | None)",
        "MOON_ROWS: list[tuple[str, float, float, bool, date, date | None]] = [",
    ]
    moon_rows = [
        f" ({moon.name!r:12s}, {moon.rotation_hours:8.3f}, {moon.orbital_hours:8.3f},"
        f" {str(moon.is_tidally_locked):5s}, {_dr(moon.discovery_date)}, {_dr_opt(moon.contact_date)}),"
        for moon in moons
    ]
    # The trailing empty string yields the final newline of the file.
    return "\n".join([*planet_header, *planet_rows, *moon_header, *moon_rows, "]", ""])
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Command-line entry point: fetch the data, then write or print ``_data.py``.

    With ``--dry-run`` the generated module text is printed under a banner
    naming ``DATA_FILE``; otherwise it is written to ``DATA_FILE`` as UTF-8.
    """
    cli = argparse.ArgumentParser(description="Refresh planetary data from Wikidata.")
    cli.add_argument("--dry-run", action="store_true", help="Print generated _data.py without writing.")
    options = cli.parse_args()

    # Arguments are evaluated left to right, so planets are fetched before moons.
    generated = generate_data_py(fetch_planets(), fetch_moons())

    if not options.dry_run:
        DATA_FILE.write_text(generated, encoding="utf-8")
        print(f"\nWritten: {DATA_FILE}")
        print("Run your test suite to verify the updated data.")
    else:
        print(f"\n# {DATA_FILE}\n{'=' * 60}")
        print(generated)


if __name__ == "__main__":
    main()