"""One-off migration: drop the old decade-band rating tags from a pool index. Earlier the ČSFD rating was stored bucketed (e.g. ``Hodnocení/90–100 %``). Now the tag carries the exact value (``Hodnocení/90``) and the band is only a folder. This removes the legacy band tags (``Hodnocení/ %``) so re-fetching from ČSFD leaves only the exact ratings. Exact rating tags are kept. A timestamped backup of the index is written first. Usage: poetry run python scripts/strip_rating_bands.py [] [--category "Hodnocení"] """ from __future__ import annotations import re import sys import json import shutil import argparse from pathlib import Path from datetime import datetime from loguru import logger # Allow running as a plain script (``python scripts/...``) by exposing the repo root. sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from src.core.config import load_global_config # noqa: E402 from src.core.pool_index import INDEX_FILENAME # noqa: E402 # A band tag looks like "Hodnocení/90–100 %" — value has a dash range and a "%". 
_BAND_RE = re.compile(r"\d+\s*[–-]\s*\d+\s*%")


def _is_band_tag(tag: object, prefix: str) -> bool:
    """Return True when ``tag`` is a string under ``prefix`` whose value is a band."""
    return (
        isinstance(tag, str)
        and tag.startswith(prefix)
        # Match on the value only: a category whose own name happens to
        # contain a range must not cause its exact-value tags to be dropped.
        and _BAND_RE.search(tag[len(prefix):]) is not None
    )


def _strip_bands(tags: list[str], category: str) -> tuple[list[str], int]:
    """Return (kept tags, removed count), dropping ``category`` band tags.

    Entries that are not strings, or that belong to another category, are
    kept untouched and in their original order.
    """
    prefix = f"{category}/"
    kept = [t for t in tags if not _is_band_tag(t, prefix)]
    return kept, len(tags) - len(kept)


def _strip_record(record: dict, category: str) -> tuple[int, int]:
    """Strip band tags from one record in place.

    Both ``tags`` and ``csfd_tags`` are cleaned independently, so a band
    entry that survives only in ``csfd_tags`` is still removed. A field that
    is missing or is not a list is left as it is.

    Returns:
        ``(removed from tags, removed from csfd_tags)``.
    """
    counts = []
    for field in ("tags", "csfd_tags"):
        values = record.get(field)
        if not isinstance(values, list):
            counts.append(0)
            continue
        kept, removed = _strip_bands(values, category)
        if removed:
            record[field] = kept
        counts.append(removed)
    return counts[0], counts[1]


def migrate(index_path: Path, category: str) -> int:
    """Remove band rating tags in place; return number of tags removed.

    The index is read as JSON and every record under its ``"movies"`` key is
    cleaned. When anything changed, a timestamped copy of the index is
    written next to it before the index itself is overwritten.

    Args:
        index_path: Path of the JSON index file to rewrite.
        category: Tag category whose band tags are dropped.

    Returns:
        The number of band tags removed from the records' ``tags`` lists
        (entries removed from ``csfd_tags`` are logged but not counted here).
    """
    with open(index_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    movies: dict[str, dict] = data.get("movies", {})
    total_removed = 0
    provenance_removed = 0
    affected = 0
    for key, record in movies.items():
        if not isinstance(record, dict):
            # Nothing to strip from a record that is not a mapping.
            continue
        removed, csfd_removed = _strip_record(record, category)
        if not (removed or csfd_removed):
            continue
        total_removed += removed
        provenance_removed += csfd_removed
        affected += 1
        if removed:
            logger.debug(f"{key}: removed {removed} band tag(s)")
        if csfd_removed:
            logger.debug(f"{key}: removed {csfd_removed} band tag(s) from csfd_tags")

    if affected == 0:
        logger.info(f"No '{category}/…–… %' band tags found — nothing to migrate")
        return 0

    # Back up before touching the index so a failed write can be undone.
    backup = index_path.with_suffix(
        index_path.suffix + f".bak-{datetime.now():%Y%m%d-%H%M%S}"
    )
    shutil.copy2(index_path, backup)
    logger.info(f"Backup written: {backup}")

    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    logger.info(
        f"Removed {total_removed} band '{category}' tag(s) across {affected} record(s)"
    )
    if provenance_removed:
        logger.info(f"Removed {provenance_removed} band tag(s) from csfd_tags")
    return total_removed


def main() -> None:
    """Parse the command line, locate the index file and run the migration.

    The pool directory comes from the positional argument or, when that is
    omitted, from ``pool_dir`` in the global config. Exits through
    ``parser.error`` when no pool directory is known or no index exists.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "pool_dir",
        nargs="?",
        help="Pool root (default: pool_dir from the global config)",
    )
    parser.add_argument("--category", default="Hodnocení", help="Rating category")
    args = parser.parse_args()

    pool_dir = args.pool_dir or load_global_config().get("pool_dir")
    if not pool_dir:
        parser.error("No pool_dir given and none configured in the global config")

    index_path = Path(pool_dir) / INDEX_FILENAME
    if not index_path.exists():
        parser.error(f"No index found at {index_path}")

    migrate(index_path, args.category)


if __name__ == "__main__":
    main()