Add per-movie attributes and per-category filename templates

This commit is contained in:
2026-06-16 17:39:39 +02:00
parent b3a61f9e86
commit a71b209539
19 changed files with 1064 additions and 111 deletions
+109
View File
@@ -0,0 +1,109 @@
"""One-off migration: drop the old decade-band rating tags from a pool index.
Earlier the ČSFD rating was stored bucketed (e.g. ``Hodnocení/90–100 %``). Now
the tag carries the exact value (``Hodnocení/90``) and the band is only a folder.
This removes the legacy band tags (``Hodnocení/<x>–<y> %``) so re-fetching from
ČSFD leaves only the exact ratings. Exact rating tags are kept. A timestamped
backup of the index is written first.
Usage:
poetry run python scripts/strip_rating_bands.py [<pool_dir>] [--category "Hodnocení"]
"""
from __future__ import annotations
import re
import sys
import json
import shutil
import argparse
from pathlib import Path
from datetime import datetime
from loguru import logger
# Allow running as a plain script (``python scripts/...``) by exposing the repo root.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src.core.config import load_global_config # noqa: E402
from src.core.pool_index import INDEX_FILENAME # noqa: E402
# A band tag looks like "Hodnocení/90–100 %": its value is two numbers joined
# by a dash and followed by "%".  The separator is accepted as an ASCII hyphen,
# an en dash (U+2013) or an em dash (U+2014), so bands are recognised whichever
# dash was used when they were written.  Exact ratings such as "Hodnocení/90"
# contain no dash range and never match.
_BAND_RE = re.compile(r"\d+\s*[-\u2013\u2014]\s*\d+\s*%")
def _strip_bands(tags: list[str], category: str) -> tuple[list[str], int]:
    """Split the band tags of ``category`` off ``tags``.

    A tag is dropped when it is a string that starts with ``"<category>/"``
    and whose value (the part after that prefix) matches ``_BAND_RE``.  Only
    the value is tested, so a category name that itself contains a range
    cannot cause exact tags of that category to be dropped.  Non-string
    entries and tags of other categories are always kept.

    Args:
        tags: Tags of one record; the list is not modified.
        category: Category name, without the trailing ``/``.

    Returns:
        A ``(kept, removed)`` tuple: the surviving tags in their original
        order and the number of tags that were dropped.
    """
    prefix = f"{category}/"
    value_start = len(prefix)

    def is_band(tag: object) -> bool:
        # Start the regex search after the prefix so the category text is
        # never mistaken for a band value.
        return (
            isinstance(tag, str)
            and tag.startswith(prefix)
            and _BAND_RE.search(tag, value_start) is not None
        )

    kept = [tag for tag in tags if not is_band(tag)]
    return kept, len(tags) - len(kept)
def migrate(index_path: Path, category: str) -> int:
    """Remove band rating tags from the index at ``index_path`` in place.

    Each movie record's ``tags`` list is filtered through ``_strip_bands``.
    The ``csfd_tags`` provenance list, when present, is filtered the same way
    and independently of ``tags``, so a band tag that survives only in the
    provenance list is dropped too.  Records that are not dicts, and tag
    fields that are not lists, are left untouched instead of aborting the run.

    Nothing is written when no band tag is found.  Otherwise a timestamped
    copy of the index is created next to it before the file is overwritten.

    Args:
        index_path: Path of the JSON index file to rewrite.
        category: Tag category whose band tags are removed.

    Returns:
        Number of band tags removed from the ``tags`` lists; removals from
        ``csfd_tags`` are not included in this count.
    """
    with open(index_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # ``or {}`` also covers an explicit ``"movies": null`` in the index.
    movies: dict[str, dict] = data.get("movies") or {}
    total_removed = 0
    provenance_removed = 0
    affected = 0
    for key, record in movies.items():
        if not isinstance(record, dict):
            continue

        removed = 0
        tags = record.get("tags")
        if isinstance(tags, list):
            kept, removed = _strip_bands(tags, category)
            if removed:
                record["tags"] = kept
                logger.debug(f"{key}: removed {removed} band tag(s)")

        # The ČSFD provenance set is cleaned with the same type-safe filter,
        # whether or not ``tags`` itself contained a band tag.
        csfd_removed = 0
        csfd_tags = record.get("csfd_tags")
        if isinstance(csfd_tags, list):
            csfd_kept, csfd_removed = _strip_bands(csfd_tags, category)
            if csfd_removed:
                record["csfd_tags"] = csfd_kept
                logger.debug(
                    f"{key}: removed {csfd_removed} band tag(s) from csfd_tags"
                )

        if removed or csfd_removed:
            total_removed += removed
            provenance_removed += csfd_removed
            affected += 1

    if affected == 0:
        logger.info(f"No '{category}/…–… %' band tags found — nothing to migrate")
        return 0

    # Keep a copy of the untouched index before overwriting it.
    backup = index_path.with_suffix(
        index_path.suffix + f".bak-{datetime.now():%Y%m%d-%H%M%S}"
    )
    shutil.copy2(index_path, backup)
    logger.info(f"Backup written: {backup}")

    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    logger.info(
        f"Removed {total_removed} band '{category}' tag(s) across {affected} record(s)"
    )
    if provenance_removed:
        logger.info(
            f"Removed {provenance_removed} band '{category}' tag(s) from csfd_tags"
        )
    return total_removed
def main() -> None:
    """Parse the command line, locate the pool index and run the migration.

    The pool directory comes from the optional positional argument; when it
    is omitted, the ``pool_dir`` entry of the global config is used instead.
    A missing pool directory or a missing index file is reported through
    ``ArgumentParser.error``, which prints the usage text and exits.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "pool_dir",
        nargs="?",
        help="Pool root (default: pool_dir from the global config)",
    )
    cli.add_argument("--category", default="Hodnocení", help="Rating category")
    options = cli.parse_args()

    # Only consult the global config when no directory was given explicitly.
    pool_root = options.pool_dir
    if not pool_root:
        pool_root = load_global_config().get("pool_dir")
    if not pool_root:
        cli.error("No pool_dir given and none configured in the global config")

    index_file = Path(pool_root) / INDEX_FILENAME
    if not index_file.exists():
        cli.error(f"No index found at {index_file}")

    migrate(index_file, options.category)
# Run the migration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()