Add per-movie attributes and per-category filename templates
This commit is contained in:
@@ -0,0 +1,109 @@
|
||||
"""One-off migration: drop the old decade-band rating tags from a pool index.
|
||||
|
||||
Earlier the ČSFD rating was stored bucketed (e.g. ``Hodnocení/90–100 %``). Now
|
||||
the tag carries the exact value (``Hodnocení/90``) and the band is only a folder.
|
||||
This removes the legacy band tags (``Hodnocení/<x>–<y> %``) so re-fetching from
|
||||
ČSFD leaves only the exact ratings. Exact rating tags are kept. A timestamped
|
||||
backup of the index is written first.
|
||||
|
||||
Usage:
|
||||
poetry run python scripts/strip_rating_bands.py [<pool_dir>] [--category "Hodnocení"]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import shutil
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Allow running as a plain script (``python scripts/...``) by exposing the repo root.
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from src.core.config import load_global_config # noqa: E402
|
||||
from src.core.pool_index import INDEX_FILENAME # noqa: E402
|
||||
|
||||
# A band tag looks like "Hodnocení/90–100 %" — value has a dash range and a "%".
_BAND_RE = re.compile(r"\d+\s*[–-]\s*\d+\s*%")


def _strip_bands(tags: list[str], category: str) -> tuple[list[str], int]:
    """Return ``(kept tags, removed count)``, dropping ``category`` band tags.

    A tag is dropped when it is a string, starts with ``"<category>/"`` and its
    value (the part after that prefix) contains a band such as ``90–100 %``.
    Everything else — exact rating tags, tags of other categories, non-string
    entries — is kept in its original order.

    Args:
        tags: Tags of one record; the list itself is not modified.
        category: Category name whose band tags are removed.

    Returns:
        A new list with the surviving tags and the number of tags dropped.
    """
    prefix = f"{category}/"
    value_start = len(prefix)
    kept = [
        t for t in tags
        if not (
            isinstance(t, str)
            and t.startswith(prefix)
            # Search the value only: a category name that itself contains
            # something like "0-100 %" must not make exact tags look like bands.
            and _BAND_RE.search(t, value_start)
        )
    ]
    return kept, len(tags) - len(kept)
|
||||
|
||||
|
||||
def migrate(index_path: Path, category: str) -> int:
    """Remove legacy band rating tags from the index file in place.

    Loads the JSON index at ``index_path`` and, for every record under its
    ``"movies"`` key, drops the ``category`` band tags (as recognised by
    ``_strip_bands``) from the record's ``tags`` list. For records that lost
    at least one tag, the same filter is applied to ``csfd_tags`` when that
    key holds a list. If anything was removed, the index file is first copied
    to a timestamped ``.bak-…`` sibling and then overwritten.

    Args:
        index_path: Path to the pool index JSON file.
        category: Tag category whose band tags are dropped.

    Returns:
        Number of tags removed from the ``tags`` lists; ``0`` means the file
        was not touched and no backup was written.
    """
    with open(index_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # ``or {}`` also covers an explicit ``"movies": null`` in the index.
    movies: dict[str, dict] = data.get("movies") or {}
    total_removed = 0
    affected = 0
    for key, record in movies.items():
        # Skip malformed records instead of crashing half-way through.
        if not isinstance(record, dict):
            continue
        tags = record.get("tags")
        if not isinstance(tags, list):
            continue

        kept, removed = _strip_bands(tags, category)
        if not removed:
            continue

        record["tags"] = kept
        # Also drop them from the ČSFD provenance set, if present. Reusing
        # _strip_bands keeps both filters identical and tolerates non-string
        # entries, which the previous inline filter crashed on.
        csfd_tags = record.get("csfd_tags")
        if isinstance(csfd_tags, list):
            record["csfd_tags"], _ = _strip_bands(csfd_tags, category)

        total_removed += removed
        affected += 1
        logger.debug(f"{key}: removed {removed} band tag(s)")

    if total_removed == 0:
        logger.info(f"No '{category}/…–… %' band tags found — nothing to migrate")
        return 0

    # Copy the file while it still has its original content, before rewriting.
    backup = index_path.with_suffix(
        index_path.suffix + f".bak-{datetime.now():%Y%m%d-%H%M%S}"
    )
    shutil.copy2(index_path, backup)
    logger.info(f"Backup written: {backup}")

    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    logger.info(
        f"Removed {total_removed} band '{category}' tag(s) across {affected} record(s)"
    )
    return total_removed
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: locate the pool index and migrate it.

    The pool directory comes from the optional positional argument; when it is
    omitted, the ``pool_dir`` entry of the global config is used instead. The
    parser reports an error if no pool directory can be determined or if the
    index file does not exist there.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "pool_dir",
        nargs="?",
        help="Pool root (default: pool_dir from the global config)",
    )
    cli.add_argument("--category", default="Hodnocení", help="Rating category")
    options = cli.parse_args()

    # Only consult the global config when no directory was given explicitly.
    if options.pool_dir:
        root = options.pool_dir
    else:
        root = load_global_config().get("pool_dir")
    if not root:
        cli.error("No pool_dir given and none configured in the global config")

    target = Path(root) / INDEX_FILENAME
    if not target.exists():
        cli.error(f"No index found at {target}")

    migrate(target, options.category)
|
||||
|
||||
|
||||
# Run the migration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user