Files
Curator/tests/test_hardlink_manager.py

724 lines
26 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pytest
import os
from src.core.hardlink_manager import HardlinkManager, create_hardlink_structure
from src.core.file import File
from src.core.tag import Tag
from src.core.tag_manager import TagManager
class TestHardlinkManager:
    """Tests for HardlinkManager: creating, previewing and removing hardlinks."""

    @staticmethod
    def _make_file(path, tag_manager, *tags):
        """Build a File for ``path`` whose tag set is exactly ``tags``.

        Each entry of ``tags`` is a ``(category, value)`` pair. The File's
        initial tag set is cleared first, then the tags are added in order.
        """
        tagged = File(path, tag_manager)
        tagged.tags.clear()  # ensure a clean tag set
        for category, value in tags:
            tagged.add_tag(Tag(category, value))
        return tagged

    @pytest.fixture
    def tag_manager(self):
        """Provide a TagManager with every pre-existing category removed."""
        tm = TagManager()
        # Iterate over a snapshot of the keys: categories are removed in the loop.
        for category in list(tm.tags_by_category.keys()):
            tm.remove_category(category)
        return tm

    @pytest.fixture
    def temp_source_dir(self, tmp_path):
        """Provide a source folder containing three small text files."""
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        for index in (1, 2, 3):
            (source_dir / f"file{index}.txt").write_text(f"content{index}")
        return source_dir

    @pytest.fixture
    def temp_output_dir(self, tmp_path):
        """Provide an existing, empty output folder."""
        output_dir = tmp_path / "output"
        output_dir.mkdir()
        return output_dir

    @pytest.fixture
    def files_with_tags(self, temp_source_dir, tag_manager):
        """Provide three Files carrying three tags, one tag and no tags."""
        return [
            # File 1 with multiple tags
            self._make_file(
                temp_source_dir / "file1.txt",
                tag_manager,
                ("žánr", "Komedie"),
                ("žánr", "Akční"),
                ("rok", "1988"),
            ),
            # File 2 with one tag
            self._make_file(temp_source_dir / "file2.txt", tag_manager, ("žánr", "Drama")),
            # File 3 with no tags
            self._make_file(temp_source_dir / "file3.txt", tag_manager),
        ]

    def test_hardlink_manager_creation(self, temp_output_dir):
        """A fresh manager stores its output dir and has empty link/error lists."""
        manager = HardlinkManager(temp_output_dir)
        assert manager.output_dir == temp_output_dir
        assert manager.created_links == []
        assert manager.errors == []

    def test_create_structure_basic(self, files_with_tags, temp_output_dir):
        """Every tag of every file yields a <category>/<value>/<name> link."""
        manager = HardlinkManager(temp_output_dir)
        success, fail = manager.create_structure_for_files(files_with_tags)

        # File1 has 3 tags, File2 has 1 tag, File3 has 0 tags -> 4 hardlinks.
        assert success == 4
        assert fail == 0

        # Check directory structure
        expected_links = [
            ("žánr", "Komedie", "file1.txt"),
            ("žánr", "Akční", "file1.txt"),
            ("rok", "1988", "file1.txt"),
            ("žánr", "Drama", "file2.txt"),
        ]
        for category, value, name in expected_links:
            assert (temp_output_dir / category / value / name).exists()

    def test_hardlinks_are_same_inode(self, files_with_tags, temp_output_dir, temp_source_dir):
        """Created entries are real hardlinks to the source file."""
        manager = HardlinkManager(temp_output_dir)
        manager.create_structure_for_files(files_with_tags)

        original = (temp_source_dir / "file1.txt").stat()
        hardlink = (temp_output_dir / "žánr" / "Komedie" / "file1.txt").stat()

        # Inode numbers are only unique within one device, so the pair
        # (st_dev, st_ino) is what actually identifies the same file.
        assert (hardlink.st_dev, hardlink.st_ino) == (original.st_dev, original.st_ino)

    def test_create_structure_with_category_filter(self, files_with_tags, temp_output_dir):
        """Only the categories passed in ``categories`` are materialised."""
        manager = HardlinkManager(temp_output_dir)
        success, fail = manager.create_structure_for_files(files_with_tags, categories=["žánr"])

        # Only "žánr" tags should be processed (3 links)
        assert success == 3
        assert fail == 0
        assert (temp_output_dir / "žánr" / "Komedie" / "file1.txt").exists()
        assert not (temp_output_dir / "rok").exists()

    def test_create_structure_with_category_roots(self, files_with_tags, temp_output_dir):
        """category_roots: genres sit at the output root, rok under 'Dle roku'."""
        manager = HardlinkManager(temp_output_dir)
        roots = {"žánr": "", "rok": "Dle roku"}
        manager.create_structure_for_files(files_with_tags, category_roots=roots)

        # Genres directly at the output root (no "žánr" wrapper folder)
        for genre, name in (("Komedie", "file1.txt"), ("Akční", "file1.txt"), ("Drama", "file2.txt")):
            assert (temp_output_dir / genre / name).exists()
        assert not (temp_output_dir / "žánr").exists()

        # Rok grouped under its own "Dle roku" folder
        assert (temp_output_dir / "Dle roku" / "1988" / "file1.txt").exists()

    def test_sync_with_roots_leaves_unmanaged_mirror_untouched(
        self, files_with_tags, temp_source_dir, temp_output_dir
    ):
        """Cleanup must not delete links in a copy-as-is mirror (e.g. Seriály)."""
        manager = HardlinkManager(temp_output_dir)
        roots = {"žánr": "", "rok": "Dle roku"}
        manager.create_structure_for_files(files_with_tags, category_roots=roots)

        # Simulate a copy-as-is mirror holding a hardlink to a source file
        mirror = temp_output_dir / "Seriály"
        mirror.mkdir()
        mirror_link = mirror / "file1.txt"
        os.link(temp_source_dir / "file1.txt", mirror_link)

        manager.sync_structure(files_with_tags, category_roots=roots)

        # The mirror (not a managed tag folder) is left alone
        assert mirror_link.exists()

    def test_category_transform_groups_folder_by_band(
        self, temp_source_dir, temp_output_dir, tag_manager
    ):
        """Exact tag value, but the folder name goes through the transform."""
        rated = self._make_file(
            temp_source_dir / "file1.txt",
            tag_manager,
            ("Hodnocení", "90"),  # exact rating tag
        )

        manager = HardlinkManager(temp_output_dir)
        manager.create_structure_for_files(
            [rated],
            category_roots={"Hodnocení": "Dle hodnocení"},
            category_transforms={"Hodnocení": "decade_band"},
        )

        # NOTE(review): "90100 %" may have lost a dash between 90 and 100 when
        # this file was extracted (the viewer flagged ambiguous Unicode
        # characters) — confirm against the band label emitted by decade_band.
        # folder is the band; the file lands inside it
        assert (temp_output_dir / "Dle hodnocení" / "90100 %" / "file1.txt").exists()
        # not a per-exact-value folder
        assert not (temp_output_dir / "Dle hodnocení" / "90").exists()

    def test_filename_template_applies_only_in_that_category(
        self, temp_source_dir, temp_output_dir, tag_manager
    ):
        """A per-category template renames the hardlink only inside its folder."""
        movie = self._make_file(temp_source_dir / "file1.txt", tag_manager)
        movie.title = "Dr. No"
        movie.csfd_cache = {"year": 1962}
        movie.add_tag(Tag("Kolekce", "James Bond"))
        movie.add_tag(Tag("žánr", "Akční"))

        manager = HardlinkManager(temp_output_dir)
        manager.create_structure_for_files(
            [movie],
            category_roots={"Kolekce": "Dle kolekce", "žánr": ""},
            category_filename_templates={"Kolekce": "{year} - {title}{ext}"},
        )

        # Templated name inside the collection folder
        assert (temp_output_dir / "Dle kolekce" / "James Bond" / "1962 - Dr. No.txt").exists()
        # Other categories keep the pool filename
        assert (temp_output_dir / "Akční" / "file1.txt").exists()

    def test_filename_template_cleanup_is_consistent(
        self, temp_source_dir, temp_output_dir, tag_manager
    ):
        """sync twice with a template leaves no stale/duplicate templated link."""
        movie = self._make_file(temp_source_dir / "file1.txt", tag_manager)
        movie.title = "Dr. No"
        movie.csfd_cache = {"year": 1962}
        movie.add_tag(Tag("Kolekce", "James Bond"))

        roots = {"Kolekce": "Dle kolekce"}
        templates = {"Kolekce": "{year} - {title}{ext}"}
        manager = HardlinkManager(temp_output_dir)

        manager.sync_structure([movie], category_roots=roots, category_filename_templates=templates)
        # Only the removal count of the second run matters here.
        _, _, removed, _ = manager.sync_structure(
            [movie], category_roots=roots, category_filename_templates=templates
        )

        folder = temp_output_dir / "Dle kolekce" / "James Bond"
        # Sorted so the comparison does not rely on directory listing order.
        assert sorted(entry.name for entry in folder.iterdir()) == ["1962 - Dr. No.txt"]
        assert removed == 0  # nothing treated as obsolete on the second run

    def test_dry_run(self, files_with_tags, temp_output_dir):
        """A dry run reports the link count without touching the filesystem."""
        manager = HardlinkManager(temp_output_dir)
        success, fail = manager.create_structure_for_files(files_with_tags, dry_run=True)

        assert success == 4
        assert fail == 0
        # No actual files should be created
        assert not (temp_output_dir / "žánr").exists()

    def test_get_preview(self, files_with_tags, temp_output_dir):
        """The preview lists one entry per link that would be created."""
        manager = HardlinkManager(temp_output_dir)
        preview = manager.get_preview(files_with_tags)

        assert len(preview) == 4
        # The second element of each preview entry is the target path.
        targets = [entry[1] for entry in preview]
        assert temp_output_dir / "žánr" / "Komedie" / "file1.txt" in targets
        assert temp_output_dir / "žánr" / "Drama" / "file2.txt" in targets

    def test_get_preview_with_category_filter(self, files_with_tags, temp_output_dir):
        """The preview honours the ``categories`` filter."""
        manager = HardlinkManager(temp_output_dir)
        preview = manager.get_preview(files_with_tags, categories=["rok"])

        assert len(preview) == 1
        assert preview[0][1] == temp_output_dir / "rok" / "1988" / "file1.txt"

    def test_remove_created_links(self, files_with_tags, temp_output_dir):
        """remove_created_links deletes the links and the emptied folders."""
        manager = HardlinkManager(temp_output_dir)
        manager.create_structure_for_files(files_with_tags)

        tag_folder = temp_output_dir / "žánr" / "Komedie"
        link = tag_folder / "file1.txt"
        # Verify links exist
        assert link.exists()

        # Remove links
        removed = manager.remove_created_links()
        assert removed == 4

        # Links should be gone
        assert not link.exists()
        # Empty directories should also be removed
        assert not tag_folder.exists()

    def test_empty_files_list(self, temp_output_dir):
        """An empty file list produces no links and no failures."""
        manager = HardlinkManager(temp_output_dir)
        success, fail = manager.create_structure_for_files([])
        assert success == 0
        assert fail == 0

    def test_files_without_tags(self, temp_source_dir, temp_output_dir, tag_manager):
        """A file with no tags produces no links and no failures."""
        untagged = self._make_file(temp_source_dir / "file1.txt", tag_manager)

        manager = HardlinkManager(temp_output_dir)
        success, fail = manager.create_structure_for_files([untagged])
        assert success == 0
        assert fail == 0

    def test_duplicate_link_same_file(self, files_with_tags, temp_output_dir):
        """An existing hardlink to the same file is skipped on a second run."""
        # Create first time
        first_run = HardlinkManager(temp_output_dir)
        success1, _ = first_run.create_structure_for_files(files_with_tags)
        # Guard: without real links from the first run, the "skipped"
        # assertions below would pass vacuously.
        assert success1 == 4

        # Create second time - should skip existing
        second_run = HardlinkManager(temp_output_dir)
        success2, fail2 = second_run.create_structure_for_files(files_with_tags)

        # All should be skipped (same inode)
        assert success2 == 0
        assert fail2 == 0

    def test_unique_name_on_conflict(self, temp_source_dir, temp_output_dir, tag_manager):
        """A different file with a clashing name gets a unique link name."""
        # Create first file
        first = self._make_file(temp_source_dir / "file1.txt", tag_manager, ("test", "tag"))
        HardlinkManager(temp_output_dir).create_structure_for_files([first])

        # Create different file with same name in different location
        other_dir = temp_source_dir / "subdir"
        other_dir.mkdir()
        (other_dir / "file1.txt").write_text("different content")
        second = self._make_file(other_dir / "file1.txt", tag_manager, ("test", "tag"))

        # Should create file1_1.txt
        success, _ = HardlinkManager(temp_output_dir).create_structure_for_files([second])
        assert success == 1
        assert (temp_output_dir / "test" / "tag" / "file1_1.txt").exists()

    def test_czech_characters_in_tags(self, temp_source_dir, temp_output_dir, tag_manager):
        """Czech diacritics in category and tag names survive as folder names."""
        tagged = self._make_file(
            temp_source_dir / "file1.txt",
            tag_manager,
            ("Žánr", "Česká komedie"),
            ("Štítky", "Příběh"),
        )

        manager = HardlinkManager(temp_output_dir)
        success, fail = manager.create_structure_for_files([tagged])

        assert success == 2
        assert fail == 0
        assert (temp_output_dir / "Žánr" / "Česká komedie" / "file1.txt").exists()
        assert (temp_output_dir / "Štítky" / "Příběh" / "file1.txt").exists()
class TestConvenienceFunction:
    """Tests for the create_hardlink_structure convenience function."""

    @pytest.fixture
    def tag_manager(self):
        """Provide a TagManager from which all categories have been removed."""
        manager = TagManager()
        existing = list(manager.tags_by_category.keys())
        for name in existing:
            manager.remove_category(name)
        return manager

    @pytest.fixture
    def temp_files(self, tmp_path, tag_manager):
        """Provide a one-element list: source/file.txt tagged cat/tag."""
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        file_path = source_dir / "file.txt"
        file_path.write_text("content")

        tagged = File(file_path, tag_manager)
        tagged.tags.clear()
        tagged.add_tag(Tag("cat", "tag"))
        return [tagged]

    def test_create_hardlink_structure_function(self, temp_files, tmp_path):
        """The function links the file and reports one success, no errors."""
        output_dir = tmp_path / "output"
        output_dir.mkdir()

        success, fail, errors = create_hardlink_structure(temp_files, output_dir)

        assert success == 1
        assert fail == 0
        assert len(errors) == 0
        expected_link = output_dir / "cat" / "tag" / "file.txt"
        assert expected_link.exists()

    def test_create_hardlink_structure_with_categories(self, tmp_path, tag_manager):
        """Only the categories listed in ``categories`` get a folder."""
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        file_path = source_dir / "file.txt"
        file_path.write_text("content")

        tagged = File(file_path, tag_manager)
        tagged.tags.clear()
        for category, value in (("include", "yes"), ("exclude", "no")):
            tagged.add_tag(Tag(category, value))

        output_dir = tmp_path / "output"
        output_dir.mkdir()

        # The three-element unpacking is kept: it pins the shape of the result.
        success, _fail, _errors = create_hardlink_structure(
            [tagged], output_dir, categories=["include"]
        )

        assert success == 1
        assert (output_dir / "include" / "yes" / "file.txt").exists()
        assert not (output_dir / "exclude").exists()
class TestSyncStructure:
    """Tests for synchronising the hardlink structure with the current tags."""

    @staticmethod
    def _retag(tagged, *tags):
        """Replace the tags of ``tagged`` with the given (category, value) pairs."""
        tagged.tags.clear()
        for category, value in tags:
            tagged.add_tag(Tag(category, value))

    def _new_file(self, source, tag_manager, *tags):
        """Write ``source/file.txt`` and wrap it in a File carrying only ``tags``."""
        path = source / "file.txt"
        path.write_text("content")
        tagged = File(path, tag_manager)
        self._retag(tagged, *tags)
        return tagged

    @pytest.fixture
    def tag_manager(self):
        """Provide a TagManager from which all categories have been removed."""
        manager = TagManager()
        for name in list(manager.tags_by_category.keys()):
            manager.remove_category(name)
        return manager

    @pytest.fixture
    def setup_dirs(self, tmp_path):
        """Create and return the ``(source, output)`` folder pair."""
        folders = (tmp_path / "source", tmp_path / "output")
        for folder in folders:
            folder.mkdir()
        return folders

    def test_find_obsolete_links_empty_output(self, setup_dirs, tag_manager):
        """With nothing in the output folder there is nothing obsolete."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("cat", "tag"))

        obsolete = HardlinkManager(output).find_obsolete_links([tagged])

        assert obsolete == []

    def test_find_obsolete_links_detects_removed_tag(self, setup_dirs, tag_manager):
        """A link whose tag was removed from the file is reported as obsolete."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("cat", "tag1"), ("cat", "tag2"))

        # Create structure with both tags
        manager = HardlinkManager(output)
        manager.create_structure_for_files([tagged])
        kept_link = output / "cat" / "tag1" / "file.txt"
        stale_link = output / "cat" / "tag2" / "file.txt"
        assert kept_link.exists()
        assert stale_link.exists()

        # Remove one tag from file: only tag1 remains
        self._retag(tagged, ("cat", "tag1"))

        # Find obsolete
        obsolete = manager.find_obsolete_links([tagged])
        assert len(obsolete) == 1
        assert obsolete[0][0] == stale_link

    def test_remove_obsolete_links(self, setup_dirs, tag_manager):
        """Obsolete links are deleted while still-valid links are kept."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("cat", "tag1"), ("cat", "tag2"))

        manager = HardlinkManager(output)
        manager.create_structure_for_files([tagged])

        # Remove tag2
        self._retag(tagged, ("cat", "tag1"))

        # Remove obsolete links
        removed, _paths = manager.remove_obsolete_links([tagged])

        assert removed == 1
        assert not (output / "cat" / "tag2" / "file.txt").exists()
        assert (output / "cat" / "tag1" / "file.txt").exists()

    def test_remove_obsolete_links_dry_run(self, setup_dirs, tag_manager):
        """A dry run counts the obsolete link but leaves it on disk."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("cat", "tag1"), ("cat", "tag2"))

        manager = HardlinkManager(output)
        manager.create_structure_for_files([tagged])

        self._retag(tagged, ("cat", "tag1"))

        removed, _paths = manager.remove_obsolete_links([tagged], dry_run=True)

        assert removed == 1
        # File should still exist (dry run)
        assert (output / "cat" / "tag2" / "file.txt").exists()

    def test_sync_structure_creates_and_removes(self, setup_dirs, tag_manager):
        """sync_structure adds the new link and removes the old one."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("cat", "old_tag"))

        # Create initial structure
        manager = HardlinkManager(output)
        manager.create_structure_for_files([tagged])
        assert (output / "cat" / "old_tag" / "file.txt").exists()

        # Change tags
        self._retag(tagged, ("cat", "new_tag"))

        # Sync
        created, create_failed, removed, remove_failed = manager.sync_structure([tagged])

        assert created == 1
        assert removed == 1
        assert create_failed == 0
        assert remove_failed == 0
        assert not (output / "cat" / "old_tag").exists()
        assert (output / "cat" / "new_tag" / "file.txt").exists()

    def test_sync_structure_no_changes_needed(self, setup_dirs, tag_manager):
        """Syncing an up-to-date structure removes nothing."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("cat", "tag"))

        manager = HardlinkManager(output)
        manager.create_structure_for_files([tagged])

        # Sync again without changes; only the removal count is checked.
        _created, _create_failed, removed, _remove_failed = manager.sync_structure([tagged])

        # Nothing should change (existing links are skipped)
        assert removed == 0
        assert (output / "cat" / "tag" / "file.txt").exists()

    def test_find_obsolete_with_category_filter(self, setup_dirs, tag_manager):
        """The ``categories`` filter limits which obsolete links are reported."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("cat1", "tag"), ("cat2", "tag"))

        manager = HardlinkManager(output)
        manager.create_structure_for_files([tagged])

        # Remove both tags
        tagged.tags.clear()

        # Find obsolete only in cat1
        obsolete = manager.find_obsolete_links([tagged], categories=["cat1"])

        assert len(obsolete) == 1
        assert obsolete[0][0] == output / "cat1" / "tag" / "file.txt"

    def test_removes_empty_directories(self, setup_dirs, tag_manager):
        """Folders emptied by removing obsolete links are deleted as well."""
        source, output = setup_dirs
        tagged = self._new_file(source, tag_manager, ("category", "tag"))

        manager = HardlinkManager(output)
        manager.create_structure_for_files([tagged])

        # Remove all tags
        tagged.tags.clear()
        manager.remove_obsolete_links([tagged])

        # Directory should be gone
        category_dir = output / "category"
        assert not (category_dir / "tag").exists()
        assert not category_dir.exists()
class TestEdgeCases:
    """Tests for edge cases."""

    @pytest.fixture
    def tag_manager(self):
        """Provide a TagManager from which all categories have been removed."""
        manager = TagManager()
        for name in list(manager.tags_by_category.keys()):
            manager.remove_category(name)
        return manager

    def test_nonexistent_output_dir_created(self, tmp_path, tag_manager):
        """A missing (nested) output folder is created on demand."""
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        file_path = source_dir / "file.txt"
        file_path.write_text("content")

        tagged = File(file_path, tag_manager)
        tagged.tags.clear()
        tagged.add_tag(Tag("cat", "tag"))

        # Deliberately not created beforehand.
        output_dir = tmp_path / "output" / "nested" / "deep"

        success, _fail = HardlinkManager(output_dir).create_structure_for_files([tagged])

        assert success == 1
        assert (output_dir / "cat" / "tag" / "file.txt").exists()

    def test_special_characters_in_filename(self, tmp_path, tag_manager):
        """Spaces and parentheses in the file name are preserved in the link."""
        file_name = "file with spaces (2024).txt"
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        (source_dir / file_name).write_text("content")

        tagged = File(source_dir / file_name, tag_manager)
        tagged.tags.clear()
        tagged.add_tag(Tag("test", "tag"))

        output_dir = tmp_path / "output"
        output_dir.mkdir()

        success, _fail = HardlinkManager(output_dir).create_structure_for_files([tagged])

        assert success == 1
        assert (output_dir / "test" / "tag" / file_name).exists()

    def test_empty_category_filter(self, tmp_path, tag_manager):
        """An empty ``categories`` list selects nothing, so no link is made."""
        source_dir = tmp_path / "source"
        source_dir.mkdir()
        file_path = source_dir / "file.txt"
        file_path.write_text("content")

        tagged = File(file_path, tag_manager)
        tagged.tags.clear()
        tagged.add_tag(Tag("cat", "tag"))

        output_dir = tmp_path / "output"
        output_dir.mkdir()
        manager = HardlinkManager(output_dir)

        # Empty list = no categories = no links
        success, _fail = manager.create_structure_for_files([tagged], categories=[])

        assert success == 0

    def test_is_same_file_method(self, tmp_path):
        """_is_same_file is True only for paths that share one file."""
        original = tmp_path / "file1.txt"
        original.write_text("content")

        linked = tmp_path / "link.txt"
        os.link(original, linked)

        unrelated = tmp_path / "file2.txt"
        unrelated.write_text("different")

        manager = HardlinkManager(tmp_path)

        # Same inode
        assert manager._is_same_file(original, linked) is True
        # Different inode
        assert manager._is_same_file(original, unrelated) is False
        # Non-existent file
        assert manager._is_same_file(original, tmp_path / "nonexistent") is False

    def test_get_unique_name_method(self, tmp_path):
        """_get_unique_name returns the first free ``_N`` suffixed name."""
        taken_names = ("file.txt", "file_1.txt", "file_2.txt")
        for number, name in enumerate(taken_names, start=1):
            (tmp_path / name).write_text(str(number))

        manager = HardlinkManager(tmp_path)
        candidate = manager._get_unique_name(tmp_path / "file.txt")

        assert candidate == tmp_path / "file_3.txt"
class TestMirrorAsIs:
    """Tests for copy-as-is mirroring (Seriály)."""

    def test_mirror_clones_hierarchy_with_hardlinks(self, tmp_path):
        """The folder hierarchy is mirrored 1:1 and the files are hardlinks."""
        source = tmp_path / "Seriály"
        season = source / "Show" / "S01"
        season.mkdir(parents=True)
        ep1 = season / "ep1.mkv"
        ep2 = season / "ep2.mkv"
        ep1.write_text("a")
        ep2.write_text("b")

        output = tmp_path / "out"
        manager = HardlinkManager(output)
        created, failed = manager.mirror_as_is(source, "Seriály")

        assert failed == 0
        assert created == 2

        linked = output / "Seriály" / "Show" / "S01" / "ep1.mkv"
        assert linked.exists()
        # Inode numbers are only unique within one device, so the pair
        # (st_dev, st_ino) is what actually identifies the same file.
        linked_stat = linked.stat()
        source_stat = ep1.stat()
        assert (linked_stat.st_dev, linked_stat.st_ino) == (source_stat.st_dev, source_stat.st_ino)

    def test_mirror_skips_curator_metadata(self, tmp_path):
        """Metadata files (.!tag, .!index) are not mirrored."""
        source = tmp_path / "Seriály"
        source.mkdir()
        (source / "ep1.mkv").write_text("a")
        (source / ".ep1.mkv.!tag").write_text("{}")
        (source / ".Curator.!index").write_text("{}")

        output = tmp_path / "out"
        manager = HardlinkManager(output)
        created, failed = manager.mirror_as_is(source, "Seriály")

        # Only the media file is linked; both metadata files are skipped.
        assert created == 1
        assert failed == 0
        mirrored = output / "Seriály"
        assert not (mirrored / ".ep1.mkv.!tag").exists()
        assert not (mirrored / ".Curator.!index").exists()

    def test_mirror_nonexistent_source_is_noop(self, tmp_path):
        """A missing source folder results in (0, 0) and nothing else."""
        manager = HardlinkManager(tmp_path / "out")
        assert manager.mirror_as_is(tmp_path / "missing", "Seriály") == (0, 0)