# File: SQLmem/tests/test_cache.py
# (Listing metadata from the original file viewer: 377 lines, 14 KiB, Python.)

import sqlite3
import threading
import pytest
from sqlmem.cache import CacheManager
@pytest.fixture
def cache(tmp_path):
    """Provide a CacheManager rooted at ``tmp_path / "test_cache.db"``.

    The manager is handed to the test and closed once the test is done.
    """
    # NOTE(review): backup_interval=9999 presumably keeps periodic backups
    # from firing during a test — confirm against CacheManager.
    manager = CacheManager(
        db_path=tmp_path / "test_cache.db",
        backup_interval=9999,
    )
    yield manager
    manager.close()
@pytest.fixture
def source_conn():
    """Provide an in-memory SQLite connection holding a two-row ``users`` table.

    The table has ``name``, ``email`` and ``status`` text columns and is
    populated with ``alice`` and ``bob``. The connection is closed after the
    test.
    """
    seed_rows = [
        ("alice", "alice@example.com", "active"),
        ("bob", "bob@example.com", "inactive"),
    ]
    source = sqlite3.connect(":memory:")
    source.execute("CREATE TABLE users (name TEXT, email TEXT, status TEXT)")
    source.executemany("INSERT INTO users VALUES (?, ?, ?)", seed_rows)
    source.commit()
    yield source
    source.close()
def test_table_not_cached_initially(cache):
    """A freshly created manager does not report ``users`` as cached."""
    cached = cache.is_table_cached("users")
    assert cached is False
def test_load_table(cache, source_conn):
    """Loading ``users`` from the source makes ``is_table_cached`` return True."""
    wanted_columns = ["name", "email"]
    cache.load_table("users", wanted_columns, source_conn)
    cached = cache.is_table_cached("users")
    assert cached is True
def test_loaded_data_correct(cache, source_conn):
    """The cached ``users`` table holds both source rows with the loaded columns."""
    cache.load_table("users", ["name", "email"], source_conn)
    cursor = cache.connection.execute("SELECT name, email FROM users")
    fetched = cursor.fetchall()
    assert len(fetched) == 2
    assert ("alice", "alice@example.com") in fetched
def test_mark_table_refreshed(cache, source_conn):
    """After a load, ``_sqlmem_tables`` records a row count of 2 for ``users``."""
    cache.load_table("users", ["name"], source_conn)
    query = "SELECT row_count FROM _sqlmem_tables WHERE table_name = 'users'"
    metadata_row = cache.connection.execute(query).fetchone()
    recorded_count = metadata_row[0]
    assert recorded_count == 2
def test_backup_and_reload(tmp_path, source_conn):
    """A table loaded by one manager is seen by a later manager on the same path.

    The first manager is closed before the second one is opened on the same
    ``db_path``; the second must report ``users`` as cached.
    """
    db_path = tmp_path / "cache.db"

    first = CacheManager(db_path=db_path, backup_interval=9999)
    try:
        first.load_table("users", ["name"], source_conn)
    finally:
        # Close even if the load fails so the connection is not leaked.
        first.close()

    second = CacheManager(db_path=db_path, backup_interval=9999)
    try:
        assert second.is_table_cached("users") is True
    finally:
        # A failing assertion must not skip the close.
        second.close()
# ---------------------------------------------------------------------------
# Disk-backed mode (in_memory=False)
# ---------------------------------------------------------------------------
def test_disk_mode_persists_without_backup(tmp_path, source_conn):
    """Disk mode writes straight to the file — no explicit backup/close needed."""
    db_path = tmp_path / "cache.db"
    writer = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        writer.load_table("users", ["name"], source_conn)
        # Data is already on disk; a brand-new disk-mode manager sees it.
        reader = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
        try:
            assert reader.is_table_cached("users") is True
        finally:
            # Same close order as before (second manager first), but now
            # guaranteed even when the assertion fails.
            reader.close()
    finally:
        writer.close()
def test_disk_mode_file_created_immediately(tmp_path, source_conn):
    """In disk mode the database file exists right after a load, before close."""
    db_path = tmp_path / "cache.db"
    c = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        c.load_table("users", ["name"], source_conn)
        assert db_path.exists()
    finally:
        # Release the connection even when the assertion fails.
        c.close()
def test_disk_mode_reload_in_new_instance(tmp_path, source_conn):
    """Rows loaded in disk mode are readable from a new manager on the same file."""
    db_path = tmp_path / "cache.db"

    c1 = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        c1.load_table("users", ["name", "email"], source_conn)
    finally:
        # Close even if the load fails so the connection is not leaked.
        c1.close()

    c2 = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        rows = c2.connection.execute("SELECT name FROM users").fetchall()
        assert {r[0] for r in rows} == {"alice", "bob"}
    finally:
        # A failing query or assertion must not skip the close.
        c2.close()
def test_quoted_reserved_and_spaced_identifiers(tmp_path):
    """Table/column names that are reserved words or contain spaces must work."""
    src = sqlite3.connect(":memory:")
    try:
        src.execute('CREATE TABLE "weird tbl" ("order" TEXT, "group by" TEXT)')
        src.executemany(
            'INSERT INTO "weird tbl" VALUES (?, ?)',
            [("1", "a"), ("2", "b")],
        )
        src.commit()

        c = CacheManager(db_path=tmp_path / "c.db", backup_interval=9999)
        try:
            c.load_table("weird tbl", ["order", "group by"], src)
            assert c.is_table_cached("weird tbl") is True
            _, rows = c.execute_in_memory('SELECT "order", "group by" FROM "weird tbl"')
            assert ("1", "a") in rows
        finally:
            # Manager is closed before the source, matching the original order.
            c.close()
    finally:
        # Always release the source connection, even on a failed assertion.
        src.close()
def test_disk_mode_uses_separate_read_connection(tmp_path, source_conn):
    """Disk-mode reads go through a per-thread read connection, not the writer."""
    c = CacheManager(db_path=tmp_path / "c.db", backup_interval=9999, in_memory=False)
    try:
        c.load_table("users", ["name", "email"], source_conn)
        _, rows = c.execute_in_memory("SELECT name FROM users ORDER BY name")
        assert [r[0] for r in rows] == ["alice", "bob"]
        assert len(c._read_conns) == 1
        assert c._read_conns[0] is not c.connection  # dedicated read conn
    finally:
        # Release the connection(s) even when an assertion fails.
        c.close()
def test_disk_mode_concurrent_reads(tmp_path, source_conn):
    """Several reader threads each get their own connection and correct results."""
    c = CacheManager(db_path=tmp_path / "c.db", backup_interval=9999, in_memory=False)
    try:
        c.load_table("users", ["name"], source_conn)
        results: list[int] = []
        errors: list[Exception] = []

        def reader() -> None:
            # Collect failures instead of raising: an exception inside a
            # worker thread would otherwise not fail the test.
            try:
                _, rows = c.execute_in_memory("SELECT name FROM users")
                results.append(len(rows))
            except Exception as e:  # noqa: BLE001
                errors.append(e)

        threads = [threading.Thread(target=reader) for _ in range(5)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(5)

        # join(5) returns silently on timeout; report a hung reader explicitly.
        assert not any(t.is_alive() for t in threads), "reader thread did not finish"
        assert not errors
        assert results == [2] * 5
        assert len(c._read_conns) == 5  # one read connection per reader thread
    finally:
        # Release the connection(s) even when an assertion fails.
        c.close()
def test_memory_mode_uses_shared_connection(cache, source_conn):
    """In-memory mode can't share :memory: across connections — no read conns."""
    cache.load_table("users", ["name"], source_conn)
    cache.execute_in_memory("SELECT name FROM users")
    read_connections = cache._read_conns
    assert read_connections == []
def test_disk_mode_reset_keeps_file(tmp_path, source_conn):
    """``reset()`` in disk mode drops the cached table but leaves the file."""
    db_path = tmp_path / "cache.db"
    c = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        c.load_table("users", ["name"], source_conn)
        c.reset()
        # File stays (the connection is still open) but the table is gone.
        assert db_path.exists()
        assert c.is_table_cached("users") is False
    finally:
        # Release the connection even when an assertion fails.
        c.close()
# ---------------------------------------------------------------------------
# Pragmas / layout tuning (1.11.0)
# ---------------------------------------------------------------------------
def test_pragmas_applied_on_fresh_disk_cache(tmp_path):
    """page_size, auto_vacuum and a generic pragma all take effect on a new file."""
    c = CacheManager(
        db_path=tmp_path / "cache.db",
        backup_interval=9999,
        in_memory=False,
        pragmas={"page_size": 8192, "auto_vacuum": "INCREMENTAL", "cache_size": -2000},
    )
    try:
        assert c.connection.execute("PRAGMA page_size").fetchone()[0] == 8192
        assert c.connection.execute("PRAGMA auto_vacuum").fetchone()[0] == 2  # INCREMENTAL
        assert c.connection.execute("PRAGMA cache_size").fetchone()[0] == -2000
    finally:
        # Release the connection even when an assertion fails.
        c.close()
def test_page_size_ignored_on_existing_file_warns(tmp_path):
    """A page_size that differs from the existing file is ignored, with a warning."""
    # NOTE(review): the name and docstring mention a warning, but nothing here
    # captures log output — only the ignored page size is asserted.
    db_path = tmp_path / "cache.db"

    c1 = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        assert c1.connection.execute("PRAGMA page_size").fetchone()[0] == 4096  # default
    finally:
        # Release the connection even when the assertion fails.
        c1.close()

    c2 = CacheManager(
        db_path=db_path,
        backup_interval=9999,
        in_memory=False,
        pragmas={"page_size": 16384},
    )
    try:
        # File keeps its original page size; the request is ignored (not an error).
        assert c2.connection.execute("PRAGMA page_size").fetchone()[0] == 4096
    finally:
        c2.close()
def test_unknown_pragma_does_not_crash(tmp_path):
    """SQLite ignores unknown/inapplicable pragmas — startup must not fail."""
    c = CacheManager(
        db_path=tmp_path / "cache.db",
        backup_interval=9999,
        in_memory=False,
        pragmas={"this_is_not_a_pragma": 1, "mmap_size": 1024 * 1024},
    )
    try:
        # The valid pragma passed alongside the bogus one must still apply.
        assert c.connection.execute("PRAGMA mmap_size").fetchone()[0] == 1024 * 1024
    finally:
        # Release the connection even when the assertion fails.
        c.close()
# ---------------------------------------------------------------------------
# hard_reset / vacuum (1.11.0)
# ---------------------------------------------------------------------------
def test_hard_reset_recreates_file_and_clears_tables(tmp_path, source_conn):
    """``hard_reset()`` leaves a usable, empty disk cache at the same path."""
    db_path = tmp_path / "cache.db"
    c = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        c.load_table("users", ["name"], source_conn)
        assert c.is_table_cached("users") is True

        c.hard_reset()
        assert db_path.exists()  # reopened fresh
        assert c.is_table_cached("users") is False

        # The connection is usable again after the swap.
        c.load_table("users", ["name"], source_conn)
        assert c.is_table_cached("users") is True
    finally:
        # Release the connection even when an assertion fails.
        c.close()
def test_hard_reset_applies_new_page_size(tmp_path, source_conn):
    """page_size can't change via reset() but does via hard_reset() (fresh file)."""
    db_path = tmp_path / "cache.db"
    # Existing file at the default 4096; request 8192 — ignored on open.
    CacheManager(db_path=db_path, backup_interval=9999, in_memory=False).close()
    c = CacheManager(
        db_path=db_path,
        backup_interval=9999,
        in_memory=False,
        pragmas={"page_size": 8192},
    )
    try:
        assert c.connection.execute("PRAGMA page_size").fetchone()[0] == 4096
        c.hard_reset()  # deletes the file → recreated with the requested page size
        assert c.connection.execute("PRAGMA page_size").fetchone()[0] == 8192
    finally:
        # Release the connection even when an assertion fails.
        c.close()
def test_hard_reset_in_memory_falls_back_to_reset(tmp_path, source_conn):
    """``hard_reset()`` on a default (memory-mode) manager clears the cached table."""
    c = CacheManager(db_path=tmp_path / "cache.db", backup_interval=9999)
    try:
        c.load_table("users", ["name"], source_conn)
        c.hard_reset()  # memory mode → reset()
        assert c.is_table_cached("users") is False
    finally:
        # Release the connection even when the assertion fails.
        c.close()
def test_full_vacuum_runs_on_disk(tmp_path, source_conn):
    """A full vacuum on a disk cache succeeds and keeps the cached table."""
    db_path = tmp_path / "cache.db"
    c = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
    try:
        c.load_table("users", ["name"], source_conn)
        c.vacuum(incremental=False)  # must not raise
        assert c.is_table_cached("users") is True
    finally:
        # Release the connection even if vacuum raises or the assertion fails.
        c.close()
def test_incremental_vacuum_runs_with_auto_vacuum(tmp_path, source_conn):
    """Incremental vacuum succeeds when the cache was opened with auto_vacuum=INCREMENTAL."""
    c = CacheManager(
        db_path=tmp_path / "cache.db",
        backup_interval=9999,
        in_memory=False,
        pragmas={"auto_vacuum": "INCREMENTAL"},
    )
    try:
        c.load_table("users", ["name"], source_conn)
        c.vacuum(incremental=True, pages=100)  # must not raise
        assert c.is_table_cached("users") is True
    finally:
        # Release the connection even if vacuum raises or the assertion fails.
        c.close()
def test_vacuum_in_memory_is_noop(cache, source_conn):
    """Vacuuming the fixture's manager raises nothing and keeps ``users`` cached."""
    cache.load_table("users", ["name"], source_conn)
    cache.vacuum(incremental=False)  # no-op, no error
    still_cached = cache.is_table_cached("users")
    assert still_cached is True
# ---------------------------------------------------------------------------
# Double-checked locking against cache stampede (1.15.0)
# ---------------------------------------------------------------------------
class _ExplodingSource:
    """Stand-in source connection whose ``execute`` always fails.

    Passed where a load is expected to be skipped, so any query against the
    source surfaces as an ``AssertionError``.
    """

    def execute(self, *_query_args):
        """Raise ``AssertionError`` regardless of the positional arguments given."""
        failure = AssertionError("source must not be queried when recheck() is True")
        raise failure
def test_load_table_recheck_true_skips_load(cache, source_conn):
    """A recheck that reports the table already satisfied skips the reload."""

    def already_satisfied():
        return True

    cache.load_table("users", ["name"], source_conn)
    # Second load with recheck() → True must not touch the source at all.
    forbidden_source = _ExplodingSource()
    cache.load_table("users", ["name"], forbidden_source, recheck=already_satisfied)
    cached = cache.is_table_cached("users")
    assert cached is True
def test_concurrent_loads_dedup_via_double_checked_lock(tmp_path):
    """A second loader queued behind a slow cold load must not reload the table."""
    import time

    c = CacheManager(db_path=tmp_path / "c.db", backup_interval=9999)
    started = threading.Event()
    release = threading.Event()
    loads: list[str] = []

    class _GatedCursor:
        """Cursor stub that returns all rows on the first fetchmany, then []."""

        def __init__(self, rows):
            self._rows = list(rows)
            self._done = False

        def fetchmany(self, n):
            if self._done:
                return []
            self._done = True
            return self._rows

    class _GatedSource:
        """Source stub whose execute() blocks until ``release`` is set."""

        def execute(self, sql):
            loads.append(sql)  # one entry per *actual* source load
            started.set()
            release.wait(5)  # hold the load open (and _load_lock) until released
            return _GatedCursor([("alice",), ("bob",)])

    def recheck() -> bool:
        return c.is_table_cached("users") and "name" in c.get_table_columns("users")

    def load() -> None:
        c.load_table("users", ["name"], _GatedSource(), recheck=recheck)

    a = threading.Thread(target=load)
    b = threading.Thread(target=load)
    try:
        a.start()
        assert started.wait(5), "first load never started"  # A holds _load_lock, mid-fetch
        b.start()
        time.sleep(0.2)  # give B time to queue on _load_lock
        release.set()  # let A finish; B then re-checks and skips
        a.join(5)
        b.join(5)
        assert not a.is_alive() and not b.is_alive()
        assert len(loads) == 1  # the redundant second load was skipped
        assert c.is_table_cached("users") is True
        _, rows = c.execute_in_memory("SELECT name FROM users ORDER BY name")
        assert [r[0] for r in rows] == ["alice", "bob"]
    finally:
        # On an early failure, open the gate so no loader stays parked on it,
        # give any running loader a chance to finish, then close the manager.
        release.set()
        for t in (a, b):
            if t.is_alive():  # never join a thread that was not started
                t.join(5)
        c.close()
def test_incremental_vacuum_warns_without_incremental_auto_vacuum(tmp_path, source_conn):
    """Incremental vacuum on a DB that isn't auto_vacuum=INCREMENTAL warns and skips."""
    from loguru import logger

    messages: list[str] = []
    # Capture WARNING-and-above records from the "sqlmem" logger namespace.
    sink_id = logger.add(messages.append, level="WARNING", filter="sqlmem")
    logger.enable("sqlmem")
    try:
        c = CacheManager(db_path=tmp_path / "c.db", backup_interval=9999, in_memory=False)
        try:
            c.load_table("users", ["name"], source_conn)
            c.vacuum(incremental=True)  # auto_vacuum defaults to NONE → no-op + warning
        finally:
            # Close the manager even if the load or vacuum raises.
            c.close()
    finally:
        # Always detach the sink and restore the disabled state.
        logger.remove(sink_id)
        logger.disable("sqlmem")
    assert any("auto_vacuum" in m for m in messages)