# File: SQLmem/tests/test_engine.py
# (Python, 492 lines, 17 KiB)

import sqlite3
import pytest
from sqlalchemy import create_engine
import sqlmem.engine as eng_mod
from sqlmem import CachingEngine, ReadOnlyError, UnsupportedQueryError
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def source_db(tmp_path):
    """Create a file-based SQLite source database with two populated tables.

    Builds ``source.db`` under ``tmp_path`` holding a ``products`` table
    (three rows) and an ``orders`` table (two rows). Every column is
    declared as TEXT.

    Returns:
        The path of the created database file.
    """
    db_path = tmp_path / "source.db"
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("CREATE TABLE products (id TEXT, name TEXT, price TEXT)")
        conn.executemany(
            "INSERT INTO products VALUES (?, ?, ?)",
            [("1", "Widget", "9.99"), ("2", "Gadget", "19.99"), ("3", "Doohickey", "4.99")],
        )
        conn.execute("CREATE TABLE orders (order_id TEXT, product_id TEXT, qty TEXT)")
        conn.executemany(
            "INSERT INTO orders VALUES (?, ?, ?)",
            [("101", "1", "2"), ("102", "2", "1")],
        )
        conn.commit()
    finally:
        # Release the file handle even if seeding fails part-way through.
        conn.close()
    return db_path
@pytest.fixture
def source_engine(source_db):
    """Yield a SQLAlchemy engine bound to the ``source_db`` file; dispose it at teardown."""
    url = f"sqlite:///{source_db}"
    sa_engine = create_engine(url)
    yield sa_engine
    sa_engine.dispose()
@pytest.fixture
def cache_path(tmp_path):
    """Return the location of the cache database file inside ``tmp_path``."""
    location = tmp_path / "cache.db"
    return location
@pytest.fixture
def engine(source_engine, cache_path, monkeypatch):
    """Yield a CachingEngine whose module-level cache path is a temp file.

    Patches ``CACHE_DB_PATH`` and ``BACKUP_INTERVAL_SECONDS`` on the engine
    module before constructing the engine, and closes the engine at teardown.
    """
    overrides = (
        ("CACHE_DB_PATH", cache_path),
        # Presumably large enough that no periodic backup fires mid-test.
        ("BACKUP_INTERVAL_SECONDS", 9999),
    )
    for attr_name, attr_value in overrides:
        monkeypatch.setattr(eng_mod, attr_name, attr_value)
    caching_engine = CachingEngine(source_engine)
    yield caching_engine
    caching_engine.close()
# ---------------------------------------------------------------------------
# Basic SELECT execution (in-memory)
# ---------------------------------------------------------------------------
def test_select_returns_list_of_dicts(engine):
    """``execute`` returns a list in which every element is a dict."""
    result = engine.execute("SELECT id, name FROM products")
    assert isinstance(result, list)
    for row in result:
        assert isinstance(row, dict)
def test_select_correct_row_count(engine):
    """Selecting from ``products`` yields all three seeded rows."""
    result = engine.execute("SELECT id, name FROM products")
    assert len(result) == 3
def test_select_correct_values(engine):
    """The returned rows carry the seeded product names."""
    result = engine.execute("SELECT id, name FROM products")
    names = {row["name"] for row in result}
    assert names == {"Widget", "Gadget", "Doohickey"}
def test_select_with_where_clause(engine):
    """A WHERE filter on ``id`` narrows the result to the single matching row."""
    result = engine.execute("SELECT id, price FROM products WHERE id = '1'")
    assert len(result) == 1
    only_row = result[0]
    assert only_row["price"] == "9.99"
def test_select_with_order_and_limit(engine):
    """ORDER BY with LIMIT returns two rows, starting with the lowest id."""
    result = engine.execute("SELECT id, name FROM products ORDER BY id LIMIT 2")
    assert len(result) == 2
    first_row = result[0]
    assert first_row["id"] == "1"
def test_select_different_table(engine):
    """Queries against ``orders`` return both seeded order rows."""
    result = engine.execute("SELECT order_id, qty FROM orders")
    assert len(result) == 2
def test_where_on_non_selected_column(engine):
    """WHERE references a column not in SELECT — parser must extract it for the cache."""
    result = engine.execute("SELECT name FROM products WHERE price = '9.99'")
    assert len(result) == 1
    only_row = result[0]
    assert only_row["name"] == "Widget"
# ---------------------------------------------------------------------------
# In-memory caching behaviour
# ---------------------------------------------------------------------------
def test_cache_hit_survives_source_deletion(engine, source_db):
    """A repeated query is still answered after the source rows are deleted."""
    engine.execute("SELECT id, name FROM products")
    # Wipe source — cache must still answer
    conn = sqlite3.connect(source_db)
    try:
        conn.execute("DELETE FROM products")
        conn.commit()
    finally:
        # Close the handle even when the DELETE or commit raises.
        conn.close()
    rows = engine.execute("SELECT id, name FROM products")
    assert len(rows) == 3
def test_new_column_triggers_refetch(engine):
    """Asking for a column absent from the first query still returns its values."""
    engine.execute("SELECT id FROM products")
    result = engine.execute("SELECT id, name FROM products")
    names = {row["name"] for row in result}
    assert "Widget" in names
def test_second_query_same_columns_is_cache_hit(engine):
    """After one query the table is reported cached and a repeat returns all rows."""
    query = "SELECT id, name FROM products"
    engine.execute(query)
    assert engine._cache.is_table_cached("products") is True
    repeated = engine.execute(query)
    assert len(repeated) == 3
def test_cache_hit_does_not_open_source(engine, source_engine, monkeypatch):
    """A pure cache hit must not open a source connection (lazy source)."""
    query = "SELECT id, name FROM products"
    engine.execute(query)  # miss → caches

    real_connect = source_engine.connect
    recorded_calls = []

    def recording_connect(*args, **kwargs):
        # Note every connect() invocation, then delegate to the real method.
        recorded_calls.append((args, kwargs))
        return real_connect(*args, **kwargs)

    monkeypatch.setattr(source_engine, "connect", recording_connect)
    engine.execute(query)  # hit → no source access
    assert len(recorded_calls) == 0
# ---------------------------------------------------------------------------
# SQL file creation — backup to disk
# ---------------------------------------------------------------------------
def test_close_creates_sql_file(engine, cache_path):
    """Closing the engine after a query leaves a file at ``cache_path``."""
    engine.execute("SELECT id, name FROM products")
    # The ``engine`` fixture calls close() again at teardown.
    engine.close()
    file_present = cache_path.exists()
    assert file_present
def test_sql_file_is_valid_sqlite(engine, cache_path):
    """The file written on close opens as SQLite and lists the expected tables."""
    engine.execute("SELECT id, name FROM products")
    engine.close()
    conn = sqlite3.connect(cache_path)
    try:
        tables = {t[0] for t in conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table'"
        ).fetchall()}
    finally:
        # Close the handle even when the file is not a readable database.
        conn.close()
    assert "_sqlmem_tables" in tables
    assert "products" in tables
def test_sql_file_contains_cached_rows(engine, cache_path):
    """The file written on close contains the three cached ``products`` rows."""
    engine.execute("SELECT id, name FROM products")
    engine.close()
    conn = sqlite3.connect(cache_path)
    try:
        rows = conn.execute("SELECT id, name FROM products").fetchall()
    finally:
        # Close the handle even when the table is missing and the query raises.
        conn.close()
    assert len(rows) == 3
def test_sql_file_meta_table_present(engine, cache_path):
    """The backup file has a ``_sqlmem_meta`` row with a ``schema_version`` of at least 1."""
    engine.execute("SELECT id FROM products")
    engine.close()
    conn = sqlite3.connect(cache_path)
    try:
        row = conn.execute(
            "SELECT value FROM _sqlmem_meta WHERE key = 'schema_version'"
        ).fetchone()
    finally:
        # Close the handle even when the meta table is absent and the query raises.
        conn.close()
    assert row is not None
    assert int(row[0]) >= 1
def test_reload_from_disk_file(source_engine, cache_path, monkeypatch):
    """New CachingEngine picks up table cached by a previous instance."""
    monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
    monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
    ce1 = CachingEngine(source_engine)
    try:
        ce1.execute("SELECT id, name FROM products")
    finally:
        # Always close so a failing query does not leave the first engine open.
        ce1.close()
    ce2 = CachingEngine(source_engine)
    try:
        assert ce2._cache.is_table_cached("products") is True
    finally:
        # Close even when the assertion fails.
        ce2.close()
def test_reload_data_intact_after_restart(source_engine, cache_path, monkeypatch):
    """A second engine on the same cache path returns the same product names."""
    monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
    monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
    ce1 = CachingEngine(source_engine)
    try:
        ce1.execute("SELECT id, name FROM products")
    finally:
        # Always close so a failing query does not leave the first engine open.
        ce1.close()
    ce2 = CachingEngine(source_engine)
    try:
        rows = ce2.execute("SELECT id, name FROM products")
    finally:
        ce2.close()
    assert {r["name"] for r in rows} == {"Widget", "Gadget", "Doohickey"}
# ---------------------------------------------------------------------------
# Error handling
# ---------------------------------------------------------------------------
def test_insert_raises_readonly(engine):
    """An INSERT statement is rejected with ``ReadOnlyError``."""
    statement = "INSERT INTO products VALUES ('4', 'New', '1.00')"
    with pytest.raises(ReadOnlyError):
        engine.execute(statement)
def test_update_raises_readonly(engine):
    """An UPDATE statement is rejected with ``ReadOnlyError``."""
    statement = "UPDATE products SET price = '0' WHERE id = '1'"
    with pytest.raises(ReadOnlyError):
        engine.execute(statement)
def test_delete_raises_readonly(engine):
    """A DELETE statement is rejected with ``ReadOnlyError``."""
    statement = "DELETE FROM products WHERE id = '1'"
    with pytest.raises(ReadOnlyError):
        engine.execute(statement)
def test_ambiguous_unqualified_join_column_raises(engine):
    """A JOIN selecting an unqualified column raises ``UnsupportedQueryError``."""
    join_query = "SELECT name FROM products p JOIN orders o ON p.id = o.product_id"
    with pytest.raises(UnsupportedQueryError):
        engine.execute(join_query)
# ---------------------------------------------------------------------------
# R1 — parametrized queries
# ---------------------------------------------------------------------------
def test_positional_param(engine):
    """A ``?`` placeholder is bound from a positional tuple."""
    params = ("1",)
    result = engine.execute("SELECT id, name FROM products WHERE id = ?", params)
    expected = [{"id": "1", "name": "Widget"}]
    assert result == expected
def test_named_param(engine):
    """A ``:id`` placeholder is bound from a dict of named parameters."""
    params = {"id": "2"}
    result = engine.execute("SELECT name FROM products WHERE id = :id", params)
    expected = [{"name": "Gadget"}]
    assert result == expected
# ---------------------------------------------------------------------------
# R2 — JOIN support
# ---------------------------------------------------------------------------
def test_join_two_tables(engine):
    """A qualified two-table JOIN with a bound parameter returns the joined row."""
    join_query = (
        "SELECT p.name, o.qty FROM products p "
        "JOIN orders o ON p.id = o.product_id WHERE p.id = ?"
    )
    result = engine.execute(join_query, ("1",))
    expected = [{"name": "Widget", "qty": "2"}]
    assert result == expected
def test_join_caches_both_tables(engine):
    """After a JOIN, both participating tables are reported as cached."""
    join_query = (
        "SELECT p.name, o.qty FROM products p JOIN orders o ON p.id = o.product_id"
    )
    engine.execute(join_query)
    cache = engine._cache
    assert cache.is_table_cached("products") is True
    assert cache.is_table_cached("orders") is True
# ---------------------------------------------------------------------------
# R3 — SELECT *
# ---------------------------------------------------------------------------
def test_select_star_returns_all_columns(engine):
    """``SELECT *`` returns every column of the matching row."""
    result = engine.execute("SELECT * FROM products WHERE id = '1'")
    expected = [{"id": "1", "name": "Widget", "price": "9.99"}]
    assert result == expected
def test_select_star_marks_table_full(engine):
    """After ``SELECT *`` the cache reports the table as full."""
    engine.execute("SELECT * FROM products")
    is_full = engine._cache.is_table_full("products")
    assert is_full is True
# ---------------------------------------------------------------------------
# Cache invalidation
# ---------------------------------------------------------------------------
def test_invalidate_marks_table_absent(engine):
    """Invalidating a cached table makes the cache report it as not cached."""
    engine.execute("SELECT id, name FROM products")
    engine.invalidate("products")
    still_cached = engine._cache.is_table_cached("products")
    assert still_cached is False
def test_invalidate_then_refetch_works(engine):
    """A query issued after invalidation returns all three rows again."""
    query = "SELECT id, name FROM products"
    engine.execute(query)
    engine.invalidate("products")
    refetched = engine.execute(query)
    assert len(refetched) == 3
def test_invalidate_unknown_table_is_noop(engine):
    """Invalidating a table that was never cached completes without raising."""
    unknown_table = "nonexistent_table"
    engine.invalidate(unknown_table)  # must not raise
# ---------------------------------------------------------------------------
# Disk-backed cache (in_memory=False)
# ---------------------------------------------------------------------------
def test_disk_mode_query_works(source_engine, cache_path, monkeypatch):
    """With ``in_memory=False`` a query returns the seeded rows and the cache is not in-memory."""
    monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
    monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
    ce = CachingEngine(source_engine, in_memory=False)
    try:
        rows = ce.execute("SELECT id, name FROM products")
        assert {r["name"] for r in rows} == {"Widget", "Gadget", "Doohickey"}
        assert ce._cache._in_memory is False
    finally:
        # Close even when an assertion fails so the engine is not left open.
        ce.close()
def test_disk_mode_persists_across_instances(source_engine, cache_path, monkeypatch):
    """A second disk-mode engine on the same cache path finds the table already cached."""
    monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
    monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
    ce1 = CachingEngine(source_engine, in_memory=False)
    try:
        ce1.execute("SELECT id, name FROM products")
    finally:
        # Always close so a failing query does not leave the first engine open.
        ce1.close()
    # Second instance opens the same on-disk cache and finds the table already there.
    ce2 = CachingEngine(source_engine, in_memory=False)
    try:
        assert ce2._cache.is_table_cached("products") is True
        rows = ce2.execute("SELECT id, name FROM products")
        assert {r["name"] for r in rows} == {"Widget", "Gadget", "Doohickey"}
    finally:
        ce2.close()
def test_in_memory_override_respects_config(source_engine, cache_path, monkeypatch):
    """in_memory=None falls back to the IN_MEMORY config default."""
    monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
    monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
    monkeypatch.setattr(eng_mod, "IN_MEMORY", False)
    ce = CachingEngine(source_engine)  # no explicit in_memory
    try:
        assert ce._cache._in_memory is False
    finally:
        # Close even when the assertion fails.
        ce.close()
# ---------------------------------------------------------------------------
# Per-engine configuration (constructor overrides env defaults)
# ---------------------------------------------------------------------------
def test_constructor_config_overrides(source_engine, tmp_path):
    """Constructor keyword arguments are reflected on the engine and its cache.

    Checks that the explicit cache file is created and that the fetch batch,
    dialect, backup interval and refresh interval are stored as given.
    """
    p = tmp_path / "explicit_cache.db"
    ce = CachingEngine(
        source_engine,
        cache_db_path=p,
        fetch_batch=3,
        dialect="sqlite",
        backup_interval=12345,
        refresh_interval=42,
        in_memory=False,
    )
    try:
        ce.execute("SELECT id, name FROM products")
        assert p.exists()
        assert ce._cache._fetch_batch == 3
        assert ce._cache._dialect == "sqlite"
        assert ce._dialect == "sqlite"
        assert ce._cache._backup_interval == 12345
        assert ce._refresh_interval == 42
    finally:
        # Close even when an assertion fails so the engine is not left open.
        ce.close()
def test_two_engines_separate_cache_files(source_engine, tmp_path):
    """Two engines in one process can target different cache files."""
    a = CachingEngine(source_engine, cache_db_path=tmp_path / "a.db", in_memory=False)
    try:
        b = CachingEngine(source_engine, cache_db_path=tmp_path / "b.db", in_memory=False)
        try:
            a.execute("SELECT id FROM products")
            assert (tmp_path / "a.db").exists()
            assert a._cache.is_table_cached("products") is True
            assert b._cache.is_table_cached("products") is False  # independent cache
        finally:
            b.close()
    finally:
        # Runs even if constructing ``b`` fails, so ``a`` is never leaked.
        a.close()
# ---------------------------------------------------------------------------
# Pragmas / hard_reset / vacuum (1.11.0)
# ---------------------------------------------------------------------------
def test_engine_passes_pragmas_to_cache(source_engine, tmp_path):
    """Pragmas given to the constructor are visible on the cache connection."""
    ce = CachingEngine(
        source_engine,
        cache_db_path=tmp_path / "cache.db",
        in_memory=False,
        pragmas={"page_size": 8192, "auto_vacuum": "INCREMENTAL"},
    )
    try:
        assert ce._cache.connection.execute("PRAGMA page_size").fetchone()[0] == 8192
        # SQLite reports auto_vacuum numerically; 2 is the INCREMENTAL mode.
        assert ce._cache.connection.execute("PRAGMA auto_vacuum").fetchone()[0] == 2
    finally:
        # Close even when an assertion fails so the engine is not left open.
        ce.close()
def test_engine_hard_reset_reloads(source_engine, tmp_path):
    """``hard_reset`` drops the cached table and a later query returns the rows again."""
    ce = CachingEngine(source_engine, cache_db_path=tmp_path / "cache.db", in_memory=False)
    try:
        ce.execute("SELECT id FROM products")
        assert ce._cache.is_table_cached("products") is True
        ce.hard_reset()
        assert ce._cache.is_table_cached("products") is False
        rows = ce.execute("SELECT id, name FROM products")  # reloads on next use
        assert len(rows) == 3
    finally:
        # Close even when an assertion fails so the engine is not left open.
        ce.close()
def test_engine_vacuum_runs(source_engine, tmp_path):
    """A non-incremental ``vacuum`` completes and leaves the table cached."""
    ce = CachingEngine(source_engine, cache_db_path=tmp_path / "cache.db", in_memory=False)
    try:
        ce.execute("SELECT id FROM products")
        ce.vacuum(incremental=False)  # must not raise
        assert ce._cache.is_table_cached("products") is True
    finally:
        # Close even when vacuum raises or the assertion fails.
        ce.close()
# ---------------------------------------------------------------------------
# datetime_columns end-to-end: param coercion (A) + read-back datetime (B)
# ---------------------------------------------------------------------------
@pytest.fixture
def events_engine(tmp_path):
    """Yield a SQLAlchemy engine over a seeded ``events`` SQLite database.

    Creates ``events.db`` under ``tmp_path`` with an ``events`` table of two
    rows whose ``changed`` column holds ISO-format timestamp strings, and
    disposes the engine at teardown.
    """
    src = tmp_path / "events.db"
    conn = sqlite3.connect(src)
    try:
        conn.execute("CREATE TABLE events (id TEXT, changed TEXT)")
        conn.executemany(
            "INSERT INTO events VALUES (?, ?)",
            [("1", "2026-06-01T10:00:00"), ("2", "2026-06-03T10:00:00")],
        )
        conn.commit()
    finally:
        # Release the file handle even if seeding fails part-way through.
        conn.close()
    se = create_engine(f"sqlite:///{src}")
    yield se
    se.dispose()
def test_datetime_column_where_and_readback(events_engine, tmp_path):
    """A declared datetime column filters on an ISO string and reads back as a datetime."""
    from datetime import datetime, timezone

    ce = CachingEngine(
        events_engine,
        cache_db_path=tmp_path / "cache.db",
        in_memory=False,
        datetime_columns={"events": ["changed"]},
    )
    try:
        # A: WHERE on the INTEGER-µs column with an ISO string param returns the right row.
        rows = ce.execute(
            "SELECT id, changed FROM events WHERE changed > ?", ("2026-06-02T00:00:00",)
        )
        assert [r["id"] for r in rows] == ["2"]
        # B: the column comes back as a datetime, not a raw integer.
        assert rows[0]["changed"] == datetime(2026, 6, 3, 10, 0, 0, tzinfo=timezone.utc)
    finally:
        # Close even when an assertion fails so the engine is not left open.
        ce.close()
def test_datetime_column_return_datetime_false(events_engine, tmp_path):
    """With ``return_datetime=False`` the datetime column is returned as plain ints."""
    ce = CachingEngine(
        events_engine,
        cache_db_path=tmp_path / "cache.db",
        in_memory=False,
        datetime_columns={"events": ["changed"]},
        return_datetime=False,
    )
    try:
        rows = ce.execute("SELECT id, changed FROM events")
        # The source table holds two rows; without this check the all() below
        # would pass vacuously on an empty result.
        assert len(rows) == 2
        assert all(isinstance(r["changed"], int) for r in rows)  # opt-out → raw µs
    finally:
        # Close even when an assertion fails so the engine is not left open.
        ce.close()
# ---------------------------------------------------------------------------
# db_size_bytes in stats (D)
# ---------------------------------------------------------------------------
def test_stats_reports_db_size_in_disk_mode(source_engine, tmp_path):
    """In disk mode ``stats.db_size_bytes`` is positive after a query."""
    ce = CachingEngine(source_engine, cache_db_path=tmp_path / "cache.db", in_memory=False)
    try:
        ce.execute("SELECT id FROM products")
        assert ce.stats.db_size_bytes > 0
    finally:
        # Close even when the assertion fails so the engine is not left open.
        ce.close()
def test_stats_db_size_zero_in_memory(engine):
    """With the default ``engine`` fixture ``stats.db_size_bytes`` is zero after a query."""
    engine.execute("SELECT id, name FROM products")
    reported_size = engine.stats.db_size_bytes
    assert reported_size == 0