Files
SQLmem/tests/test_delta.py
T

190 lines
6.5 KiB
Python

import sqlite3
from types import SimpleNamespace
import pytest
from sqlalchemy import create_engine
import sqlmem.engine as eng_mod
from sqlmem import CachingEngine, DeltaConfig
from sqlmem.cache import CacheManager
from sqlmem.delta import DeltaRefresher, ResolvedDelta
from sqlmem.executor import QueryExecutor
from sqlmem.parser import parse
from sqlmem.registry import ColumnRegistry
from sqlmem.stats import StatsCollector
def cached_rows(cache, sql):
    """Run *sql* against the cache and return the result rows as dicts.

    ``cache.execute_in_memory(sql)`` must return a ``(columns, rows)`` pair;
    each row is paired positionally with the column names.
    """
    cols, rows = cache.execute_in_memory(sql)
    return [dict(zip(cols, row)) for row in rows]
# ---------------------------------------------------------------------------
# Refresher unit tests (in-memory source connection)
# ---------------------------------------------------------------------------
@pytest.fixture
def source_conn():
    """Yield an in-memory SQLite connection seeded with two ``products`` rows.

    The connection is closed once the requesting test has finished.
    """
    seed_script = """
CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, price TEXT, changed TEXT);
INSERT INTO products VALUES
('1', 'Widget', '9.99', '2026-06-01 10:00:00'),
('2', 'Gadget', '19.99', '2026-06-01 10:05:00');
"""
    db = sqlite3.connect(":memory:")
    db.executescript(seed_script)
    db.commit()
    yield db
    db.close()
@pytest.fixture
def env(tmp_path, source_conn):
    """Yield a loaded cache, its source connection and a ``DeltaRefresher``.

    The cache database lives under ``tmp_path`` and is populated by running
    one ``SELECT`` on ``products`` through a ``QueryExecutor`` configured with
    ``changed`` as the change column and ``id`` as the key column.

    Yields:
        ``SimpleNamespace`` with attributes ``cache``, ``source`` and
        ``refresher``.
    """
    cache = CacheManager(db_path=tmp_path / "cache.db", backup_interval=9999)
    # Everything after the cache is opened runs under try/finally so the cache
    # is closed even when setup fails before the yield is reached.
    try:
        registry = ColumnRegistry(cache.connection)
        stats = StatsCollector()
        delta = {"products": ResolvedDelta(change_column="changed", key_columns=["id"])}
        executor = QueryExecutor(cache, registry, source_conn, stats, delta)
        refresher = DeltaRefresher(cache, delta)
        # Initial load — caches id, name, price (+ augmented key/change columns).
        executor.execute(parse("SELECT id, name, price FROM products"))
        yield SimpleNamespace(cache=cache, source=source_conn, refresher=refresher)
    finally:
        cache.close()
def test_load_augments_key_and_change_columns(env):
    """The cached table holds the selected columns plus ``changed``."""
    cached_columns = set(env.cache.get_table_columns("products"))
    assert {"id", "name", "price", "changed"} <= cached_columns
def test_initial_watermark_is_max_change(env):
    """After the initial load the watermark is the newest seeded ``changed`` value."""
    watermark = env.cache.get_last_synced_at("products")
    assert watermark == "2026-06-01 10:05:00"
def test_refresh_applies_updates(env):
    """A source-side UPDATE reaches the cache and advances the watermark."""
    update_sql = (
        "UPDATE products SET price = '7.77', changed = '2026-06-01 10:10:00' WHERE id = '1'"
    )
    env.source.execute(update_sql)
    env.source.commit()

    env.refresher.refresh(env.source)

    by_id = {}
    for record in cached_rows(env.cache, "SELECT id, price FROM products"):
        by_id[record["id"]] = record
    assert by_id["1"]["price"] == "7.77"
    assert env.cache.get_last_synced_at("products") == "2026-06-01 10:10:00"
def test_refresh_inserts_new_rows(env):
    """A row inserted into the source after the load appears in the cache."""
    insert_sql = "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:20:00')"
    env.source.execute(insert_sql)
    env.source.commit()

    env.refresher.refresh(env.source)

    cached_ids = set()
    for record in cached_rows(env.cache, "SELECT id FROM products"):
        cached_ids.add(record["id"])
    assert cached_ids == {"1", "2", "3"}
def test_boundary_timestamp_not_missed_and_idempotent(env):
    """Rows stamped exactly at the watermark are synced once, never duplicated."""
    # New row sharing the exact watermark timestamp must still be picked up (>=),
    # and the row already at that timestamp must not be duplicated.
    insert_sql = "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:05:00')"
    env.source.execute(insert_sql)
    env.source.commit()

    # Idempotent — running the refresh twice changes nothing.
    for _ in range(2):
        env.refresher.refresh(env.source)

    cached_ids = [record["id"] for record in cached_rows(env.cache, "SELECT id FROM products")]
    cached_ids.sort()
    assert cached_ids == ["1", "2", "3"]
def test_delete_by_nulling(env):
    """A column set to NULL in the source is NULL in the cache after a refresh."""
    null_sql = "UPDATE products SET name = NULL, changed = '2026-06-01 10:30:00' WHERE id = '1'"
    env.source.execute(null_sql)
    env.source.commit()

    env.refresher.refresh(env.source)

    by_id = {}
    for record in cached_rows(env.cache, "SELECT id, name FROM products"):
        by_id[record["id"]] = record
    assert by_id["1"]["name"] is None
def test_refresh_without_changes_is_noop(env):
    """Refreshing against an unchanged source leaves the cached rows as they were."""
    query = "SELECT id, name, price FROM products"
    snapshot_before = cached_rows(env.cache, query)

    env.refresher.refresh(env.source)

    snapshot_after = cached_rows(env.cache, query)
    assert snapshot_before == snapshot_after
# ---------------------------------------------------------------------------
# Engine-level: PK auto-discovery, reset, end-to-end refresh
# ---------------------------------------------------------------------------
@pytest.fixture
def source_db(tmp_path):
    """Create a file-backed SQLite source database and return its path.

    The database contains a ``products`` table with one row and a
    ``vw_products`` view selecting ``id`` and ``name`` from it.

    Returns:
        Path of ``source.db`` inside ``tmp_path``.
    """
    db_path = tmp_path / "source.db"
    conn = sqlite3.connect(db_path)
    # Close the connection even if seeding fails, so no handle to the
    # temporary file outlives the fixture.
    try:
        conn.executescript(
            """
CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, changed TEXT);
INSERT INTO products VALUES ('1', 'Widget', '2026-06-01 10:00:00');
CREATE VIEW vw_products AS SELECT id, name FROM products;
"""
        )
        conn.commit()
    finally:
        conn.close()
    return db_path
@pytest.fixture
def source_engine(source_db):
    """Yield a SQLAlchemy engine bound to ``source_db``; dispose of it afterwards."""
    url = f"sqlite:///{source_db}"
    sa_engine = create_engine(url)
    yield sa_engine
    sa_engine.dispose()
@pytest.fixture
def patched_cache(tmp_path, monkeypatch):
    """Point the engine module's cache settings at a per-test temporary location."""
    overrides = (
        ("CACHE_DB_PATH", tmp_path / "cache.db"),
        ("BACKUP_INTERVAL_SECONDS", 9999),
    )
    for attribute, value in overrides:
        monkeypatch.setattr(eng_mod, attribute, value)
def test_pk_auto_discovery(source_engine, patched_cache):
    """With no ``key_columns`` given, the engine resolves them to ``["id"]``."""
    engine = CachingEngine(source_engine, delta={"products": DeltaConfig(change_column="changed")})
    # Close the engine even when the assertion fails, so a failing test does
    # not leave it open.
    try:
        assert engine._delta["products"].key_columns == ["id"]
    finally:
        engine.close()
def test_view_without_key_raises(source_engine, patched_cache):
    """Configuring delta on ``vw_products`` without key columns raises ``ValueError``."""
    with pytest.raises(ValueError):
        view_delta = {"vw_products": DeltaConfig(change_column="name")}
        CachingEngine(source_engine, delta=view_delta)
def test_engine_reset(source_engine, patched_cache):
    """``reset()`` drops a table that an earlier query had cached."""
    engine = CachingEngine(source_engine)
    # Close the engine even when an assertion fails.
    try:
        engine.execute("SELECT id, name FROM products")
        assert engine._cache.is_table_cached("products") is True
        engine.reset()
        assert engine._cache.is_table_cached("products") is False
    finally:
        engine.close()
def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
    """An update and an insert made directly in the source show up after ``refresh()``."""
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}
    )
    # Close the engine even when a step or assertion below fails.
    try:
        engine.execute("SELECT id, name FROM products")  # caches, watermark = 10:00

        # Modify the source through a separate connection; close it on any outcome.
        conn = sqlite3.connect(source_db)
        try:
            conn.execute(
                "UPDATE products SET name = 'Widget2', changed = '2026-06-01 10:06:00' WHERE id = '1'"
            )
            conn.execute("INSERT INTO products VALUES ('2', 'Gadget', '2026-06-01 10:05:00')")
            conn.commit()
        finally:
            conn.close()

        engine.refresh()

        rows = {r["id"]: r for r in engine.execute("SELECT id, name FROM products")}
        assert rows["1"]["name"] == "Widget2"
        assert rows["2"]["name"] == "Gadget"
    finally:
        engine.close()