"""Tests for delta refresh: ``DeltaRefresher`` unit tests and ``CachingEngine`` integration."""

import sqlite3
from types import SimpleNamespace

import pytest
from sqlalchemy import create_engine

import sqlmem.engine as eng_mod
from sqlmem import CachingEngine, DeltaConfig
from sqlmem.cache import CacheManager
from sqlmem.delta import DeltaRefresher, ResolvedDelta
from sqlmem.executor import QueryExecutor
from sqlmem.parser import parse
from sqlmem.registry import ColumnRegistry
from sqlmem.stats import StatsCollector


def cached_rows(cache, sql):
    """Run *sql* against the cache and return every row as a ``{column: value}`` dict."""
    cols, rows = cache.execute_in_memory(sql)
    return [dict(zip(cols, row)) for row in rows]


# ---------------------------------------------------------------------------
# Refresher unit tests (in-memory source connection)
# ---------------------------------------------------------------------------


@pytest.fixture
def source_conn():
    """Yield an in-memory SQLite connection holding a two-row ``products`` table."""
    conn = sqlite3.connect(":memory:")
    # try/finally so the connection is closed even when the setup script fails
    # before the fixture reaches its yield.
    try:
        conn.executescript(
            """
            CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, price TEXT, changed TEXT);
            INSERT INTO products VALUES ('1', 'Widget', '9.99', '2026-06-01 10:00:00'), ('2', 'Gadget', '19.99', '2026-06-01 10:05:00');
            """
        )
        conn.commit()
        yield conn
    finally:
        conn.close()


@pytest.fixture
def env(tmp_path, source_conn):
    """Yield a namespace with ``cache``, ``source`` and ``refresher`` after an initial load.

    The cache lives in ``tmp_path``; ``products`` is configured for delta refresh
    with ``changed`` as the change column and ``id`` as the key.
    """
    cache = CacheManager(db_path=tmp_path / "cache.db", backup_interval=9999)
    # try/finally so the cache is closed even when the initial load raises.
    try:
        registry = ColumnRegistry(cache.connection)
        stats = StatsCollector()
        delta = {"products": ResolvedDelta(change_column="changed", key_columns=["id"])}
        executor = QueryExecutor(cache, registry, source_conn, stats, delta)
        refresher = DeltaRefresher(cache, delta)
        # Initial load — caches id, name, price (+ augmented key/change columns).
        executor.execute(parse("SELECT id, name, price FROM products"))
        yield SimpleNamespace(cache=cache, source=source_conn, refresher=refresher)
    finally:
        cache.close()


def test_load_augments_key_and_change_columns(env):
    """The initial load caches the key and change columns alongside the selected ones."""
    cols = env.cache.get_table_columns("products")
    assert {"id", "name", "price", "changed"}.issubset(set(cols))


def test_initial_watermark_is_max_change(env):
    """After the initial load the watermark equals the largest ``changed`` value."""
    assert env.cache.get_last_synced_at("products") == "2026-06-01 10:05:00"


def test_refresh_applies_updates(env):
    """A source UPDATE with a newer ``changed`` value reaches the cache and moves the watermark."""
    env.source.execute(
        "UPDATE products SET price = '7.77', changed = '2026-06-01 10:10:00' WHERE id = '1'"
    )
    env.source.commit()

    env.refresher.refresh(env.source)

    rows = {r["id"]: r for r in cached_rows(env.cache, "SELECT id, price FROM products")}
    assert rows["1"]["price"] == "7.77"
    assert env.cache.get_last_synced_at("products") == "2026-06-01 10:10:00"


def test_refresh_inserts_new_rows(env):
    """A row inserted into the source after the initial load appears in the cache."""
    env.source.execute(
        "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:20:00')"
    )
    env.source.commit()

    env.refresher.refresh(env.source)

    ids = {r["id"] for r in cached_rows(env.cache, "SELECT id FROM products")}
    assert ids == {"1", "2", "3"}


def test_boundary_timestamp_not_missed_and_idempotent(env):
    """Rows stamped exactly at the watermark are picked up once and never duplicated."""
    # New row sharing the exact watermark timestamp must still be picked up (>=),
    # and the row already at that timestamp must not be duplicated.
    env.source.execute(
        "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:05:00')"
    )
    env.source.commit()

    env.refresher.refresh(env.source)
    env.refresher.refresh(env.source)  # idempotent — running twice changes nothing

    rows = cached_rows(env.cache, "SELECT id FROM products")
    assert sorted(r["id"] for r in rows) == ["1", "2", "3"]


def test_delete_by_nulling(env):
    """A column set to NULL in the source is NULL in the cache after refresh."""
    env.source.execute(
        "UPDATE products SET name = NULL, changed = '2026-06-01 10:30:00' WHERE id = '1'"
    )
    env.source.commit()

    env.refresher.refresh(env.source)

    rows = {r["id"]: r for r in cached_rows(env.cache, "SELECT id, name FROM products")}
    assert rows["1"]["name"] is None


def test_refresh_without_changes_is_noop(env):
    """Refreshing against an unchanged source leaves the cached rows untouched."""
    before = cached_rows(env.cache, "SELECT id, name, price FROM products")
    env.refresher.refresh(env.source)
    after = cached_rows(env.cache, "SELECT id, name, price FROM products")
    assert before == after


# ---------------------------------------------------------------------------
# Engine-level: PK auto-discovery, reset, end-to-end refresh
# ---------------------------------------------------------------------------


@pytest.fixture
def source_db(tmp_path):
    """Create an on-disk SQLite source with ``products`` and ``vw_products``; return its path."""
    db_path = tmp_path / "source.db"
    conn = sqlite3.connect(db_path)
    # try/finally so the file handle is released even when the setup script fails.
    try:
        conn.executescript(
            """
            CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, changed TEXT);
            INSERT INTO products VALUES ('1', 'Widget', '2026-06-01 10:00:00');
            CREATE VIEW vw_products AS SELECT id, name FROM products;
            """
        )
        conn.commit()
    finally:
        conn.close()
    return db_path


@pytest.fixture
def source_engine(source_db):
    """Yield a SQLAlchemy engine bound to the on-disk source; dispose of it afterwards."""
    engine = create_engine(f"sqlite:///{source_db}")
    yield engine
    engine.dispose()


@pytest.fixture
def patched_cache(tmp_path, monkeypatch):
    """Point the engine module's cache path at ``tmp_path`` and set its backup interval to 9999."""
    monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", tmp_path / "cache.db")
    monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)


def test_pk_auto_discovery(source_engine, patched_cache):
    """With no ``key_columns`` given, the engine resolves the table's primary key."""
    engine = CachingEngine(source_engine, delta={"products": DeltaConfig(change_column="changed")})
    # Close in finally so a failed assertion does not leave the engine open.
    try:
        assert engine._delta["products"].key_columns == ["id"]
    finally:
        engine.close()


def test_view_without_key_raises(source_engine, patched_cache):
    """Configuring delta refresh on a view with no discoverable key raises ``ValueError``."""
    with pytest.raises(ValueError):
        CachingEngine(source_engine, delta={"vw_products": DeltaConfig(change_column="name")})


def test_engine_reset(source_engine, patched_cache):
    """``reset()`` drops a previously cached table."""
    engine = CachingEngine(source_engine)
    # Close in finally so a failed assertion does not leave the engine open.
    try:
        engine.execute("SELECT id, name FROM products")
        assert engine._cache.is_table_cached("products") is True

        engine.reset()
        assert engine._cache.is_table_cached("products") is False
    finally:
        engine.close()


def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
    """Source updates and inserts made behind the engine's back are visible after ``refresh()``."""
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}
    )
    # Close in finally so a failed step or assertion does not leave the engine open.
    try:
        engine.execute("SELECT id, name FROM products")  # caches, watermark = 10:00

        # Modify the source through a separate connection, bypassing the engine.
        conn = sqlite3.connect(source_db)
        try:
            conn.execute(
                "UPDATE products SET name = 'Widget2', changed = '2026-06-01 10:06:00' WHERE id = '1'"
            )
            conn.execute("INSERT INTO products VALUES ('2', 'Gadget', '2026-06-01 10:05:00')")
            conn.commit()
        finally:
            conn.close()

        engine.refresh()

        rows = {r["id"]: r for r in engine.execute("SELECT id, name FROM products")}
        assert rows["1"]["name"] == "Widget2"
        assert rows["2"]["name"] == "Gadget"
    finally:
        engine.close()