317 lines
11 KiB
Python
317 lines
11 KiB
Python
import sqlite3
|
|
import threading
|
|
from datetime import datetime
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
from sqlalchemy import create_engine
|
|
|
|
import sqlmem.engine as eng_mod
|
|
from sqlmem import CachingEngine, DeltaConfig
|
|
from sqlmem.cache import CacheManager
|
|
from sqlmem.delta import DeltaRefresher, ResolvedDelta, _bind_watermark
|
|
from sqlmem.executor import QueryExecutor
|
|
from sqlmem.parser import parse
|
|
from sqlmem.registry import ColumnRegistry
|
|
from sqlmem.stats import StatsCollector
|
|
|
|
|
|
def cached_rows(cache, sql):
    """Run *sql* through ``cache.execute_in_memory`` and return rows as dicts.

    Each returned dict maps a column name to that row's value for the column.
    """
    columns, records = cache.execute_in_memory(sql)
    as_dicts = []
    for record in records:
        as_dicts.append(dict(zip(columns, record)))
    return as_dicts
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Refresher unit tests (in-memory source connection)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.fixture
def source_conn():
    """Yield an in-memory SQLite connection seeded with two ``products`` rows.

    The connection is closed once the requesting test has finished.
    """
    schema_and_seed = """
        CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, price TEXT, changed TEXT);
        INSERT INTO products VALUES
            ('1', 'Widget', '9.99', '2026-06-01 10:00:00'),
            ('2', 'Gadget', '19.99', '2026-06-01 10:05:00');
    """
    connection = sqlite3.connect(":memory:")
    connection.executescript(schema_and_seed)
    connection.commit()
    yield connection
    connection.close()
|
|
|
|
|
|
@pytest.fixture
def env(tmp_path, source_conn):
    """Yield a cache, source connection and refresher wired for ``products``.

    The yielded namespace exposes ``cache`` (a ``CacheManager`` backed by a
    file under ``tmp_path``), ``source`` (the ``source_conn`` fixture) and
    ``refresher`` (a ``DeltaRefresher`` sharing the same delta config).
    """
    cache = CacheManager(db_path=tmp_path / "cache.db", backup_interval=9999)
    # try/finally so the cache is closed even when setup fails before the
    # yield (pytest only resumes a yield fixture that actually reached it).
    try:
        registry = ColumnRegistry(cache.connection)
        stats = StatsCollector()
        delta = {"products": ResolvedDelta(change_column="changed", key_columns=["id"])}
        executor = QueryExecutor(cache, registry, source_conn, stats, delta)
        refresher = DeltaRefresher(cache, delta)
        # Initial load — caches id, name, price (+ augmented key/change columns).
        executor.execute(parse("SELECT id, name, price FROM products"))
        yield SimpleNamespace(cache=cache, source=source_conn, refresher=refresher)
    finally:
        cache.close()
|
|
|
|
|
|
def test_load_augments_key_and_change_columns(env):
    """The cached table holds the selected columns plus the ``changed`` column."""
    expected = {"id", "name", "price", "changed"}
    cached = set(env.cache.get_table_columns("products"))
    assert expected.issubset(cached)
|
|
|
|
|
|
def test_initial_watermark_is_max_change(env):
    """After the initial load the watermark equals the newest ``changed`` value."""
    watermark = env.cache.get_last_synced_at("products")
    assert watermark == "2026-06-01 10:05:00"
|
|
|
|
|
|
def test_refresh_applies_updates(env):
    """A source-side UPDATE reaches the cache and advances the watermark."""
    update_sql = (
        "UPDATE products SET price = '7.77', changed = '2026-06-01 10:10:00' WHERE id = '1'"
    )
    env.source.execute(update_sql)
    env.source.commit()
    env.refresher.refresh(env.source)

    by_id = {}
    for record in cached_rows(env.cache, "SELECT id, price FROM products"):
        by_id[record["id"]] = record
    assert by_id["1"]["price"] == "7.77"

    watermark = env.cache.get_last_synced_at("products")
    assert watermark == "2026-06-01 10:10:00"
|
|
|
|
|
|
def test_refresh_inserts_new_rows(env):
    """A row inserted in the source after the initial load appears in the cache."""
    insert_sql = "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:20:00')"
    env.source.execute(insert_sql)
    env.source.commit()
    env.refresher.refresh(env.source)

    cached_ids = set()
    for record in cached_rows(env.cache, "SELECT id FROM products"):
        cached_ids.add(record["id"])
    assert cached_ids == {"1", "2", "3"}
|
|
|
|
|
|
def test_boundary_timestamp_not_missed_and_idempotent(env):
    """Rows stamped exactly at the watermark are picked up once, never duplicated."""
    # New row sharing the exact watermark timestamp must still be picked up (>=),
    # and the row already at that timestamp must not be duplicated.
    insert_sql = "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:05:00')"
    env.source.execute(insert_sql)
    env.source.commit()

    # Idempotent — running the refresh twice changes nothing.
    for _ in range(2):
        env.refresher.refresh(env.source)

    cached_ids = [record["id"] for record in cached_rows(env.cache, "SELECT id FROM products")]
    cached_ids.sort()
    assert cached_ids == ["1", "2", "3"]
|
|
|
|
|
|
def test_delete_by_nulling(env):
    """A source column set to NULL is NULL in the cache after a refresh."""
    null_sql = "UPDATE products SET name = NULL, changed = '2026-06-01 10:30:00' WHERE id = '1'"
    env.source.execute(null_sql)
    env.source.commit()
    env.refresher.refresh(env.source)

    by_id = {}
    for record in cached_rows(env.cache, "SELECT id, name FROM products"):
        by_id[record["id"]] = record
    assert by_id["1"]["name"] is None
|
|
|
|
|
|
def test_refresh_without_changes_is_noop(env):
    """Refreshing against an unchanged source leaves the cached rows as they were."""
    query = "SELECT id, name, price FROM products"
    snapshot_before = cached_rows(env.cache, query)
    env.refresher.refresh(env.source)
    snapshot_after = cached_rows(env.cache, query)
    assert snapshot_before == snapshot_after
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Watermark binding — regression for the datetime-as-string delta bug
|
|
# (SQL Server error 241: 'T'-separated 6-digit-microsecond ISO string can't be
|
|
# implicitly converted varchar->datetime, freezing the delta watermark).
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_bind_watermark_parses_iso_datetime():
    """A 'T'-separated ISO string with microseconds binds as a ``datetime``."""
    expected = datetime(2026, 6, 5, 14, 54, 24, 823000)
    bound = _bind_watermark("2026-06-05T14:54:24.823000")
    assert bound == expected
|
|
|
|
|
|
def test_bind_watermark_parses_space_separated():
    """A space-separated timestamp string binds as a ``datetime``."""
    expected = datetime(2026, 6, 1, 10, 5, 0)
    bound = _bind_watermark("2026-06-01 10:05:00")
    assert bound == expected
|
|
|
|
|
|
def test_bind_watermark_passes_through_non_datetime():
    """A value that is not a timestamp string is returned unchanged."""
    # Integer rowversion / non-datetime change column — left untouched.
    raw = "12345"
    assert _bind_watermark(raw) == "12345"
|
|
|
|
|
|
class _SpyCursor:
|
|
def __init__(self, rows):
|
|
self._rows = list(rows)
|
|
|
|
def fetchmany(self, n):
|
|
batch, self._rows = self._rows[:n], self._rows[n:]
|
|
return batch
|
|
|
|
|
|
class _SpySource:
|
|
"""Records the parameters bound to each query (stands in for the pyodbc source)."""
|
|
|
|
def __init__(self, rows):
|
|
self._rows = rows
|
|
self.bound = []
|
|
|
|
def execute(self, sql, params=()):
|
|
self.bound.append((sql, params))
|
|
return _SpyCursor(self._rows)
|
|
|
|
|
|
def test_refresh_binds_watermark_as_datetime(env):
    """The watermark must reach the source as a real datetime, not a raw ISO
    string — otherwise SQL Server raises error 241 and the delta freezes."""
    iso_watermark = "2026-06-05T14:54:24.823000"
    env.cache.set_last_synced_at("products", iso_watermark)
    canned_row = ("1", "Widget", "9.99", "2026-06-05T14:54:24.823000")
    spy = _SpySource(rows=[canned_row])

    env.refresher.refresh(spy)

    assert spy.bound, "source query was never issued"
    _sql, bound_params = spy.bound[-1]
    expected_params = (datetime(2026, 6, 5, 14, 54, 24, 823000),)
    assert bound_params == expected_params
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Refresh failures are recorded (4.3) so a stuck delta is visible in stats
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class _RaisingSource:
|
|
def execute(self, sql, params=()):
|
|
raise RuntimeError("boom 241")
|
|
|
|
|
|
def test_failed_delta_refresh_records_error(env):
    """A failing refresh is recorded per table and flips the table state to error."""
    failing_source = _RaisingSource()
    env.refresher.refresh(failing_source)

    recorded = env.cache.get_errors()["products"]
    assert recorded.consecutive == 1
    assert "boom 241" in recorded.message
    assert env.cache.error_total == 1

    # State is marked error even though the cache still holds the last-good data.
    table_states = env.cache.get_states()
    assert table_states["products"] == "error"
|
|
|
|
|
|
def test_delta_success_resets_failure_streak(env):
    """A successful refresh after a failure resets the consecutive-failure count."""
    env.refresher.refresh(_RaisingSource())
    streak_after_failure = env.cache.get_errors()["products"].consecutive
    assert streak_after_failure == 1

    # Real source — this refresh succeeds.
    env.refresher.refresh(env.source)
    streak_after_success = env.cache.get_errors()["products"].consecutive
    assert streak_after_success == 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Engine-level: PK auto-discovery, reset, end-to-end refresh
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.fixture
def source_db(tmp_path):
    """Create an on-disk SQLite source database and return its path.

    The database holds a one-row ``products`` table and a ``vw_products``
    view over its ``id`` and ``name`` columns.
    """
    db_path = tmp_path / "source.db"
    conn = sqlite3.connect(db_path)
    # Close the handle even if the seeding script fails, so a broken setup
    # does not leave the database file open.
    try:
        conn.executescript(
            """
            CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, changed TEXT);
            INSERT INTO products VALUES ('1', 'Widget', '2026-06-01 10:00:00');
            CREATE VIEW vw_products AS SELECT id, name FROM products;
            """
        )
        conn.commit()
    finally:
        conn.close()
    return db_path
|
|
|
|
|
|
@pytest.fixture
def source_engine(source_db):
    """Yield a SQLAlchemy engine for the ``source_db`` file; dispose it afterwards."""
    url = f"sqlite:///{source_db}"
    sa_engine = create_engine(url)
    yield sa_engine
    sa_engine.dispose()
|
|
|
|
|
|
@pytest.fixture
def patched_cache(tmp_path, monkeypatch):
    """Point the engine module's cache path at ``tmp_path`` and set the backup interval to 9999."""
    overrides = {
        "CACHE_DB_PATH": tmp_path / "cache.db",
        "BACKUP_INTERVAL_SECONDS": 9999,
    }
    for attr_name, value in overrides.items():
        monkeypatch.setattr(eng_mod, attr_name, value)
|
|
|
|
|
|
def test_pk_auto_discovery(source_engine, patched_cache):
    """With no ``key_columns`` given, the engine resolves them to ``["id"]``."""
    engine = CachingEngine(source_engine, delta={"products": DeltaConfig(change_column="changed")})
    # Close in ``finally`` so a failed assertion does not leave the engine open.
    try:
        assert engine._delta["products"].key_columns == ["id"]
    finally:
        engine.close()
|
|
|
|
|
|
def test_view_without_key_raises(source_engine, patched_cache):
    """Delta config on ``vw_products`` without key columns raises ``ValueError``."""
    with pytest.raises(ValueError):
        CachingEngine(
            source_engine,
            delta={"vw_products": DeltaConfig(change_column="name")},
        )
|
|
|
|
|
|
def test_engine_reset(source_engine, patched_cache):
    """``reset()`` leaves a previously cached table no longer cached."""
    engine = CachingEngine(source_engine)
    # Close in ``finally`` so a failed assertion does not leave the engine open.
    try:
        engine.execute("SELECT id, name FROM products")
        assert engine._cache.is_table_cached("products") is True
        engine.reset()
        assert engine._cache.is_table_cached("products") is False
    finally:
        engine.close()
|
|
|
|
|
|
def test_startup_catch_up_is_non_blocking_by_default(source_engine, patched_cache, monkeypatch):
    """By default the startup catch-up runs on the background thread, not the
    main thread, so it never blocks application startup."""
    threads: list[str] = []
    started = threading.Event()
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        # Record which thread ran the refresh, then delegate to the real method.
        threads.append(threading.current_thread().name)
        started.set()
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig("changed", ["id"])}
    )
    # Close in ``finally`` so a failed assertion does not leave the engine
    # (and the refresh thread it started) open.
    try:
        # __init__ has returned; the main thread must not have run the catch-up.
        assert "MainThread" not in threads
        assert started.wait(2), "background catch-up never ran"
        assert threads == ["sqlmem-delta"]
    finally:
        engine.close()
|
|
|
|
|
|
def test_blocking_startup_refresh_runs_synchronously(source_engine, patched_cache, monkeypatch):
    """With ``blocking_startup_refresh=True`` the catch-up runs on the main thread."""
    threads: list[str] = []
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        # Record which thread ran the refresh, then delegate to the real method.
        threads.append(threading.current_thread().name)
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine,
        delta={"products": DeltaConfig("changed", ["id"])},
        blocking_startup_refresh=True,
    )
    # Close in ``finally`` so a failed assertion does not leave the engine open.
    try:
        # Opt-in: the catch-up ran on the main thread before __init__ returned.
        assert "MainThread" in threads
    finally:
        engine.close()
|
|
|
|
|
|
def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
    """An update and an insert made directly in the source show up after ``refresh()``."""
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}
    )
    # Close in ``finally`` so a failed step does not leave the engine open.
    try:
        engine.execute("SELECT id, name FROM products")  # caches, watermark = 10:00

        # Change the source through a separate connection to the same file.
        conn = sqlite3.connect(source_db)
        try:
            conn.execute(
                "UPDATE products SET name = 'Widget2', changed = '2026-06-01 10:06:00' WHERE id = '1'"
            )
            conn.execute("INSERT INTO products VALUES ('2', 'Gadget', '2026-06-01 10:05:00')")
            conn.commit()
        finally:
            conn.close()

        engine.refresh()
        rows = {r["id"]: r for r in engine.execute("SELECT id, name FROM products")}
        assert rows["1"]["name"] == "Widget2"
        assert rows["2"]["name"] == "Gadget"
    finally:
        engine.close()
|