Fix frozen delta watermark and add error stats, lazy source, concurrent disk reads, and per-engine config
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import sqlite3
|
||||
import threading
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -96,6 +97,68 @@ def test_disk_mode_reload_in_new_instance(tmp_path, source_conn):
|
||||
c2.close()
|
||||
|
||||
|
||||
def test_quoted_reserved_and_spaced_identifiers(tmp_path):
    """Table/column names that are reserved words or contain spaces must work.

    Builds an in-memory SQLite source whose table name contains a space and
    whose columns are ``order`` and ``group by``, loads it into a
    ``CacheManager`` and reads it back with quoted identifiers.
    """
    table_name = "weird tbl"
    column_names = ["order", "group by"]
    seed_rows = [("1", "a"), ("2", "b")]

    src = sqlite3.connect(":memory:")
    src.execute('CREATE TABLE "weird tbl" ("order" TEXT, "group by" TEXT)')
    src.executemany('INSERT INTO "weird tbl" VALUES (?, ?)', seed_rows)
    src.commit()

    c = CacheManager(db_path=tmp_path / "c.db", backup_interval=9999)
    c.load_table(table_name, column_names, src)
    assert c.is_table_cached(table_name) is True

    # Only the row payload matters here; the first element is discarded.
    _header, fetched = c.execute_in_memory('SELECT "order", "group by" FROM "weird tbl"')
    assert ("1", "a") in fetched

    c.close()
    src.close()
|
||||
|
||||
|
||||
def test_disk_mode_uses_separate_read_connection(tmp_path, source_conn):
    """Disk-mode reads go through a per-thread read connection, not the writer."""
    cache_file = tmp_path / "c.db"
    c = CacheManager(db_path=cache_file, backup_interval=9999, in_memory=False)
    c.load_table("users", ["name", "email"], source_conn)

    _header, fetched = c.execute_in_memory("SELECT name FROM users ORDER BY name")
    names = [record[0] for record in fetched]
    assert names == ["alice", "bob"]

    # Exactly one read connection was opened, and it is not the writer.
    assert len(c._read_conns) == 1
    assert c._read_conns[0] is not c.connection  # dedicated read conn
    c.close()
|
||||
|
||||
|
||||
def test_disk_mode_concurrent_reads(tmp_path, source_conn):
    """Several reader threads each get their own connection and correct results.

    Five threads run the same SELECT against a disk-mode cache. Each must see
    both rows, none may raise, and the cache must end up holding one read
    connection per reader thread.
    """
    c = CacheManager(db_path=tmp_path / "c.db", backup_interval=9999, in_memory=False)
    try:
        c.load_table("users", ["name"], source_conn)

        results: list[int] = []
        errors: list[Exception] = []

        def reader() -> None:
            # Collect failures instead of raising: an exception inside a
            # thread would otherwise never reach the test's assertions.
            try:
                _, rows = c.execute_in_memory("SELECT name FROM users")
                results.append(len(rows))
            except Exception as e:  # noqa: BLE001
                errors.append(e)

        # Daemon threads so a hung reader cannot block interpreter shutdown.
        threads = [threading.Thread(target=reader, daemon=True) for _ in range(5)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(5)

        # join() returns silently on timeout; report a hung reader explicitly
        # rather than as a confusing mismatch in the results below.
        assert not any(t.is_alive() for t in threads), "reader thread did not finish within 5s"

        assert not errors
        assert results == [2] * 5
        assert len(c._read_conns) == 5  # one read connection per reader thread
    finally:
        # Close the cache even when an assertion fails.
        c.close()
|
||||
|
||||
|
||||
def test_memory_mode_uses_shared_connection(cache, source_conn):
    """In-memory mode can't share :memory: across connections — no read conns."""
    cached_columns = ["name"]
    cache.load_table("users", cached_columns, source_conn)

    # Run a read; in memory mode it must not create any read connection.
    cache.execute_in_memory("SELECT name FROM users")

    assert cache._read_conns == []
|
||||
|
||||
|
||||
def test_disk_mode_reset_keeps_file(tmp_path, source_conn):
|
||||
db_path = tmp_path / "cache.db"
|
||||
c = CacheManager(db_path=db_path, backup_interval=9999, in_memory=False)
|
||||
|
||||
+128
-1
@@ -1,4 +1,6 @@
|
||||
import sqlite3
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
@@ -7,7 +9,7 @@ from sqlalchemy import create_engine
|
||||
import sqlmem.engine as eng_mod
|
||||
from sqlmem import CachingEngine, DeltaConfig
|
||||
from sqlmem.cache import CacheManager
|
||||
from sqlmem.delta import DeltaRefresher, ResolvedDelta
|
||||
from sqlmem.delta import DeltaRefresher, ResolvedDelta, _bind_watermark
|
||||
from sqlmem.executor import QueryExecutor
|
||||
from sqlmem.parser import parse
|
||||
from sqlmem.registry import ColumnRegistry
|
||||
@@ -117,6 +119,89 @@ def test_refresh_without_changes_is_noop(env):
|
||||
assert before == after
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Watermark binding — regression for the datetime-as-string delta bug
|
||||
# (SQL Server error 241: 'T'-separated 6-digit-microsecond ISO string can't be
|
||||
# implicitly converted varchar->datetime, freezing the delta watermark).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_bind_watermark_parses_iso_datetime():
    """A 'T'-separated ISO string with microseconds is bound as a datetime."""
    bound = _bind_watermark("2026-06-05T14:54:24.823000")
    expected = datetime(2026, 6, 5, 14, 54, 24, 823000)
    assert bound == expected
|
||||
|
||||
|
||||
def test_bind_watermark_parses_space_separated():
    """A space-separated timestamp string is bound as a datetime too."""
    bound = _bind_watermark("2026-06-01 10:05:00")
    expected = datetime(2026, 6, 1, 10, 5, 0)
    assert bound == expected
|
||||
|
||||
|
||||
def test_bind_watermark_passes_through_non_datetime():
    """A value that is not a datetime string comes back unchanged."""
    # Integer rowversion / non-datetime change column — left untouched.
    raw_value = "12345"
    assert _bind_watermark(raw_value) == "12345"
|
||||
|
||||
|
||||
class _SpyCursor:
|
||||
def __init__(self, rows):
|
||||
self._rows = list(rows)
|
||||
|
||||
def fetchmany(self, n):
|
||||
batch, self._rows = self._rows[:n], self._rows[n:]
|
||||
return batch
|
||||
|
||||
|
||||
class _SpySource:
    """Records the parameters bound to each query (stands in for the pyodbc source)."""

    def __init__(self, rows):
        self._rows = rows
        # One (sql, params) tuple per execute() call, in call order.
        self.bound = []

    def execute(self, sql, params=()):
        """Log the call and return a fresh cursor over the preset rows."""
        call_record = (sql, params)
        self.bound.append(call_record)
        cursor = _SpyCursor(self._rows)
        return cursor
|
||||
|
||||
|
||||
def test_refresh_binds_watermark_as_datetime(env):
    """The watermark must reach the source as a real datetime, not a raw ISO
    string — otherwise SQL Server raises error 241 and the delta freezes."""
    iso_watermark = "2026-06-05T14:54:24.823000"
    env.cache.set_last_synced_at("products", iso_watermark)

    source_row = ("1", "Widget", "9.99", "2026-06-05T14:54:24.823000")
    spy = _SpySource(rows=[source_row])

    env.refresher.refresh(spy)

    assert spy.bound, "source query was never issued"
    # Inspect the parameters of the most recent query only.
    _sql, params = spy.bound[-1]
    expected_params = (datetime(2026, 6, 5, 14, 54, 24, 823000),)
    assert params == expected_params
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Refresh failures are recorded (4.3) so a stuck delta is visible in stats
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _RaisingSource:
|
||||
def execute(self, sql, params=()):
|
||||
raise RuntimeError("boom 241")
|
||||
|
||||
|
||||
def test_failed_delta_refresh_records_error(env):
    """A refresh whose source raises is recorded on the cache for that table."""
    failing_source = _RaisingSource()
    env.refresher.refresh(failing_source)

    recorded = env.cache.get_errors()["products"]
    assert recorded.consecutive == 1
    assert "boom 241" in recorded.message
    assert env.cache.error_total == 1

    # State is marked error even though the cache still holds the last-good data.
    table_states = env.cache.get_states()
    assert table_states["products"] == "error"
|
||||
|
||||
|
||||
def test_delta_success_resets_failure_streak(env):
    """A successful refresh after a failure resets the consecutive-failure count."""
    env.refresher.refresh(_RaisingSource())
    streak_after_failure = env.cache.get_errors()["products"].consecutive
    assert streak_after_failure == 1

    env.refresher.refresh(env.source)  # real source — succeeds
    streak_after_success = env.cache.get_errors()["products"].consecutive
    assert streak_after_success == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Engine-level: PK auto-discovery, reset, end-to-end refresh
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -170,6 +255,48 @@ def test_engine_reset(source_engine, patched_cache):
|
||||
engine.close()
|
||||
|
||||
|
||||
def test_startup_catch_up_is_non_blocking_by_default(source_engine, patched_cache, monkeypatch):
    """By default the startup catch-up runs on the background thread, not the
    main thread, so it never blocks application startup.

    ``CachingEngine._run_refresh`` is wrapped with a spy that records the name
    of the thread it runs on and then delegates to the real implementation.
    """
    threads: list[str] = []
    started = threading.Event()
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        threads.append(threading.current_thread().name)
        started.set()
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig("changed", ["id"])}
    )
    try:
        # __init__ has returned; the main thread must not have run the catch-up.
        assert "MainThread" not in threads
        assert started.wait(2), "background catch-up never ran"
        assert threads == ["sqlmem-delta"]
    finally:
        # Close the engine even when an assertion fails, so it does not
        # outlive this test.
        engine.close()
|
||||
|
||||
|
||||
def test_blocking_startup_refresh_runs_synchronously(source_engine, patched_cache, monkeypatch):
    """With ``blocking_startup_refresh=True`` the catch-up runs on the main thread.

    ``CachingEngine._run_refresh`` is wrapped with a spy that records the name
    of the calling thread before delegating to the real implementation.
    """
    threads: list[str] = []
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        threads.append(threading.current_thread().name)
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine,
        delta={"products": DeltaConfig("changed", ["id"])},
        blocking_startup_refresh=True,
    )
    try:
        # Opt-in: the catch-up ran on the main thread before __init__ returned.
        assert "MainThread" in threads
    finally:
        # Close the engine even when the assertion fails, so it does not
        # outlive this test.
        engine.close()
|
||||
|
||||
|
||||
def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
|
||||
engine = CachingEngine(
|
||||
source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}
|
||||
|
||||
@@ -124,6 +124,22 @@ def test_second_query_same_columns_is_cache_hit(engine):
|
||||
assert len(rows) == 3
|
||||
|
||||
|
||||
def test_cache_hit_does_not_open_source(engine, source_engine, monkeypatch):
    """A pure cache hit must not open a source connection (lazy source)."""
    engine.execute("SELECT id, name FROM products")  # miss → caches

    connect_calls = []
    real_connect = source_engine.connect

    def counting_connect(*args, **kwargs):
        # Record the call, then behave exactly like the real connect().
        connect_calls.append(1)
        return real_connect(*args, **kwargs)

    monkeypatch.setattr(source_engine, "connect", counting_connect)
    engine.execute("SELECT id, name FROM products")  # hit → no source access
    assert len(connect_calls) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL file creation — backup to disk
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -331,3 +347,41 @@ def test_in_memory_override_respects_config(source_engine, cache_path, monkeypat
|
||||
ce = CachingEngine(source_engine) # no explicit in_memory
|
||||
assert ce._cache._in_memory is False
|
||||
ce.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-engine configuration (constructor overrides env defaults)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_constructor_config_overrides(source_engine, tmp_path):
    """Constructor keyword arguments are reflected on the engine and its cache."""
    p = tmp_path / "explicit_cache.db"
    overrides = {
        "cache_db_path": p,
        "fetch_batch": 3,
        "dialect": "sqlite",
        "backup_interval": 12345,
        "refresh_interval": 42,
        "in_memory": False,
    }
    ce = CachingEngine(source_engine, **overrides)
    ce.execute("SELECT id, name FROM products")

    # The explicit cache file exists once a query has been executed.
    assert p.exists()

    inner_cache = ce._cache
    assert inner_cache._fetch_batch == 3
    assert inner_cache._dialect == "sqlite"
    assert ce._dialect == "sqlite"
    assert inner_cache._backup_interval == 12345
    assert ce._refresh_interval == 42
    ce.close()
|
||||
|
||||
|
||||
def test_two_engines_separate_cache_files(source_engine, tmp_path):
    """Two engines in one process can target different cache files."""
    path_a = tmp_path / "a.db"
    path_b = tmp_path / "b.db"
    a = CachingEngine(source_engine, cache_db_path=path_a, in_memory=False)
    b = CachingEngine(source_engine, cache_db_path=path_b, in_memory=False)

    # Only engine "a" runs a query, so only its cache should hold the table.
    a.execute("SELECT id FROM products")

    assert (tmp_path / "a.db").exists()
    assert a._cache.is_table_cached("products") is True
    assert b._cache.is_table_cached("products") is False  # independent cache
    a.close()
    b.close()
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
from loguru import logger
|
||||
|
||||
import sqlmem
|
||||
|
||||
|
||||
def test_add_sink_idempotent_no_duplicate_lines():
    """Calling add_sink twice for the same sink must not duplicate log lines."""
    sqlmem._added_sinks.clear()
    msgs: list[str] = []

    def sink(message):
        # Capture each emitted record as plain text.
        msgs.append(str(message))

    try:
        for _attempt in range(2):  # second call: no-op
            sqlmem.add_sink(sink, level="DEBUG", colorize=False)
        assert len(sqlmem._added_sinks) == 1

        # Emit one record that passes the "sqlmem" name filter.
        logger.patch(lambda r: r.update(name="sqlmem")).info("hello sqlmem")
        matching = [m for m in msgs if "hello sqlmem" in m]
        assert len(matching) == 1
    finally:
        for handler_id in sqlmem._added_sinks.values():
            logger.remove(handler_id)
        sqlmem._added_sinks.clear()
        logger.disable("sqlmem")  # restore the default-silent state for other tests
|
||||
@@ -73,6 +73,29 @@ def test_counters_still_reported(source_engine, patched_cache):
|
||||
engine.close()
|
||||
|
||||
|
||||
def test_stats_exposes_table_error(source_engine, patched_cache):
    """An error recorded on the cache shows up in the engine's stats."""
    engine = CachingEngine(source_engine)
    engine.execute("SELECT id, name FROM products")
    engine._cache.record_error("products", "ValueError: boom")

    s = engine.stats
    assert s.errors == 1

    products_stats = s.tables["products"]
    assert products_stats.consecutive_failures == 1
    assert products_stats.last_error == "ValueError: boom"
    assert products_stats.last_error_at is not None
    engine.close()
|
||||
|
||||
|
||||
def test_stats_no_error_by_default(source_engine, patched_cache):
    """Without any recorded error, stats report zero errors for the table."""
    engine = CachingEngine(source_engine)
    engine.execute("SELECT id, name FROM products")

    s = engine.stats
    assert s.errors == 0

    products_stats = s.tables["products"]
    assert products_stats.consecutive_failures == 0
    assert products_stats.last_error is None
    engine.close()
|
||||
|
||||
|
||||
# --- a table being loaded for the first time shows up as "loading" ----------
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user