Fix frozen delta watermark and add error stats, lazy source, concurrent disk reads, and per-engine config

This commit is contained in:
Jan Doubravský
2026-06-08 19:35:33 +02:00
parent 209ae667ab
commit 6dc85e4f3c
17 changed files with 668 additions and 71 deletions
+128 -1
View File
@@ -1,4 +1,6 @@
import sqlite3
import threading
from datetime import datetime
from types import SimpleNamespace
import pytest
@@ -7,7 +9,7 @@ from sqlalchemy import create_engine
import sqlmem.engine as eng_mod
from sqlmem import CachingEngine, DeltaConfig
from sqlmem.cache import CacheManager
from sqlmem.delta import DeltaRefresher, ResolvedDelta
from sqlmem.delta import DeltaRefresher, ResolvedDelta, _bind_watermark
from sqlmem.executor import QueryExecutor
from sqlmem.parser import parse
from sqlmem.registry import ColumnRegistry
@@ -117,6 +119,89 @@ def test_refresh_without_changes_is_noop(env):
assert before == after
# ---------------------------------------------------------------------------
# Watermark binding — regression for the datetime-as-string delta bug
# (SQL Server error 241: 'T'-separated 6-digit-microsecond ISO string can't be
# implicitly converted varchar->datetime, freezing the delta watermark).
# ---------------------------------------------------------------------------
def test_bind_watermark_parses_iso_datetime():
    """A 'T'-separated ISO string with microseconds is bound as a datetime."""
    expected = datetime(2026, 6, 5, 14, 54, 24, 823000)
    bound = _bind_watermark("2026-06-05T14:54:24.823000")
    assert bound == expected
def test_bind_watermark_parses_space_separated():
    """A space-separated timestamp string is bound as a datetime too."""
    expected = datetime(2026, 6, 1, 10, 5, 0)
    bound = _bind_watermark("2026-06-01 10:05:00")
    assert bound == expected
def test_bind_watermark_passes_through_non_datetime():
    """A value that does not look like a datetime comes back unchanged."""
    # Integer rowversion / non-datetime change column — left untouched.
    raw = "12345"
    assert _bind_watermark(raw) == raw
class _SpyCursor:
    """Minimal cursor stand-in that hands out a fixed set of rows in batches."""

    def __init__(self, rows):
        # Copy the input so consuming the cursor never mutates the caller's rows.
        self._rows = [*rows]

    def fetchmany(self, n):
        """Return the leading ``n`` pending rows and drop them from the cursor."""
        head = self._rows[:n]
        self._rows = self._rows[n:]
        return head
class _SpySource:
    """Source stand-in (in place of the pyodbc source) that remembers every
    query it is asked to run together with the parameters bound to it."""

    def __init__(self, rows):
        # ``bound`` collects one ``(sql, params)`` tuple per execute() call.
        self.bound = []
        self._rows = rows

    def execute(self, sql, params=()):
        """Record the call and return a cursor over the canned rows."""
        call = (sql, params)
        self.bound.append(call)
        cursor = _SpyCursor(self._rows)
        return cursor
def test_refresh_binds_watermark_as_datetime(env):
    """The stored watermark has to be handed to the source as a datetime
    object rather than the raw ISO text; a raw string makes SQL Server fail
    with error 241, which leaves the delta frozen."""
    watermark = "2026-06-05T14:54:24.823000"
    env.cache.set_last_synced_at("products", watermark)
    source = _SpySource(rows=[("1", "Widget", "9.99", watermark)])

    env.refresher.refresh(source)

    assert source.bound, "source query was never issued"
    # Only the parameters of the most recent query matter here.
    last_params = source.bound[-1][1]
    assert last_params == (datetime(2026, 6, 5, 14, 54, 24, 823000),)
# ---------------------------------------------------------------------------
# Refresh failures are recorded (4.3) so a stuck delta is visible in stats
# ---------------------------------------------------------------------------
class _RaisingSource:
    """Source stand-in whose every query fails with a RuntimeError."""

    def execute(self, sql, params=()):
        """Raise unconditionally, whatever SQL or parameters are passed."""
        failure = RuntimeError("boom 241")
        raise failure
def test_failed_delta_refresh_records_error(env):
    """Refreshing against a failing source leaves an error record for the table."""
    env.refresher.refresh(_RaisingSource())

    cache = env.cache
    recorded = cache.get_errors()["products"]
    assert recorded.consecutive == 1
    assert "boom 241" in recorded.message
    assert cache.error_total == 1
    # State is marked error even though the cache still holds the last-good data.
    assert cache.get_states()["products"] == "error"
def test_delta_success_resets_failure_streak(env):
    """A successful refresh after a failed one returns the consecutive count to 0."""

    def streak():
        # Current consecutive-failure count recorded for the products table.
        return env.cache.get_errors()["products"].consecutive

    env.refresher.refresh(_RaisingSource())
    assert streak() == 1

    env.refresher.refresh(env.source)  # real source — succeeds
    assert streak() == 0
# ---------------------------------------------------------------------------
# Engine-level: PK auto-discovery, reset, end-to-end refresh
# ---------------------------------------------------------------------------
@@ -170,6 +255,48 @@ def test_engine_reset(source_engine, patched_cache):
engine.close()
def test_startup_catch_up_is_non_blocking_by_default(source_engine, patched_cache, monkeypatch):
    """By default the startup catch-up runs on the background thread, not the
    main thread, so it never blocks application startup."""
    threads: list[str] = []
    started = threading.Event()
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        # Note which thread performs the refresh, then defer to the real method.
        threads.append(threading.current_thread().name)
        started.set()
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig("changed", ["id"])}
    )
    try:
        # __init__ has returned; the main thread must not have run the catch-up.
        assert "MainThread" not in threads
        assert started.wait(2), "background catch-up never ran"
        assert threads == ["sqlmem-delta"]
    finally:
        # Close the engine even when an assertion above fails, so a failing
        # run does not skip the cleanup call.
        engine.close()
def test_blocking_startup_refresh_runs_synchronously(source_engine, patched_cache, monkeypatch):
    """With ``blocking_startup_refresh=True`` the catch-up is executed on the
    calling thread before the constructor returns."""
    threads: list[str] = []
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        # Note which thread performs the refresh, then defer to the real method.
        threads.append(threading.current_thread().name)
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine,
        delta={"products": DeltaConfig("changed", ["id"])},
        blocking_startup_refresh=True,
    )
    try:
        # Opt-in: the catch-up ran on the main thread before __init__ returned.
        assert "MainThread" in threads
    finally:
        # Close the engine even when the assertion above fails, so a failing
        # run does not skip the cleanup call.
        engine.close()
def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
engine = CachingEngine(
source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}