Fix frozen delta watermark and add error stats, lazy source, concurrent disk reads, and per-engine config
This commit is contained in:
+128
-1
@@ -1,4 +1,6 @@
|
||||
import sqlite3
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
@@ -7,7 +9,7 @@ from sqlalchemy import create_engine
|
||||
import sqlmem.engine as eng_mod
|
||||
from sqlmem import CachingEngine, DeltaConfig
|
||||
from sqlmem.cache import CacheManager
|
||||
from sqlmem.delta import DeltaRefresher, ResolvedDelta
|
||||
from sqlmem.delta import DeltaRefresher, ResolvedDelta, _bind_watermark
|
||||
from sqlmem.executor import QueryExecutor
|
||||
from sqlmem.parser import parse
|
||||
from sqlmem.registry import ColumnRegistry
|
||||
@@ -117,6 +119,89 @@ def test_refresh_without_changes_is_noop(env):
|
||||
assert before == after
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Watermark binding — regression for the datetime-as-string delta bug
|
||||
# (SQL Server error 241: 'T'-separated 6-digit-microsecond ISO string can't be
|
||||
# implicitly converted varchar->datetime, freezing the delta watermark).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_bind_watermark_parses_iso_datetime():
    """A 'T'-separated ISO string with microseconds comes back as a ``datetime``."""
    expected = datetime(2026, 6, 5, 14, 54, 24, 823000)
    bound = _bind_watermark("2026-06-05T14:54:24.823000")
    assert bound == expected
|
||||
|
||||
|
||||
def test_bind_watermark_parses_space_separated():
    """A space-separated timestamp string also comes back as a ``datetime``."""
    expected = datetime(2026, 6, 1, 10, 5, 0)
    bound = _bind_watermark("2026-06-01 10:05:00")
    assert bound == expected
|
||||
|
||||
|
||||
def test_bind_watermark_passes_through_non_datetime():
    """A watermark that is not a datetime string is returned unchanged."""
    # Integer rowversion / non-datetime change column — left untouched.
    raw = "12345"
    assert _bind_watermark(raw) == raw
|
||||
|
||||
|
||||
class _SpyCursor:
|
||||
def __init__(self, rows):
|
||||
self._rows = list(rows)
|
||||
|
||||
def fetchmany(self, n):
|
||||
batch, self._rows = self._rows[:n], self._rows[n:]
|
||||
return batch
|
||||
|
||||
|
||||
class _SpySource:
    """Stand-in for the pyodbc source that records what each query was bound with."""

    def __init__(self, rows):
        # One (sql, params) tuple per execute() call, in call order.
        self.bound = []
        self._rows = rows

    def execute(self, sql, params=()):
        """Log the call and return a fresh cursor over the configured rows."""
        call = (sql, params)
        self.bound.append(call)
        return _SpyCursor(self._rows)
|
||||
|
||||
|
||||
def test_refresh_binds_watermark_as_datetime(env):
    """The watermark must reach the source as a real datetime, not a raw ISO
    string — otherwise SQL Server raises error 241 and the delta freezes."""
    watermark = "2026-06-05T14:54:24.823000"
    env.cache.set_last_synced_at("products", watermark)
    spy = _SpySource(rows=[("1", "Widget", "9.99", watermark)])

    env.refresher.refresh(spy)

    assert spy.bound, "source query was never issued"
    # Each recorded call is (sql, params); only the parameters matter here.
    last_params = spy.bound[-1][1]
    assert last_params == (datetime(2026, 6, 5, 14, 54, 24, 823000),)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Refresh failures are recorded (4.3) so a stuck delta is visible in stats
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _RaisingSource:
|
||||
def execute(self, sql, params=()):
|
||||
raise RuntimeError("boom 241")
|
||||
|
||||
|
||||
def test_failed_delta_refresh_records_error(env):
    """A refresh whose source raises is recorded in the cache's error stats."""
    env.refresher.refresh(_RaisingSource())

    cache = env.cache
    recorded = cache.get_errors()["products"]
    assert recorded.consecutive == 1
    assert "boom 241" in recorded.message
    assert cache.error_total == 1
    # State is marked error even though the cache still holds the last-good data.
    states = cache.get_states()
    assert states["products"] == "error"
|
||||
|
||||
|
||||
def test_delta_success_resets_failure_streak(env):
    """A successful refresh after a failed one brings the consecutive-error count back to 0."""

    def streak():
        # Current consecutive-failure count recorded for the "products" table.
        return env.cache.get_errors()["products"].consecutive

    env.refresher.refresh(_RaisingSource())
    assert streak() == 1

    env.refresher.refresh(env.source)  # real source — succeeds
    assert streak() == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Engine-level: PK auto-discovery, reset, end-to-end refresh
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -170,6 +255,48 @@ def test_engine_reset(source_engine, patched_cache):
|
||||
engine.close()
|
||||
|
||||
|
||||
def test_startup_catch_up_is_non_blocking_by_default(source_engine, patched_cache, monkeypatch):
    """By default the startup catch-up runs on the background thread, not the
    main thread, so it never blocks application startup."""
    threads: list[str] = []
    started = threading.Event()
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        # Record which thread performs the refresh, then delegate to the real method.
        threads.append(threading.current_thread().name)
        started.set()
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig("changed", ["id"])}
    )
    # Close the engine even when an assertion fails, so a failing run does not
    # leave it open for the tests that follow.
    try:
        # __init__ has returned; the main thread must not have run the catch-up.
        assert "MainThread" not in threads
        # Wait (up to 2 s) for the first refresh before checking who ran it.
        assert started.wait(2), "background catch-up never ran"
        assert threads == ["sqlmem-delta"]
    finally:
        engine.close()
|
||||
|
||||
|
||||
def test_blocking_startup_refresh_runs_synchronously(source_engine, patched_cache, monkeypatch):
    """With ``blocking_startup_refresh=True`` the catch-up runs on the main
    thread before the engine constructor returns."""
    threads: list[str] = []
    real = eng_mod.CachingEngine._run_refresh

    def spy(self):
        # Record which thread performs the refresh, then delegate to the real method.
        threads.append(threading.current_thread().name)
        return real(self)

    monkeypatch.setattr(eng_mod.CachingEngine, "_run_refresh", spy)
    engine = CachingEngine(
        source_engine,
        delta={"products": DeltaConfig("changed", ["id"])},
        blocking_startup_refresh=True,
    )
    # Close the engine even when the assertion fails, so a failing run does not
    # leave it open for the tests that follow.
    try:
        # Opt-in: the catch-up ran on the main thread before __init__ returned.
        assert "MainThread" in threads
    finally:
        engine.close()
|
||||
|
||||
|
||||
def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
|
||||
engine = CachingEngine(
|
||||
source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}
|
||||
|
||||
Reference in New Issue
Block a user