Add incremental delta refresh and fix Decimal/datetime cache binding

This commit is contained in:
Jan Doubravský
2026-06-05 11:09:16 +02:00
parent 530c2618cf
commit 33aa126ff6
13 changed files with 798 additions and 53 deletions
+109
View File
@@ -0,0 +1,109 @@
import datetime
import decimal
import uuid
import pytest
from sqlmem._coerce import coerce_params, to_sqlite
from sqlmem.cache import CacheManager
class _FakeCursor:
    """Minimal cursor stand-in that hands back a fixed list of rows."""

    def __init__(self, rows):
        # This fake carries no column metadata.
        self.description = None
        self._rows = rows

    def fetchall(self):
        """Return the rows supplied at construction time."""
        return self._rows
class FakeSource:
    """Stand-in for a pyodbc connection that returns non-sqlite-native types."""

    def __init__(self, rows):
        self._rows = rows

    def execute(self, sql, *args):
        """Ignore the statement and its parameters; return a cursor over the stored rows."""
        canned_rows = self._rows
        return _FakeCursor(canned_rows)
@pytest.fixture
def cache(tmp_path):
    """Yield a CacheManager backed by a file under ``tmp_path``; close it afterwards."""
    db_file = tmp_path / "cache.db"
    manager = CacheManager(db_path=db_file, backup_interval=9999)
    yield manager
    manager.close()
# --- to_sqlite / coerce_params unit tests -----------------------------------
def test_decimal_to_str():
    """A Decimal is converted to its string form."""
    converted = to_sqlite(decimal.Decimal("9.99"))
    assert converted == "9.99"
def test_decimal_keeps_precision():
    """A Decimal with many digits comes back as the same digits in a string."""
    literal = "123456789.123456789"
    converted = to_sqlite(decimal.Decimal(literal))
    assert converted == "123456789.123456789"
def test_datetime_to_iso():
    """A datetime is converted to a 'T'-separated ISO string."""
    moment = datetime.datetime(2026, 6, 1, 10, 0, 0)
    converted = to_sqlite(moment)
    assert converted == "2026-06-01T10:00:00"
def test_date_to_iso():
    """A date is converted to a YYYY-MM-DD string."""
    day = datetime.date(2026, 6, 1)
    converted = to_sqlite(day)
    assert converted == "2026-06-01"
def test_time_to_iso():
    """A time is converted to an HH:MM:SS string."""
    clock = datetime.time(10, 30, 0)
    converted = to_sqlite(clock)
    assert converted == "10:30:00"
def test_uuid_to_str():
    """A UUID is converted to the same text that ``str()`` produces for it."""
    identifier = uuid.uuid4()
    expected = str(identifier)
    assert to_sqlite(identifier) == expected
def test_bytearray_to_bytes():
    """A bytearray converts to a value that compares equal to the matching bytes."""
    mutable_blob = bytearray(b"abc")
    converted = to_sqlite(mutable_blob)
    assert converted == b"abc"
@pytest.mark.parametrize("value", [1, 1.5, "text", None, b"blob", True])
def test_native_values_pass_through(value):
    """Each listed value comes back from ``to_sqlite`` comparing equal to itself."""
    converted = to_sqlite(value)
    assert converted == value
def test_coerce_params_tuple():
    """Positional params: the Decimal becomes a string, the str is left alone."""
    params = (decimal.Decimal("1.5"), "x")
    coerced = coerce_params(params)
    assert coerced == ("1.5", "x")
def test_coerce_params_dict():
    """Named params: the Decimal value becomes a string under the same key."""
    params = {"p": decimal.Decimal("2")}
    coerced = coerce_params(params)
    assert coerced == {"p": "2"}
def test_coerce_params_none():
    """Passing ``None`` as the parameter set yields ``None``."""
    coerced = coerce_params(None)
    assert coerced is None
# --- integration: values reach the cache through coercion -------------------
def test_load_table_coerces_decimal_and_datetime(cache):
    """Decimal and datetime values from the source are read back from the cache as text."""
    price = decimal.Decimal("9.99")
    changed = datetime.datetime(2026, 6, 1, 10, 0, 0)
    source = FakeSource([("1", price, changed)])
    cache.load_table("t", ["id", "price", "changed"], source)
    _columns, fetched = cache.execute_in_memory("SELECT id, price, changed FROM t")
    assert fetched == [("1", "9.99", "2026-06-01T10:00:00")]
def test_decimal_where_param_matches_text_value(cache):
    """A Decimal bound as a WHERE parameter matches the row whose price is the text '9.99'."""
    cache.load_table("t", ["price"], FakeSource([("9.99",)]))
    query = "SELECT price FROM t WHERE price = ?"
    params = (decimal.Decimal("9.99"),)
    _columns, fetched = cache.execute_in_memory(query, params)
    assert fetched == [("9.99",)]
def test_upsert_rows_coerces_decimal(cache):
    """Upserting a Decimal price over an existing key leaves the text '12.50' in the cache."""
    columns = ["id", "price"]
    cache.load_table("t", columns, FakeSource([("1", "0")]))
    cache.create_unique_index("t", ["id"])
    replacement = ("1", decimal.Decimal("12.50"))
    cache.upsert_rows("t", ["id", "price"], [replacement])
    _columns, fetched = cache.execute_in_memory("SELECT price FROM t WHERE id = '1'")
    assert fetched == [("12.50",)]
+189
View File
@@ -0,0 +1,189 @@
import sqlite3
from types import SimpleNamespace
import pytest
from sqlalchemy import create_engine
import sqlmem.engine as eng_mod
from sqlmem import CachingEngine, DeltaConfig
from sqlmem.cache import CacheManager
from sqlmem.delta import DeltaRefresher, ResolvedDelta
from sqlmem.executor import QueryExecutor
from sqlmem.parser import parse
from sqlmem.registry import ColumnRegistry
from sqlmem.stats import StatsCollector
def cached_rows(cache, sql):
    """Run *sql* through ``cache.execute_in_memory`` and return one dict per row.

    Each dict maps the returned column names to that row's values.
    """
    columns, records = cache.execute_in_memory(sql)
    as_dicts = []
    for record in records:
        as_dicts.append(dict(zip(columns, record)))
    return as_dicts
# ---------------------------------------------------------------------------
# Refresher unit tests (in-memory source connection)
# ---------------------------------------------------------------------------
@pytest.fixture
def source_conn():
    """Yield an in-memory sqlite connection seeded with two ``products`` rows.

    The connection is closed after the test.
    """
    seed_script = """
CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, price TEXT, changed TEXT);
INSERT INTO products VALUES
('1', 'Widget', '9.99', '2026-06-01 10:00:00'),
('2', 'Gadget', '19.99', '2026-06-01 10:05:00');
"""
    connection = sqlite3.connect(":memory:")
    connection.executescript(seed_script)
    connection.commit()
    yield connection
    connection.close()
@pytest.fixture
def env(tmp_path, source_conn):
    """Yield a namespace with a primed cache, the source connection and a refresher.

    The namespace exposes ``cache``, ``source`` and ``refresher``. Before
    yielding, one SELECT is run through a ``QueryExecutor`` so the
    ``products`` table is already loaded into the cache.

    The cache is closed in a ``finally`` block so it is also released when
    the setup itself raises before the test body runs.
    """
    cache = CacheManager(db_path=tmp_path / "cache.db", backup_interval=9999)
    try:
        registry = ColumnRegistry(cache.connection)
        stats = StatsCollector()
        delta = {"products": ResolvedDelta(change_column="changed", key_columns=["id"])}
        executor = QueryExecutor(cache, registry, source_conn, stats, delta)
        refresher = DeltaRefresher(cache, delta)
        # Initial load — caches id, name, price (+ augmented key/change columns).
        executor.execute(parse("SELECT id, name, price FROM products"))
        yield SimpleNamespace(cache=cache, source=source_conn, refresher=refresher)
    finally:
        cache.close()
def test_load_augments_key_and_change_columns(env):
    """The cached table includes ``changed`` even though the initial SELECT did not ask for it."""
    required = {"id", "name", "price", "changed"}
    cached_columns = set(env.cache.get_table_columns("products"))
    assert required <= cached_columns
def test_initial_watermark_is_max_change(env):
    """After the initial load the watermark equals the latest seeded ``changed`` value."""
    watermark = env.cache.get_last_synced_at("products")
    assert watermark == "2026-06-01 10:05:00"
def test_refresh_applies_updates(env):
    """A source-side update with a newer ``changed`` value reaches the cache and moves the watermark."""
    update_sql = (
        "UPDATE products SET price = '7.77', changed = '2026-06-01 10:10:00' WHERE id = '1'"
    )
    env.source.execute(update_sql)
    env.source.commit()
    env.refresher.refresh(env.source)
    by_id = {}
    for record in cached_rows(env.cache, "SELECT id, price FROM products"):
        by_id[record["id"]] = record
    assert by_id["1"]["price"] == "7.77"
    assert env.cache.get_last_synced_at("products") == "2026-06-01 10:10:00"
def test_refresh_inserts_new_rows(env):
    """A row inserted in the source after the initial load appears in the cache after refresh."""
    insert_sql = (
        "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:20:00')"
    )
    env.source.execute(insert_sql)
    env.source.commit()
    env.refresher.refresh(env.source)
    cached_ids = set()
    for record in cached_rows(env.cache, "SELECT id FROM products"):
        cached_ids.add(record["id"])
    assert cached_ids == {"1", "2", "3"}
def test_boundary_timestamp_not_missed_and_idempotent(env):
    """A row stamped exactly at the watermark is picked up once, even across two refreshes."""
    # New row sharing the exact watermark timestamp must still be picked up (>=),
    # and the row already at that timestamp must not be duplicated.
    insert_sql = (
        "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:05:00')"
    )
    env.source.execute(insert_sql)
    env.source.commit()
    # Idempotent — running the refresh twice must change nothing the second time.
    for _attempt in range(2):
        env.refresher.refresh(env.source)
    cached_ids = [record["id"] for record in cached_rows(env.cache, "SELECT id FROM products")]
    cached_ids.sort()
    assert cached_ids == ["1", "2", "3"]
def test_delete_by_nulling(env):
    """Setting ``name`` to NULL in the source leaves a None ``name`` in the cache after refresh."""
    null_sql = (
        "UPDATE products SET name = NULL, changed = '2026-06-01 10:30:00' WHERE id = '1'"
    )
    env.source.execute(null_sql)
    env.source.commit()
    env.refresher.refresh(env.source)
    by_id = {}
    for record in cached_rows(env.cache, "SELECT id, name FROM products"):
        by_id[record["id"]] = record
    assert by_id["1"]["name"] is None
def test_refresh_without_changes_is_noop(env):
    """Refreshing with an unchanged source leaves the cached rows as they were."""
    query = "SELECT id, name, price FROM products"
    snapshot_before = cached_rows(env.cache, query)
    env.refresher.refresh(env.source)
    snapshot_after = cached_rows(env.cache, query)
    assert snapshot_before == snapshot_after
# ---------------------------------------------------------------------------
# Engine-level: PK auto-discovery, reset, end-to-end refresh
# ---------------------------------------------------------------------------
@pytest.fixture
def source_db(tmp_path):
    """Create an on-disk sqlite source database and return its path.

    The database holds a ``products`` table with one row and a
    ``vw_products`` view over its ``id`` and ``name`` columns. The seeding
    connection is closed in a ``finally`` block so the file handle is
    released even if the script or the commit fails.
    """
    db_path = tmp_path / "source.db"
    conn = sqlite3.connect(db_path)
    try:
        conn.executescript(
            """
CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, changed TEXT);
INSERT INTO products VALUES ('1', 'Widget', '2026-06-01 10:00:00');
CREATE VIEW vw_products AS SELECT id, name FROM products;
"""
        )
        conn.commit()
    finally:
        conn.close()
    return db_path
@pytest.fixture
def source_engine(source_db):
    """Yield a SQLAlchemy engine pointing at the ``source_db`` file; dispose of it afterwards."""
    url = f"sqlite:///{source_db}"
    sa_engine = create_engine(url)
    yield sa_engine
    sa_engine.dispose()
@pytest.fixture
def patched_cache(tmp_path, monkeypatch):
    """Point the engine module's cache path at ``tmp_path`` and set its backup interval to 9999."""
    overrides = {
        "CACHE_DB_PATH": tmp_path / "cache.db",
        "BACKUP_INTERVAL_SECONDS": 9999,
    }
    for attribute, replacement in overrides.items():
        monkeypatch.setattr(eng_mod, attribute, replacement)
def test_pk_auto_discovery(source_engine, patched_cache):
    """With no ``key_columns`` given, the engine resolves ``products`` to the key ``["id"]``."""
    engine = CachingEngine(source_engine, delta={"products": DeltaConfig(change_column="changed")})
    try:
        assert engine._delta["products"].key_columns == ["id"]
    finally:
        # Close even when the assertion fails so the engine is not leaked.
        engine.close()
def test_view_without_key_raises(source_engine, patched_cache):
    """Configuring delta for ``vw_products`` without key columns raises ValueError."""
    with pytest.raises(ValueError):
        view_delta = {"vw_products": DeltaConfig(change_column="name")}
        CachingEngine(source_engine, delta=view_delta)
def test_engine_reset(source_engine, patched_cache):
    """``reset()`` leaves a previously cached table reported as not cached."""
    engine = CachingEngine(source_engine)
    try:
        engine.execute("SELECT id, name FROM products")
        assert engine._cache.is_table_cached("products") is True
        engine.reset()
        assert engine._cache.is_table_cached("products") is False
    finally:
        # Close even when an assertion fails so the engine is not leaked.
        engine.close()
def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
    """Engine-level refresh picks up an updated row and a newly inserted row.

    The source is modified through a separate sqlite connection after the
    first query has populated the cache. Both that connection and the
    engine are closed in ``finally`` blocks so a failing step does not leak
    them.
    """
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}
    )
    try:
        engine.execute("SELECT id, name FROM products")  # caches, watermark = 10:00

        conn = sqlite3.connect(source_db)
        try:
            conn.execute(
                "UPDATE products SET name = 'Widget2', changed = '2026-06-01 10:06:00' WHERE id = '1'"
            )
            conn.execute("INSERT INTO products VALUES ('2', 'Gadget', '2026-06-01 10:05:00')")
            conn.commit()
        finally:
            conn.close()

        engine.refresh()
        rows = {r["id"]: r for r in engine.execute("SELECT id, name FROM products")}
        assert rows["1"]["name"] == "Widget2"
        assert rows["2"]["name"] == "Gadget"
    finally:
        engine.close()