Add incremental delta refresh and fix Decimal/datetime cache binding
This commit is contained in:
@@ -0,0 +1,109 @@
|
||||
import datetime
|
||||
import decimal
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from sqlmem._coerce import coerce_params, to_sqlite
|
||||
from sqlmem.cache import CacheManager
|
||||
|
||||
|
||||
class _FakeCursor:
|
||||
def __init__(self, rows):
|
||||
self._rows = rows
|
||||
self.description = None
|
||||
|
||||
def fetchall(self):
|
||||
return self._rows
|
||||
|
||||
|
||||
class FakeSource:
    """Stand-in for a pyodbc connection that returns non-sqlite-native types."""

    def __init__(self, rows):
        # Rows handed back, unchanged, by every execute() call.
        self._rows = rows

    def execute(self, sql, *args):
        """Ignore the statement and its parameters; return a cursor over the canned rows."""
        cursor = _FakeCursor(self._rows)
        return cursor
|
||||
|
||||
|
||||
@pytest.fixture
def cache(tmp_path):
    """Yield a CacheManager backed by a per-test ``cache.db`` file and close it afterwards."""
    db_file = tmp_path / "cache.db"
    # NOTE(review): the large backup_interval presumably keeps periodic backups
    # from firing during a test — confirm against CacheManager.
    manager = CacheManager(db_path=db_file, backup_interval=9999)
    yield manager
    manager.close()
|
||||
|
||||
|
||||
# --- to_sqlite / coerce_params unit tests -----------------------------------
|
||||
|
||||
|
||||
def test_decimal_to_str():
    """A Decimal is expected to be converted to its string form."""
    value = decimal.Decimal("9.99")
    assert to_sqlite(value) == "9.99"
|
||||
|
||||
|
||||
def test_decimal_keeps_precision():
    """Every digit of a long Decimal must survive the conversion."""
    literal = "123456789.123456789"
    assert to_sqlite(decimal.Decimal(literal)) == "123456789.123456789"
|
||||
|
||||
|
||||
def test_datetime_to_iso():
    """A datetime is expected to be rendered as a 'T'-separated ISO string."""
    moment = datetime.datetime(2026, 6, 1, 10, 0, 0)
    assert to_sqlite(moment) == "2026-06-01T10:00:00"
|
||||
|
||||
|
||||
def test_date_to_iso():
    """A date is expected to be rendered as YYYY-MM-DD."""
    day = datetime.date(2026, 6, 1)
    assert to_sqlite(day) == "2026-06-01"
|
||||
|
||||
|
||||
def test_time_to_iso():
    """A time is expected to be rendered as HH:MM:SS."""
    clock = datetime.time(10, 30, 0)
    assert to_sqlite(clock) == "10:30:00"
|
||||
|
||||
|
||||
def test_uuid_to_str():
    """A UUID is expected to be converted to its canonical string."""
    identifier = uuid.uuid4()
    expected = str(identifier)
    assert to_sqlite(identifier) == expected
|
||||
|
||||
|
||||
def test_bytearray_to_bytes():
    """A bytearray is expected to come back equal to the matching bytes value."""
    payload = bytearray(b"abc")
    assert to_sqlite(payload) == b"abc"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", [1, 1.5, "text", None, b"blob", True])
def test_native_values_pass_through(value):
    """Each parametrized value must compare equal to itself after to_sqlite."""
    converted = to_sqlite(value)
    assert converted == value
|
||||
|
||||
|
||||
def test_coerce_params_tuple():
    """Positional parameters: the Decimal becomes a string, the str is untouched."""
    params = (decimal.Decimal("1.5"), "x")
    assert coerce_params(params) == ("1.5", "x")
|
||||
|
||||
|
||||
def test_coerce_params_dict():
    """Named parameters: values are converted and keys are kept."""
    params = {"p": decimal.Decimal("2")}
    assert coerce_params(params) == {"p": "2"}
|
||||
|
||||
|
||||
def test_coerce_params_none():
    """``None`` (no parameters) must be returned as ``None``."""
    result = coerce_params(None)
    assert result is None
|
||||
|
||||
|
||||
# --- integration: values reach the cache through coercion -------------------
|
||||
|
||||
|
||||
def test_load_table_coerces_decimal_and_datetime(cache):
    """Decimal and datetime values from the source must be readable from the cache as text."""
    price = decimal.Decimal("9.99")
    changed = datetime.datetime(2026, 6, 1, 10, 0, 0)
    source = FakeSource([("1", price, changed)])

    cache.load_table("t", ["id", "price", "changed"], source)

    _, out = cache.execute_in_memory("SELECT id, price, changed FROM t")
    assert out == [("1", "9.99", "2026-06-01T10:00:00")]
|
||||
|
||||
|
||||
def test_decimal_where_param_matches_text_value(cache):
    """A Decimal bound as a WHERE parameter must match the text value stored in the cache."""
    source = FakeSource([("9.99",)])
    cache.load_table("t", ["price"], source)

    params = (decimal.Decimal("9.99"),)
    _, out = cache.execute_in_memory("SELECT price FROM t WHERE price = ?", params)

    assert out == [("9.99",)]
|
||||
|
||||
|
||||
def test_upsert_rows_coerces_decimal(cache):
    """Upserting a Decimal over an existing key must store its string form."""
    columns = ["id", "price"]
    cache.load_table("t", columns, FakeSource([("1", "0")]))
    cache.create_unique_index("t", ["id"])

    replacement = ("1", decimal.Decimal("12.50"))
    cache.upsert_rows("t", ["id", "price"], [replacement])

    _, out = cache.execute_in_memory("SELECT price FROM t WHERE id = '1'")
    assert out == [("12.50",)]
|
||||
@@ -0,0 +1,189 @@
|
||||
import sqlite3
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
import sqlmem.engine as eng_mod
|
||||
from sqlmem import CachingEngine, DeltaConfig
|
||||
from sqlmem.cache import CacheManager
|
||||
from sqlmem.delta import DeltaRefresher, ResolvedDelta
|
||||
from sqlmem.executor import QueryExecutor
|
||||
from sqlmem.parser import parse
|
||||
from sqlmem.registry import ColumnRegistry
|
||||
from sqlmem.stats import StatsCollector
|
||||
|
||||
|
||||
def cached_rows(cache, sql):
    """Run *sql* through ``cache.execute_in_memory`` and return the rows as dicts.

    ``execute_in_memory`` is unpacked as a ``(columns, rows)`` pair; each row
    is zipped with the column names so tests can address values by name.
    """
    cols, rows = cache.execute_in_memory(sql)
    return [dict(zip(cols, row)) for row in rows]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Refresher unit tests (in-memory source connection)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def source_conn():
    """Yield an in-memory SQLite connection seeded with two ``products`` rows.

    The connection is closed in a ``finally`` so it is released even when
    seeding fails before the fixture reaches its ``yield``.
    """
    conn = sqlite3.connect(":memory:")
    try:
        conn.executescript(
            """
            CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, price TEXT, changed TEXT);
            INSERT INTO products VALUES
            ('1', 'Widget', '9.99', '2026-06-01 10:00:00'),
            ('2', 'Gadget', '19.99', '2026-06-01 10:05:00');
            """
        )
        conn.commit()
        yield conn
    finally:
        conn.close()
|
||||
|
||||
|
||||
@pytest.fixture
def env(tmp_path, source_conn):
    """Yield a namespace with ``cache``, ``source`` and ``refresher`` after an initial load.

    The cache is closed in a ``finally`` so it is released even when wiring
    the components or the initial load fails before the ``yield``.
    """
    cache = CacheManager(db_path=tmp_path / "cache.db", backup_interval=9999)
    try:
        registry = ColumnRegistry(cache.connection)
        stats = StatsCollector()
        delta = {"products": ResolvedDelta(change_column="changed", key_columns=["id"])}
        executor = QueryExecutor(cache, registry, source_conn, stats, delta)
        refresher = DeltaRefresher(cache, delta)
        # Initial load — caches id, name, price (+ augmented key/change columns).
        executor.execute(parse("SELECT id, name, price FROM products"))
        yield SimpleNamespace(cache=cache, source=source_conn, refresher=refresher)
    finally:
        cache.close()
|
||||
|
||||
|
||||
def test_load_augments_key_and_change_columns(env):
    """The cached table must hold the selected columns plus the key and change columns."""
    expected = {"id", "name", "price", "changed"}
    present = set(env.cache.get_table_columns("products"))
    assert expected <= present
|
||||
|
||||
|
||||
def test_initial_watermark_is_max_change(env):
    """After the initial load the watermark equals the latest seeded ``changed`` value."""
    watermark = env.cache.get_last_synced_at("products")
    assert watermark == "2026-06-01 10:05:00"
|
||||
|
||||
|
||||
def test_refresh_applies_updates(env):
    """A source-side update with a newer timestamp must reach the cache and move the watermark."""
    update_sql = (
        "UPDATE products SET price = '7.77', changed = '2026-06-01 10:10:00' WHERE id = '1'"
    )
    env.source.execute(update_sql)
    env.source.commit()
    env.refresher.refresh(env.source)

    by_id = {}
    for row in cached_rows(env.cache, "SELECT id, price FROM products"):
        by_id[row["id"]] = row
    assert by_id["1"]["price"] == "7.77"
    assert env.cache.get_last_synced_at("products") == "2026-06-01 10:10:00"
|
||||
|
||||
|
||||
def test_refresh_inserts_new_rows(env):
    """A row inserted in the source after the initial load must appear in the cache on refresh."""
    insert_sql = "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:20:00')"
    env.source.execute(insert_sql)
    env.source.commit()
    env.refresher.refresh(env.source)

    cached_ids = set()
    for row in cached_rows(env.cache, "SELECT id FROM products"):
        cached_ids.add(row["id"])
    assert cached_ids == {"1", "2", "3"}
|
||||
|
||||
|
||||
def test_boundary_timestamp_not_missed_and_idempotent(env):
    """Rows sharing the watermark timestamp are picked up once and never duplicated."""
    # New row sharing the exact watermark timestamp must still be picked up (>=),
    # and the row already at that timestamp must not be duplicated.
    insert_sql = "INSERT INTO products VALUES ('3', 'Sprocket', '5.00', '2026-06-01 10:05:00')"
    env.source.execute(insert_sql)
    env.source.commit()
    for _ in range(2):  # idempotent — running twice changes nothing
        env.refresher.refresh(env.source)

    ids = [row["id"] for row in cached_rows(env.cache, "SELECT id FROM products")]
    ids.sort()
    assert ids == ["1", "2", "3"]
|
||||
|
||||
|
||||
def test_delete_by_nulling(env):
    """A column set to NULL in the source (with a newer timestamp) must read back as None."""
    null_sql = "UPDATE products SET name = NULL, changed = '2026-06-01 10:30:00' WHERE id = '1'"
    env.source.execute(null_sql)
    env.source.commit()
    env.refresher.refresh(env.source)

    by_id = {}
    for row in cached_rows(env.cache, "SELECT id, name FROM products"):
        by_id[row["id"]] = row
    assert by_id["1"]["name"] is None
|
||||
|
||||
|
||||
def test_refresh_without_changes_is_noop(env):
    """Refreshing against an unchanged source must leave the cached rows as they were."""
    query = "SELECT id, name, price FROM products"
    snapshot = cached_rows(env.cache, query)
    env.refresher.refresh(env.source)
    assert snapshot == cached_rows(env.cache, query)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Engine-level: PK auto-discovery, reset, end-to-end refresh
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def source_db(tmp_path):
    """Create an on-disk SQLite source with a ``products`` table and a ``vw_products`` view.

    Returns the path of the database file. The seeding connection is closed in
    a ``finally`` so the file handle is released even if the script fails.
    """
    db_path = tmp_path / "source.db"
    conn = sqlite3.connect(db_path)
    try:
        conn.executescript(
            """
            CREATE TABLE products (id TEXT PRIMARY KEY, name TEXT, changed TEXT);
            INSERT INTO products VALUES ('1', 'Widget', '2026-06-01 10:00:00');
            CREATE VIEW vw_products AS SELECT id, name FROM products;
            """
        )
        conn.commit()
    finally:
        conn.close()
    return db_path
|
||||
|
||||
|
||||
@pytest.fixture
def source_engine(source_db):
    """Yield a SQLAlchemy engine bound to the on-disk source database; dispose it afterwards."""
    url = f"sqlite:///{source_db}"
    sa_engine = create_engine(url)
    yield sa_engine
    sa_engine.dispose()
|
||||
|
||||
|
||||
@pytest.fixture
def patched_cache(tmp_path, monkeypatch):
    """Point the engine module's cache path at ``tmp_path`` and override its backup interval."""
    overrides = {
        "CACHE_DB_PATH": tmp_path / "cache.db",
        "BACKUP_INTERVAL_SECONDS": 9999,
    }
    for attr, value in overrides.items():
        monkeypatch.setattr(eng_mod, attr, value)
|
||||
|
||||
|
||||
def test_pk_auto_discovery(source_engine, patched_cache):
    """With no key_columns configured, the engine must resolve ``["id"]`` for ``products``."""
    engine = CachingEngine(source_engine, delta={"products": DeltaConfig(change_column="changed")})
    try:
        assert engine._delta["products"].key_columns == ["id"]
    finally:
        # Close even when the assertion fails so the engine is not left open.
        engine.close()
|
||||
|
||||
|
||||
def test_view_without_key_raises(source_engine, patched_cache):
    """Configuring delta refresh on ``vw_products`` without key columns must raise ValueError."""
    with pytest.raises(ValueError):
        delta = {"vw_products": DeltaConfig(change_column="name")}
        CachingEngine(source_engine, delta=delta)
|
||||
|
||||
|
||||
def test_engine_reset(source_engine, patched_cache):
    """``reset()`` must leave a previously cached table reported as not cached."""
    engine = CachingEngine(source_engine)
    try:
        engine.execute("SELECT id, name FROM products")
        assert engine._cache.is_table_cached("products") is True
        engine.reset()
        assert engine._cache.is_table_cached("products") is False
    finally:
        # Close even when an assertion fails so the engine is not left open.
        engine.close()
|
||||
|
||||
|
||||
def test_engine_delta_refresh_end_to_end(source_engine, source_db, patched_cache):
    """Changes written to the source must be visible through the engine after ``refresh()``.

    The engine and the side connection used to modify the source are both
    closed in ``finally`` blocks so a failing step does not leave them open.
    """
    engine = CachingEngine(
        source_engine, delta={"products": DeltaConfig(change_column="changed", key_columns=["id"])}
    )
    try:
        engine.execute("SELECT id, name FROM products")  # caches, watermark = 10:00

        # Modify the source through a separate connection: one update, one insert.
        conn = sqlite3.connect(source_db)
        try:
            conn.execute(
                "UPDATE products SET name = 'Widget2', changed = '2026-06-01 10:06:00' WHERE id = '1'"
            )
            conn.execute("INSERT INTO products VALUES ('2', 'Gadget', '2026-06-01 10:05:00')")
            conn.commit()
        finally:
            conn.close()

        engine.refresh()
        rows = {r["id"]: r for r in engine.execute("SELECT id, name FROM products")}
        assert rows["1"]["name"] == "Widget2"
        assert rows["2"]["name"] == "Gadget"
    finally:
        engine.close()
|
||||
Reference in New Issue
Block a user