Add incremental delta refresh and fix Decimal/datetime cache binding

This commit is contained in:
Jan Doubravský
2026-06-05 11:09:16 +02:00
parent 530c2618cf
commit 33aa126ff6
13 changed files with 798 additions and 53 deletions
+68 -4
View File
@@ -1,11 +1,14 @@
import sqlite3
import threading
from typing import cast
from loguru import logger
from sqlalchemy import inspect
from sqlalchemy.engine import Engine
from .cache import CacheManager
from .config import BACKUP_INTERVAL_SECONDS, CACHE_DB_PATH
from .config import BACKUP_INTERVAL_SECONDS, CACHE_DB_PATH, REFRESH_INTERVAL_SECONDS
from .delta import DeltaConfig, DeltaRefresher, ResolvedDelta
from .executor import QueryExecutor
from .parser import Params, parse
from .registry import ColumnRegistry
@@ -15,24 +18,80 @@ from .stats import Stats, StatsCollector
class CachingEngine:
"""Transparent SQLAlchemy-compatible cache layer."""
def __init__(
    self,
    source_engine: Engine,
    delta: dict[str, DeltaConfig] | None = None,
) -> None:
    """Build the cache layer around ``source_engine``.

    Args:
        source_engine: Engine for the source database. It is used for key
            discovery, query execution and delta refreshes.
        delta: Optional mapping of table name to its ``DeltaConfig``. When
            it is non-empty, one refresh runs during construction and a
            background refresh thread is started.

    Raises:
        ValueError: If a delta table has no configured key columns and no
            primary key can be discovered in the source database.
    """
    self._source_engine = source_engine
    # Resolve the delta configuration before opening the cache. Resolution
    # only needs the source engine, and failing here first means a bad
    # config cannot leave behind an already-opened CacheManager.
    self._delta = self._resolve_delta(delta or {})
    self._cache = CacheManager(CACHE_DB_PATH, BACKUP_INTERVAL_SECONDS)
    self._registry = ColumnRegistry(self._cache.connection)
    self._stats = StatsCollector()
    self._refresh_interval = REFRESH_INTERVAL_SECONDS
    self._refresher = DeltaRefresher(self._cache, self._delta)
    if self._delta:
        self._run_refresh()  # catch up tables restored from disk
        self._start_refresh_thread()
    logger.info("CachingEngine initialized.")
def _resolve_delta(self, delta: dict[str, DeltaConfig]) -> dict[str, ResolvedDelta]:
"""Resolve each DeltaConfig, auto-discovering the primary key when omitted."""
resolved: dict[str, ResolvedDelta] = {}
inspector = None
for table, cfg in delta.items():
keys = list(cfg.key_columns)
if not keys:
inspector = inspector or inspect(self._source_engine)
pk = inspector.get_pk_constraint(table)
keys = list(pk.get("constrained_columns") or [])
if not keys:
raise ValueError(
f"No primary key found for {table!r} in the source DB "
"(views have none) — set key_columns in its DeltaConfig."
)
logger.info(f"Delta {table!r}: auto-discovered key columns {keys}")
resolved[table] = ResolvedDelta(change_column=cfg.change_column, key_columns=keys)
return resolved
@property
def stats(self) -> Stats:
    """Return a stats snapshot taken while holding the cache lock."""
    cache = self._cache
    with cache._lock:
        snapshot = self._stats.snapshot(cache.connection)
    return snapshot
def execute(self, sql: str, params: Params = None) -> list[dict]:
    """Parse ``sql`` and run it through a ``QueryExecutor``.

    A connection is opened on the source engine for the duration of the
    call. Its raw DB-API connection is handed to the executor together with
    the cache, column registry, stats collector and resolved delta settings.

    Args:
        sql: Statement text passed to ``parse``.
        params: Statement parameters passed to ``parse``.

    Returns:
        Whatever the executor returns for the parsed statement.
    """
    statement = parse(sql, params)
    with self._source_engine.connect() as connection:
        dbapi_conn = cast(sqlite3.Connection, connection.connection.dbapi_connection)
        return QueryExecutor(
            self._cache,
            self._registry,
            dbapi_conn,
            self._stats,
            self._delta,
        ).execute(statement)
def refresh(self) -> None:
    """Run one delta refresh cycle immediately.

    Delegates to ``_run_refresh``, the same routine the background timer
    thread calls on every interval.
    """
    self._run_refresh()
def _run_refresh(self) -> None:
try:
with self._source_engine.connect() as sa_conn:
raw_conn = cast(sqlite3.Connection, sa_conn.connection.dbapi_connection)
self._refresher.refresh(raw_conn)
except Exception as e:
logger.error(f"Delta refresh cycle failed: {e}")
def _start_refresh_thread(self) -> None:
    """Start the daemon thread that runs a delta refresh every interval.

    The stop event and the thread are stored on the instance as
    ``_refresh_stop`` and ``_refresh_thread``. Setting ``_refresh_stop``
    ends the loop; nothing in this method sets it.
    """
    # Keep the event on the instance. As a local created inside the loop it
    # could never be set, so the loop had no way to end before process exit.
    stop = self._refresh_stop = threading.Event()

    def loop() -> None:
        # wait() returns False on timeout (run a cycle) and True once the
        # event has been set (leave the loop).
        while not stop.wait(self._refresh_interval):
            self._run_refresh()

    self._refresh_thread = threading.Thread(target=loop, daemon=True, name="sqlmem-delta")
    self._refresh_thread.start()
    logger.debug(f"Delta refresh thread started (interval={self._refresh_interval}s)")
def invalidate(self, table: str) -> None:
logger.info(f"Manually invalidating cache for table {table!r}")
with self._cache._lock:
@@ -45,6 +104,11 @@ class CachingEngine:
)
self._cache.connection.commit()
def reset(self) -> None:
    """Discard the entire cache, both in RAM and in cache.db.

    Meant for use after structural changes in the source. As the log line
    says, tables are reloaded the next time they are used.
    """
    cache = self._cache
    cache.reset()
    logger.info("Cache reset — all tables will be reloaded on next use.")
def close(self) -> None:
    """Close the underlying cache manager and log the shutdown."""
    # NOTE(review): only the cache is closed here; this method does not
    # stop the delta refresh thread — confirm that is intended.
    cache = self._cache
    cache.close()
    logger.info("CachingEngine closed.")