Fix frozen delta watermark and add error stats, lazy source, concurrent disk reads, and per-engine config

This commit is contained in:
Jan Doubravský
2026-06-08 19:35:33 +02:00
parent 209ae667ab
commit 6dc85e4f3c
17 changed files with 668 additions and 71 deletions
+89 -19
View File
@@ -1,18 +1,21 @@
import sqlite3
import threading
from dataclasses import replace
from typing import cast
from pathlib import Path
from typing import Any
from loguru import logger
from sqlalchemy import inspect
from sqlalchemy.engine import Engine
from sqlalchemy.engine import Connection, Engine
from .cache import CacheManager
from ._sql import quote
from .cache import CacheManager, TableError
from .config import (
BACKUP_INTERVAL_SECONDS,
CACHE_DB_PATH,
FETCH_BATCH_SIZE,
IN_MEMORY,
REFRESH_INTERVAL_SECONDS,
SQL_DIALECT,
)
from .delta import DeltaConfig, DeltaRefresher, ResolvedDelta
from .executor import QueryExecutor
@@ -21,6 +24,32 @@ from .registry import ColumnRegistry
from .stats import Stats, StatsCollector, TableState, TableStats
class _LazySource:
    """A source connection opened on first ``execute`` and shared across one query.

    Most queries are cache hits that never touch the source, so opening it (and
    occupying a connection-pool slot) eagerly is wasteful. This proxy forwards
    ``execute`` to a real connection opened on demand, then released by ``close``.

    The proxy is reusable: after ``close`` the next ``execute`` opens a fresh
    connection. It also works as a context manager, calling ``close`` on exit.

    NOTE(review): ``execute`` is called directly on the driver-level connection.
    That is a driver-specific shortcut (sqlite3 provides it; PEP 249 does not
    require it) -- confirm it exists for every source dialect in use.
    """

    def __init__(self, source_engine: Engine) -> None:
        """Remember *source_engine*; no connection is opened here."""
        self._source_engine = source_engine
        self._sa_conn: Connection | None = None
        self._raw: Any = None

    def __enter__(self) -> "_LazySource":
        """Return the proxy itself; the connection is still opened lazily."""
        return self

    def __exit__(self, *exc_info: Any) -> None:
        """Release the connection (if any) when the ``with`` block ends."""
        self.close()

    def execute(self, *args: Any, **kwargs: Any) -> Any:
        """Forward to the driver connection's ``execute``, connecting on first use.

        Returns:
            Whatever the driver connection's ``execute`` returns.

        Raises:
            RuntimeError: if the checked-out connection exposes no driver-level
                connection (``dbapi_connection`` is ``None``).
        """
        if self._raw is None:
            self._open()
        return self._raw.execute(*args, **kwargs)

    def close(self) -> None:
        """Release the connection if one was opened; safe to call repeatedly."""
        sa_conn = self._sa_conn
        # Reset state first: even if closing raises, the proxy must not keep
        # handing out a connection that is already (half) closed.
        self._sa_conn = None
        self._raw = None
        if sa_conn is not None:
            sa_conn.close()

    def _open(self) -> None:
        """Check a connection out of the engine and resolve its driver handle."""
        # A previous attempt may have connected but failed before the driver
        # handle was resolved; release that connection instead of leaking it.
        self.close()
        sa_conn = self._source_engine.connect()
        # Track the connection before touching it so close() can always
        # return it to the pool, whatever happens below.
        self._sa_conn = sa_conn
        raw = sa_conn.connection.dbapi_connection
        if raw is None:
            self.close()
            raise RuntimeError("Source engine returned no DBAPI connection")
        self._raw = raw
class CachingEngine:
"""Transparent SQLAlchemy-compatible cache layer."""
@@ -31,15 +60,28 @@ class CachingEngine:
ttl: dict[str, int] | None = None,
indexes: dict[str, list[str | list[str]]] | None = None,
in_memory: bool | None = None,
cache_db_path: str | Path | None = None,
backup_interval: int | None = None,
refresh_interval: int | None = None,
fetch_batch: int | None = None,
dialect: str | None = None,
blocking_startup_refresh: bool = False,
) -> None:
self._source_engine = source_engine
use_memory = IN_MEMORY if in_memory is None else in_memory
self._dialect = dialect if dialect is not None else SQL_DIALECT
self._refresh_interval = (
refresh_interval if refresh_interval is not None else REFRESH_INTERVAL_SECONDS
)
self._cache = CacheManager(
CACHE_DB_PATH, BACKUP_INTERVAL_SECONDS, in_memory=use_memory
Path(cache_db_path) if cache_db_path is not None else CACHE_DB_PATH,
backup_interval if backup_interval is not None else BACKUP_INTERVAL_SECONDS,
in_memory=use_memory,
dialect=self._dialect,
fetch_batch=fetch_batch if fetch_batch is not None else FETCH_BATCH_SIZE,
)
self._registry = ColumnRegistry(self._cache.connection)
self._stats = StatsCollector()
self._refresh_interval = REFRESH_INTERVAL_SECONDS
self._delta = self._resolve_delta(delta or {})
self._ttl = dict(ttl or {})
self._index_columns = self._register_indexes(indexes or {})
@@ -54,8 +96,13 @@ class CachingEngine:
)
if self._delta or self._ttl:
self._run_refresh() # catch up tables restored from disk
self._start_refresh_thread()
# The startup catch-up (deltas/TTL reloads for tables restored from
# disk) can take a while on a cold start. By default it runs on the
# background thread so it never blocks application startup; callers
# who need the cache fully fresh before serving can opt back in.
if blocking_startup_refresh:
self._run_refresh()
self._start_refresh_thread(initial_catch_up=not blocking_startup_refresh)
logger.info("CachingEngine initialized.")
@@ -97,12 +144,18 @@ class CachingEngine:
@property
def stats(self) -> Stats:
states = self._cache.get_states()
errors = self._cache.get_errors()
with self._cache._lock:
base = self._stats.snapshot(self._cache.connection, states)
return replace(base, tables={n: self._enrich(n, t) for n, t in base.tables.items()})
base = replace(base, errors=self._cache.error_total)
return replace(
base, tables={n: self._enrich(n, t, errors) for n, t in base.tables.items()}
)
def _enrich(self, name: str, table_stats: TableStats) -> TableStats:
"""Annotate a TableStats with how it is refreshed and TTL staleness."""
def _enrich(
self, name: str, table_stats: TableStats, errors: dict[str, TableError]
) -> TableStats:
"""Annotate a TableStats with refresh tracking, TTL staleness and errors."""
if name in self._delta:
tracking = "delta"
elif name in self._ttl:
@@ -115,22 +168,37 @@ class CachingEngine:
age = self._cache.seconds_since_refresh(name)
if age is not None and age > self._ttl[name]:
state = TableState.STALE
err = errors.get(name)
if err is not None:
return replace(
table_stats,
tracking=tracking,
state=state,
last_error=err.message,
last_error_at=err.at,
consecutive_failures=err.consecutive,
)
return replace(table_stats, tracking=tracking, state=state)
def execute(self, sql: str, params: Params = None) -> list[dict]:
parsed = parse(sql, params)
with self._source_engine.connect() as sa_conn:
raw_conn = cast(sqlite3.Connection, sa_conn.connection.dbapi_connection)
parsed = parse(sql, params, dialect=self._dialect)
# The source connection is opened lazily — a pure cache hit never touches
# the source and never occupies a pool slot.
source = _LazySource(self._source_engine)
try:
executor = QueryExecutor(
self._cache,
self._registry,
raw_conn,
source,
self._stats,
self._delta,
self._ttl,
self._index_columns,
)
return executor.execute(parsed)
finally:
source.close()
def refresh(self) -> None:
"""Pull deltas for all delta-tracked tables now (also runs on a timer)."""
@@ -139,13 +207,13 @@ class CachingEngine:
def _run_refresh(self) -> None:
try:
with self._source_engine.connect() as sa_conn:
raw_conn = cast(sqlite3.Connection, sa_conn.connection.dbapi_connection)
raw_conn = sa_conn.connection.dbapi_connection
self._refresher.refresh(raw_conn)
self._refresh_ttl(raw_conn)
except Exception as e:
logger.error(f"Refresh cycle failed: {e}")
def _refresh_ttl(self, source_conn: sqlite3.Connection) -> None:
def _refresh_ttl(self, source_conn: Any) -> None:
"""Proactively full-reload TTL-tracked tables whose cache has expired."""
for table, ttl in self._ttl.items():
if not self._cache.is_table_cached(table):
@@ -161,8 +229,10 @@ class CachingEngine:
except Exception as e:
logger.error(f"TTL refresh failed for {table!r}: {e}")
def _start_refresh_thread(self) -> None:
def _start_refresh_thread(self, initial_catch_up: bool = True) -> None:
def loop() -> None:
if initial_catch_up:
self._run_refresh() # off-main-thread startup catch-up
event = threading.Event()
while not event.wait(self._refresh_interval):
self._run_refresh()
@@ -174,7 +244,7 @@ class CachingEngine:
def invalidate(self, table: str) -> None:
logger.info(f"Manually invalidating cache for table {table!r}")
with self._cache._lock:
self._cache.connection.execute(f"DROP TABLE IF EXISTS {table}")
self._cache.connection.execute(f"DROP TABLE IF EXISTS {quote(table)}")
self._cache.connection.execute(
"DELETE FROM _sqlmem_tables WHERE table_name = ?", (table,)
)