Fix frozen delta watermark and add error stats, lazy source, concurrent disk reads, and per-engine config
This commit is contained in:
+89
-19
@@ -1,18 +1,21 @@
|
||||
import sqlite3
|
||||
import threading
|
||||
from dataclasses import replace
|
||||
from typing import cast
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
from sqlalchemy import inspect
|
||||
from sqlalchemy.engine import Engine
|
||||
from sqlalchemy.engine import Connection, Engine
|
||||
|
||||
from .cache import CacheManager
|
||||
from ._sql import quote
|
||||
from .cache import CacheManager, TableError
|
||||
from .config import (
|
||||
BACKUP_INTERVAL_SECONDS,
|
||||
CACHE_DB_PATH,
|
||||
FETCH_BATCH_SIZE,
|
||||
IN_MEMORY,
|
||||
REFRESH_INTERVAL_SECONDS,
|
||||
SQL_DIALECT,
|
||||
)
|
||||
from .delta import DeltaConfig, DeltaRefresher, ResolvedDelta
|
||||
from .executor import QueryExecutor
|
||||
@@ -21,6 +24,32 @@ from .registry import ColumnRegistry
|
||||
from .stats import Stats, StatsCollector, TableState, TableStats
|
||||
|
||||
|
||||
class _LazySource:
    """A source connection opened on first ``execute`` and shared across one query.

    Most queries are cache hits that never touch the source, so opening it (and
    occupying a connection-pool slot) eagerly is wasteful. This proxy forwards
    ``execute`` to a real connection opened on demand, then released by ``close``.

    The proxy may also be used as a context manager; leaving the ``with`` block
    calls ``close``.
    """

    def __init__(self, source_engine: Engine) -> None:
        """Remember ``source_engine``; no connection is opened yet."""
        self._source_engine = source_engine
        self._sa_conn: Connection | None = None
        self._raw: Any = None

    def __enter__(self) -> "_LazySource":
        """Return the proxy itself; the connection is still opened lazily."""
        return self

    def __exit__(self, *exc_info: Any) -> None:
        """Release the connection (if one was opened) when the block exits."""
        self.close()

    def execute(self, *args: Any, **kwargs: Any) -> Any:
        """Forward to the raw DBAPI connection's ``execute``, connecting on first use.

        All positional and keyword arguments are passed through unchanged and
        the DBAPI connection's return value is returned as-is.

        Raises:
            RuntimeError: if the freshly opened connection exposes no DBAPI
                connection. The connection is closed before raising so the
                pool slot is not leaked.
        """
        # Guard on the SQLAlchemy connection, not the raw handle: guarding on
        # the raw handle would open (and orphan) a new connection on every
        # call whenever the raw handle could not be obtained.
        if self._sa_conn is None:
            sa_conn = self._source_engine.connect()
            try:
                raw = sa_conn.connection.dbapi_connection
                if raw is None:
                    raise RuntimeError(
                        "source connection has no underlying DBAPI connection"
                    )
            except BaseException:
                # Never keep a half-initialised connection checked out.
                sa_conn.close()
                raise
            self._sa_conn = sa_conn
            self._raw = raw
        return self._raw.execute(*args, **kwargs)

    def close(self) -> None:
        """Release the connection if one was opened; safe to call repeatedly."""
        sa_conn = self._sa_conn
        # Reset state first so a failing close() cannot leave a stale handle
        # behind for a later execute() to reuse.
        self._sa_conn = None
        self._raw = None
        if sa_conn is not None:
            sa_conn.close()
|
||||
|
||||
|
||||
class CachingEngine:
|
||||
"""Transparent SQLAlchemy-compatible cache layer."""
|
||||
|
||||
@@ -31,15 +60,28 @@ class CachingEngine:
|
||||
ttl: dict[str, int] | None = None,
|
||||
indexes: dict[str, list[str | list[str]]] | None = None,
|
||||
in_memory: bool | None = None,
|
||||
cache_db_path: str | Path | None = None,
|
||||
backup_interval: int | None = None,
|
||||
refresh_interval: int | None = None,
|
||||
fetch_batch: int | None = None,
|
||||
dialect: str | None = None,
|
||||
blocking_startup_refresh: bool = False,
|
||||
) -> None:
|
||||
self._source_engine = source_engine
|
||||
use_memory = IN_MEMORY if in_memory is None else in_memory
|
||||
self._dialect = dialect if dialect is not None else SQL_DIALECT
|
||||
self._refresh_interval = (
|
||||
refresh_interval if refresh_interval is not None else REFRESH_INTERVAL_SECONDS
|
||||
)
|
||||
self._cache = CacheManager(
|
||||
CACHE_DB_PATH, BACKUP_INTERVAL_SECONDS, in_memory=use_memory
|
||||
Path(cache_db_path) if cache_db_path is not None else CACHE_DB_PATH,
|
||||
backup_interval if backup_interval is not None else BACKUP_INTERVAL_SECONDS,
|
||||
in_memory=use_memory,
|
||||
dialect=self._dialect,
|
||||
fetch_batch=fetch_batch if fetch_batch is not None else FETCH_BATCH_SIZE,
|
||||
)
|
||||
self._registry = ColumnRegistry(self._cache.connection)
|
||||
self._stats = StatsCollector()
|
||||
self._refresh_interval = REFRESH_INTERVAL_SECONDS
|
||||
self._delta = self._resolve_delta(delta or {})
|
||||
self._ttl = dict(ttl or {})
|
||||
self._index_columns = self._register_indexes(indexes or {})
|
||||
@@ -54,8 +96,13 @@ class CachingEngine:
|
||||
)
|
||||
|
||||
if self._delta or self._ttl:
|
||||
self._run_refresh() # catch up tables restored from disk
|
||||
self._start_refresh_thread()
|
||||
# The startup catch-up (deltas/TTL reloads for tables restored from
|
||||
# disk) can take a while on a cold start. By default it runs on the
|
||||
# background thread so it never blocks application startup; callers
|
||||
# who need the cache fully fresh before serving can opt back in.
|
||||
if blocking_startup_refresh:
|
||||
self._run_refresh()
|
||||
self._start_refresh_thread(initial_catch_up=not blocking_startup_refresh)
|
||||
|
||||
logger.info("CachingEngine initialized.")
|
||||
|
||||
@@ -97,12 +144,18 @@ class CachingEngine:
|
||||
@property
def stats(self) -> Stats:
    """Return a stats snapshot enriched with per-table refresh and error info.

    The base snapshot comes from the stats collector; on top of it the
    cache-wide error total is attached and every table entry is passed
    through ``_enrich`` together with the cache's per-table error records.
    """
    states = self._cache.get_states()
    errors = self._cache.get_errors()
    with self._cache._lock:
        base = self._stats.snapshot(self._cache.connection, states)
    # NOTE(review): only the snapshot is taken under the cache lock here; the
    # original indentation was lost, so confirm the intended lock scope.
    enriched = {
        name: self._enrich(name, table, errors)
        for name, table in base.tables.items()
    }
    return replace(base, errors=self._cache.error_total, tables=enriched)
|
||||
|
||||
def _enrich(self, name: str, table_stats: TableStats) -> TableStats:
|
||||
"""Annotate a TableStats with how it is refreshed and TTL staleness."""
|
||||
def _enrich(
|
||||
self, name: str, table_stats: TableStats, errors: dict[str, TableError]
|
||||
) -> TableStats:
|
||||
"""Annotate a TableStats with refresh tracking, TTL staleness and errors."""
|
||||
if name in self._delta:
|
||||
tracking = "delta"
|
||||
elif name in self._ttl:
|
||||
@@ -115,22 +168,37 @@ class CachingEngine:
|
||||
age = self._cache.seconds_since_refresh(name)
|
||||
if age is not None and age > self._ttl[name]:
|
||||
state = TableState.STALE
|
||||
|
||||
err = errors.get(name)
|
||||
if err is not None:
|
||||
return replace(
|
||||
table_stats,
|
||||
tracking=tracking,
|
||||
state=state,
|
||||
last_error=err.message,
|
||||
last_error_at=err.at,
|
||||
consecutive_failures=err.consecutive,
|
||||
)
|
||||
return replace(table_stats, tracking=tracking, state=state)
|
||||
|
||||
def execute(self, sql: str, params: Params = None) -> list[dict]:
    """Parse ``sql`` for this engine's dialect and run it through the cache.

    Args:
        sql: The SQL text to execute.
        params: Bind parameters handed to the parser alongside ``sql``.

    Returns:
        Whatever ``QueryExecutor.execute`` returns for the parsed query.
    """
    parsed = parse(sql, params, dialect=self._dialect)
    # The source connection is opened lazily — a pure cache hit never touches
    # the source and never occupies a pool slot.
    source = _LazySource(self._source_engine)
    try:
        executor = QueryExecutor(
            self._cache,
            self._registry,
            source,
            self._stats,
            self._delta,
            self._ttl,
            self._index_columns,
        )
        return executor.execute(parsed)
    finally:
        # Release the source connection if the executor ended up opening one.
        source.close()
|
||||
|
||||
def refresh(self) -> None:
|
||||
"""Pull deltas for all delta-tracked tables now (also runs on a timer)."""
|
||||
@@ -139,13 +207,13 @@ class CachingEngine:
|
||||
def _run_refresh(self) -> None:
    """Run one refresh cycle against the source on a single connection.

    The delta refresher runs first, then the TTL reloads, both using the same
    raw DBAPI connection. Any exception is logged and swallowed rather than
    propagated, so a failed cycle does not raise into the caller.
    """
    try:
        with self._source_engine.connect() as sa_conn:
            raw_conn = sa_conn.connection.dbapi_connection
            self._refresher.refresh(raw_conn)
            self._refresh_ttl(raw_conn)
    except Exception as e:
        logger.error(f"Refresh cycle failed: {e}")
|
||||
|
||||
def _refresh_ttl(self, source_conn: sqlite3.Connection) -> None:
|
||||
def _refresh_ttl(self, source_conn: Any) -> None:
|
||||
"""Proactively full-reload TTL-tracked tables whose cache has expired."""
|
||||
for table, ttl in self._ttl.items():
|
||||
if not self._cache.is_table_cached(table):
|
||||
@@ -161,8 +229,10 @@ class CachingEngine:
|
||||
except Exception as e:
|
||||
logger.error(f"TTL refresh failed for {table!r}: {e}")
|
||||
|
||||
def _start_refresh_thread(self) -> None:
|
||||
def _start_refresh_thread(self, initial_catch_up: bool = True) -> None:
|
||||
def loop() -> None:
|
||||
if initial_catch_up:
|
||||
self._run_refresh() # off-main-thread startup catch-up
|
||||
event = threading.Event()
|
||||
while not event.wait(self._refresh_interval):
|
||||
self._run_refresh()
|
||||
@@ -174,7 +244,7 @@ class CachingEngine:
|
||||
def invalidate(self, table: str) -> None:
|
||||
logger.info(f"Manually invalidating cache for table {table!r}")
|
||||
with self._cache._lock:
|
||||
self._cache.connection.execute(f"DROP TABLE IF EXISTS {table}")
|
||||
self._cache.connection.execute(f"DROP TABLE IF EXISTS {quote(table)}")
|
||||
self._cache.connection.execute(
|
||||
"DELETE FROM _sqlmem_tables WHERE table_name = ?", (table,)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user