Fix frozen delta watermark and add error stats, lazy source, concurrent disk reads, and per-engine config

This commit is contained in:
Jan Doubravský
2026-06-08 19:35:33 +02:00
parent 209ae667ab
commit 6dc85e4f3c
17 changed files with 668 additions and 71 deletions
+22 -3
View File
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Any
from loguru import logger
@@ -15,13 +16,25 @@ _DEFAULT_FORMAT = (
"<level>{message}</level>"
)
# Sinks already registered, keyed by a stable identity, so a repeated call (e.g.
# a double import) doesn't add a second handler and duplicate every log line.
_added_sinks: dict[object, int] = {}
def _sink_key(sink: Any) -> object:
"""A stable identity for *sink* so the same destination isn't added twice."""
if isinstance(sink, (str, Path)):
return ("path", str(Path(sink).resolve()))
return ("obj", id(sink))
def add_sink(sink: Any, *, level: str | None = None, **kwargs: Any) -> None:
"""Route sqlmem log records to *sink*.
"""Route sqlmem log records to *sink* (idempotent).
Accepts any sink supported by loguru (file path, stream, callable, …).
*level* defaults to ``DEBUG`` when ``SQLMEM_DEBUG=true``, otherwise ``INFO``.
Extra keyword arguments are forwarded to :func:`loguru.logger.add`.
Extra keyword arguments are forwarded to :func:`loguru.logger.add`. Calling it
again for the same sink is a no-op, so a double import won't duplicate logs.
Example::
@@ -31,9 +44,15 @@ def add_sink(sink: Any, *, level: str | None = None, **kwargs: Any) -> None:
add_sink("sqlmem.log", rotation="10 MB")
"""
logger.enable("sqlmem")
key = _sink_key(sink)
if key in _added_sinks:
return
kwargs.setdefault("format", _DEFAULT_FORMAT)
kwargs.setdefault("colorize", True)
logger.add(sink, level=level or ("DEBUG" if DEBUG else "INFO"), filter="sqlmem", **kwargs)
handler_id = logger.add(
sink, level=level or ("DEBUG" if DEBUG else "INFO"), filter="sqlmem", **kwargs
)
_added_sinks[key] = handler_id
__all__ = [
+27
View File
@@ -0,0 +1,27 @@
"""SQL identifier quoting.
Table and column names are interpolated into statements as raw strings, so a
name with a space, a reserved word, or an embedded quote would break the query
(and is a latent injection vector). These helpers quote identifiers safely. The
in-memory cache is SQLite, so it uses double-quote style; the source DB is quoted
in its configured dialect (e.g. T-SQL ``[brackets]``).
"""
from collections.abc import Iterable
from sqlglot import exp
def quote(name: str) -> str:
    """Return *name* as a double-quoted identifier for the SQLite cache.

    Embedded double quotes are escaped by doubling them.
    """
    escaped = name.replace('"', '""')
    return f'"{escaped}"'
def quote_list(names: Iterable[str]) -> str:
    """Quote each identifier in *names* for SQLite and join them with ``", "``."""
    return ", ".join(map(quote, names))
def quote_source(name: str, dialect: str) -> str:
    """Return *name* quoted for the source database.

    The identifier is built with sqlglot and rendered in *dialect*, so the
    quoting style follows that dialect (e.g. T-SQL ``[x]``).
    """
    identifier = exp.to_identifier(name, quoted=True)
    return identifier.sql(dialect=dialect)
+128 -33
View File
@@ -10,7 +10,8 @@ from loguru import logger
import sqlmem._meta as _meta
from ._coerce import coerce_params, coerce_row
from .config import FETCH_BATCH_SIZE
from ._sql import quote, quote_list, quote_source
from .config import FETCH_BATCH_SIZE, SQL_DIALECT
from .stats import TableState
SCHEMA_VERSION = 3
@@ -22,17 +23,37 @@ class _Index:
columns: tuple[str, ...]
@dataclass(frozen=True)
class TableError:
    """Immutable record of a table's latest load/refresh failure.

    ``CacheManager.record_error`` stores one of these per table and
    ``CacheManager.get_errors`` returns them.
    """

    # Human-readable description of the failure.
    message: str
    # When the failure was recorded (``record_error`` stores the value of ``_now()``).
    at: str
    # Current failure streak; ``record_success`` rewrites it to 0.
    consecutive: int
class CacheManager:
def __init__(
self, db_path: Path, backup_interval: int, in_memory: bool = True
self,
db_path: Path,
backup_interval: int,
in_memory: bool = True,
dialect: str = SQL_DIALECT,
fetch_batch: int = FETCH_BATCH_SIZE,
) -> None:
self._db_path = db_path
self._backup_interval = backup_interval
self._in_memory = in_memory
self._dialect = dialect # source-DB dialect, for identifier quoting
self._fetch_batch = fetch_batch # rows fetched per source batch
self._lock = threading.Lock() # serializes connection access
self._load_lock = threading.Lock() # serializes full table loads
self._states: dict[str, str] = {} # table → live processing state
self._errors: dict[str, TableError] = {} # table → last load/refresh failure
self._error_total = 0 # process-wide failure counter
self._index_defs: dict[str, list[_Index]] = {} # table → secondary indexes
self._read_local = threading.local() # per-thread read conn (disk mode)
self._read_conns: list[sqlite3.Connection] = [] # read conns, for cleanup
self._closed = False
if in_memory:
@@ -124,7 +145,7 @@ class CacheManager:
).fetchall()
]
for name in names:
self._conn.execute(f"DROP TABLE IF EXISTS {name}")
self._conn.execute(f"DROP TABLE IF EXISTS {quote(name)}")
self._conn.commit()
def _load_from_disk(self) -> None:
@@ -161,7 +182,7 @@ class CacheManager:
]
for name in orphans:
logger.warning(f"Dropping orphan staging table {name!r} from a previous interrupted load.")
self._conn.execute(f"DROP TABLE IF EXISTS {name}")
self._conn.execute(f"DROP TABLE IF EXISTS {quote(name)}")
if orphans:
self._conn.commit()
@@ -238,7 +259,9 @@ class CacheManager:
def discover_columns(self, table: str, source_conn: sqlite3.Connection) -> list[str]:
"""Return all column names of *table* from the source DB without fetching rows."""
logger.debug(f"Discovering columns of {table!r} from source DB")
cursor = source_conn.execute(f"SELECT * FROM {table} WHERE 1 = 0")
cursor = source_conn.execute(
f"SELECT * FROM {quote_source(table, self._dialect)} WHERE 1 = 0"
)
columns = [desc[0] for desc in cursor.description]
logger.debug(f"{table!r} has columns: {columns}")
return columns
@@ -251,6 +274,28 @@ class CacheManager:
def clear_state(self, table: str) -> None:
    """Forget *table*'s live processing state and its recorded error, if any.

    Unknown tables are ignored.
    """
    for registry in (self._states, self._errors):
        registry.pop(table, None)
def record_error(self, table: str, message: str) -> None:
    """Store *message* as the latest failure of *table* and extend its streak.

    The streak continues from the previously stored entry (or starts at 1), and
    the process-wide failure counter is incremented as well.
    """
    previous = self._errors.get(table)
    streak = previous.consecutive + 1 if previous else 1
    self._errors[table] = TableError(message=message, at=_now(), consecutive=streak)
    self._error_total += 1
    logger.debug(f"Recorded error for {table!r} (streak {streak}): {message}")
def record_success(self, table: str) -> None:
    """Zero *table*'s failure streak after a successful load/refresh.

    The last error's message and timestamp are kept; only the streak is reset.
    Tables with no recorded error, or with a streak already at 0, are left as is.
    """
    last = self._errors.get(table)
    if not last or not last.consecutive:
        return
    self._errors[table] = TableError(last.message, last.at, 0)
def get_errors(self) -> dict[str, TableError]:
    """Return a shallow copy of the per-table error records."""
    return self._errors.copy()
@property
def error_total(self) -> int:
    """Number of failures counted by ``record_error`` on this manager."""
    return self._error_total
def add_index(self, table: str, columns: list[str]) -> None:
"""Register a secondary index to (re)create on *columns* after each load."""
@@ -268,10 +313,10 @@ class CacheManager:
f"Skipping index {idx.name!r}: columns {idx.columns} not all cached."
)
continue
cols = ", ".join(idx.columns)
cols = quote_list(idx.columns)
with self._lock:
self._conn.execute(
f"CREATE INDEX IF NOT EXISTS {idx.name} ON {table} ({cols})"
f"CREATE INDEX IF NOT EXISTS {quote(idx.name)} ON {quote(table)} ({cols})"
)
self._conn.commit()
logger.debug(f"Index {idx.name!r} ready on {table} ({cols})")
@@ -291,25 +336,29 @@ class CacheManager:
until the swap. Concurrent loads are serialized by ``_load_lock``; the
connection lock is only held for the brief per-batch inserts and the swap.
"""
cols = ", ".join(columns)
col_defs = ", ".join(f"{c} TEXT" for c in columns)
src_cols = ", ".join(quote_source(c, self._dialect) for c in columns)
col_defs = ", ".join(f"{quote(c)} TEXT" for c in columns)
placeholders = ", ".join("?" * len(columns))
staging = f"{table}__sqlmem_load"
q_staging = quote(staging)
q_table = quote(table)
with self._load_lock:
self.set_state(table, TableState.LOADING)
logger.info(f"Fetching {table!r} columns [{cols}] from source DB (batch={FETCH_BATCH_SIZE})")
logger.info(f"Fetching {table!r} columns {columns} from source DB (batch={self._fetch_batch})")
try:
cursor = source_conn.execute(f"SELECT {cols} FROM {table}")
cursor = source_conn.execute(
f"SELECT {src_cols} FROM {quote_source(table, self._dialect)}"
)
with self._lock:
self._conn.execute(f"DROP TABLE IF EXISTS {staging}")
self._conn.execute(f"CREATE TABLE {staging} ({col_defs})")
self._conn.execute(f"DROP TABLE IF EXISTS {q_staging}")
self._conn.execute(f"CREATE TABLE {q_staging} ({col_defs})")
self._conn.commit()
total = 0
insert_sql = f"INSERT INTO {staging} VALUES ({placeholders})"
insert_sql = f"INSERT INTO {q_staging} VALUES ({placeholders})"
while True:
batch = cursor.fetchmany(FETCH_BATCH_SIZE) # network outside _lock
batch = cursor.fetchmany(self._fetch_batch) # network outside _lock
if not batch:
break
clean = [coerce_row(row) for row in batch]
@@ -319,46 +368,83 @@ class CacheManager:
total += len(batch)
with self._lock: # atomic swap — readers see old or new, never partial
self._conn.execute(f"DROP TABLE IF EXISTS {table}")
self._conn.execute(f"ALTER TABLE {staging} RENAME TO {table}")
self._conn.execute(f"DROP TABLE IF EXISTS {q_table}")
self._conn.execute(f"ALTER TABLE {q_staging} RENAME TO {q_table}")
self._conn.commit()
except BaseException:
except BaseException as exc:
with self._lock:
self._conn.execute(f"DROP TABLE IF EXISTS {staging}")
self._conn.execute(f"DROP TABLE IF EXISTS {q_staging}")
self._conn.commit()
self.set_state(table, TableState.ERROR)
self.record_error(table, f"{type(exc).__name__}: {exc}")
raise
self._create_indexes(table, columns)
self.mark_table_refreshed(table, total, full)
self.set_state(table, TableState.READY)
self.record_success(table)
logger.info(f"Table {table!r} cached ({total} rows, columns: {columns})")
def _read_conn(self) -> sqlite3.Connection:
"""A per-thread, read-only connection used for cache reads in disk mode.
Disk mode runs in WAL, which allows many concurrent readers alongside one
writer. Giving each thread its own read connection (rather than sharing the
single write connection under ``_lock``) means a slow ``SELECT`` no longer
blocks writers (loads/upserts) or other readers. In-memory mode can't do
this — each ``:memory:`` connection is a separate database — so it keeps
using the single locked connection.
"""
conn = getattr(self._read_local, "conn", None)
if conn is None:
conn = sqlite3.connect(str(self._db_path), check_same_thread=False)
conn.execute("PRAGMA query_only=ON") # read-only guard
self._read_local.conn = conn
with self._lock:
self._read_conns.append(conn)
return conn
def execute_in_memory(
self, sql: str, params: tuple | list | dict | None = None
) -> tuple[list[str], list[tuple]]:
"""Run a read query against the in-memory cache, serialized with writers."""
"""Run a read query against the cache.
In-memory mode serializes with writers on the single connection. Disk mode
reads from a per-thread WAL connection, so reads run concurrently with
writers and each other (see :meth:`_read_conn`).
"""
bound = coerce_params(params)
with self._lock:
cursor = self._conn.execute(sql) if bound is None else self._conn.execute(sql, bound)
col_names = [desc[0] for desc in cursor.description]
rows = cursor.fetchall()
if self._in_memory:
with self._lock:
cursor = (
self._conn.execute(sql)
if bound is None
else self._conn.execute(sql, bound)
)
col_names = [desc[0] for desc in cursor.description]
rows = cursor.fetchall()
return col_names, rows
conn = self._read_conn()
cursor = conn.execute(sql) if bound is None else conn.execute(sql, bound)
col_names = [desc[0] for desc in cursor.description]
rows = cursor.fetchall()
return col_names, rows
# --- delta refresh support ---------------------------------------------
def get_table_columns(self, table: str) -> list[str]:
"""Authoritative ordered column list of a cached table (via PRAGMA)."""
rows = self._conn.execute(f"PRAGMA table_info({table})").fetchall()
rows = self._conn.execute(f"PRAGMA table_info({quote(table)})").fetchall()
return [r[1] for r in rows]
def create_unique_index(self, table: str, key_columns: list[str]) -> None:
"""Create the unique index on *key_columns* that makes upsert-by-key work."""
cols = ", ".join(key_columns)
index = f"idx_{table}_pk"
cols = quote_list(key_columns)
index = quote(f"idx_{table}_pk")
with self._lock:
self._conn.execute(
f"CREATE UNIQUE INDEX IF NOT EXISTS {index} ON {table} ({cols})"
f"CREATE UNIQUE INDEX IF NOT EXISTS {index} ON {quote(table)} ({cols})"
)
self._conn.commit()
@@ -378,23 +464,25 @@ class CacheManager:
def max_value(self, table: str, column: str) -> str | None:
"""Maximum value of *column* across cached rows (the delta watermark)."""
row = self._conn.execute(f"SELECT MAX({column}) FROM {table}").fetchone()
row = self._conn.execute(
f"SELECT MAX({quote(column)}) FROM {quote(table)}"
).fetchone()
return row[0] if row else None
def upsert_rows(self, table: str, columns: list[str], rows: list[tuple]) -> None:
"""Insert-or-replace one batch of *rows* by the table's unique key."""
col_list = ", ".join(columns)
col_list = quote_list(columns)
placeholders = ", ".join("?" * len(columns))
clean_rows = [coerce_row(row) for row in rows]
with self._lock:
self._conn.executemany(
f"INSERT OR REPLACE INTO {table} ({col_list}) VALUES ({placeholders})",
f"INSERT OR REPLACE INTO {quote(table)} ({col_list}) VALUES ({placeholders})",
clean_rows,
)
self._conn.commit()
def count_rows(self, table: str) -> int:
row = self._conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()
row = self._conn.execute(f"SELECT COUNT(*) FROM {quote(table)}").fetchone()
return int(row[0]) if row else 0
def reset(self) -> None:
@@ -411,7 +499,7 @@ class CacheManager:
).fetchall()
]
for name in user_tables:
self._conn.execute(f"DROP TABLE IF EXISTS {name}")
self._conn.execute(f"DROP TABLE IF EXISTS {quote(name)}")
self._conn.execute("DELETE FROM _sqlmem_tables")
self._conn.execute("DELETE FROM _sqlmem_columns")
self._conn.commit()
@@ -434,6 +522,13 @@ class CacheManager:
def close(self) -> None:
    """Shut the cache down.

    Calls ``_backup_to_disk()`` first, marks the manager closed, then closes
    every registered per-thread read connection (best effort) and finally the
    main connection.
    """
    self._backup_to_disk()
    self._closed = True
    with self._lock:
        for reader in self._read_conns:
            try:
                reader.close()
            except sqlite3.Error:
                # Best effort: one failing reader must not block the others.
                continue
        del self._read_conns[:]
    self._conn.close()
+39 -9
View File
@@ -1,13 +1,34 @@
import sqlite3
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
from loguru import logger
from ._sql import quote_source
from .cache import CacheManager
from .config import FETCH_BATCH_SIZE
from .stats import TableState
def _bind_watermark(watermark: str) -> datetime | str:
"""Bind the delta watermark back to the source in its native type.
The cache stores the change column as an ISO ``TEXT`` string (see
``_coerce.to_sqlite``), so ``max(change_column)`` comes back as a string such
as ``'2026-06-05T14:54:24.823000'``. Sending that straight back to the source
as an ``nvarchar`` makes SQL Server do an implicit ``varchar -> datetime``
conversion, which **fails** on the ``T``-separated, 6-digit-microsecond ISO
form (error 241 / SQLSTATE 22007 — ``datetime`` accepts at most 3 fractional
digits). Parsing it back to a real :class:`~datetime.datetime` makes the
driver send a typed timestamp, so the comparison happens natively with no
string conversion. Non-datetime change columns (e.g. an integer rowversion)
don't parse and are passed through unchanged.
"""
try:
return datetime.fromisoformat(watermark)
except (TypeError, ValueError):
return watermark
@dataclass(frozen=True)
class DeltaConfig:
"""Per-table configuration for incremental (delta) refresh.
@@ -43,28 +64,37 @@ class DeltaRefresher:
self._cache = cache
self._delta = delta
def refresh(self, source_conn: sqlite3.Connection) -> None:
def refresh(self, source_conn: Any) -> None:
for table, cfg in self._delta.items():
if not self._cache.is_table_cached(table):
continue
try:
self._refresh_table(table, cfg, source_conn)
self._cache.record_success(table)
except Exception as e: # one bad table must not stop the others
logger.error(f"Delta refresh failed for {table!r}: {e}")
# A delta can fail before streaming starts (e.g. a watermark the
# source rejects), leaving state misleadingly READY — mark it and
# record the error so stats reveal the stuck table.
self._cache.set_state(table, TableState.ERROR)
self._cache.record_error(table, f"{type(e).__name__}: {e}")
def _refresh_table(
self, table: str, cfg: ResolvedDelta, source_conn: sqlite3.Connection
self, table: str, cfg: ResolvedDelta, source_conn: Any
) -> None:
columns = self._cache.get_table_columns(table)
watermark = self._cache.get_last_synced_at(table)
col_list = ", ".join(columns)
dialect = self._cache._dialect
col_list = ", ".join(quote_source(c, dialect) for c in columns)
q_table = quote_source(table, dialect)
if watermark is None:
cursor = source_conn.execute(f"SELECT {col_list} FROM {table}")
cursor = source_conn.execute(f"SELECT {col_list} FROM {q_table}")
else:
change_col = quote_source(cfg.change_column, dialect)
cursor = source_conn.execute(
f"SELECT {col_list} FROM {table} WHERE {cfg.change_column} >= ?",
(watermark,),
f"SELECT {col_list} FROM {q_table} WHERE {change_col} >= ?",
(_bind_watermark(watermark),),
)
# Stream the delta in batches so a large catch-up never materializes at once.
@@ -72,7 +102,7 @@ class DeltaRefresher:
self._cache.set_state(table, TableState.REFRESHING)
try:
while True:
batch = cursor.fetchmany(FETCH_BATCH_SIZE)
batch = cursor.fetchmany(self._cache._fetch_batch)
if not batch:
break
self._cache.upsert_rows(table, columns, batch)
+89 -19
View File
@@ -1,18 +1,21 @@
import sqlite3
import threading
from dataclasses import replace
from typing import cast
from pathlib import Path
from typing import Any
from loguru import logger
from sqlalchemy import inspect
from sqlalchemy.engine import Engine
from sqlalchemy.engine import Connection, Engine
from .cache import CacheManager
from ._sql import quote
from .cache import CacheManager, TableError
from .config import (
BACKUP_INTERVAL_SECONDS,
CACHE_DB_PATH,
FETCH_BATCH_SIZE,
IN_MEMORY,
REFRESH_INTERVAL_SECONDS,
SQL_DIALECT,
)
from .delta import DeltaConfig, DeltaRefresher, ResolvedDelta
from .executor import QueryExecutor
@@ -21,6 +24,32 @@ from .registry import ColumnRegistry
from .stats import Stats, StatsCollector, TableState, TableStats
class _LazySource:
"""A source connection opened on first ``execute`` and shared across one query.
Most queries are cache hits that never touch the source, so opening it (and
occupying a connection-pool slot) eagerly is wasteful. This proxy forwards
``execute`` to a real connection opened on demand, then released by ``close``.
"""
def __init__(self, source_engine: Engine) -> None:
self._source_engine = source_engine
self._sa_conn: Connection | None = None
self._raw: Any = None
def execute(self, *args: Any, **kwargs: Any) -> Any:
if self._raw is None:
self._sa_conn = self._source_engine.connect()
self._raw = self._sa_conn.connection.dbapi_connection
return self._raw.execute(*args, **kwargs)
def close(self) -> None:
if self._sa_conn is not None:
self._sa_conn.close()
self._sa_conn = None
self._raw = None
class CachingEngine:
"""Transparent SQLAlchemy-compatible cache layer."""
@@ -31,15 +60,28 @@ class CachingEngine:
ttl: dict[str, int] | None = None,
indexes: dict[str, list[str | list[str]]] | None = None,
in_memory: bool | None = None,
cache_db_path: str | Path | None = None,
backup_interval: int | None = None,
refresh_interval: int | None = None,
fetch_batch: int | None = None,
dialect: str | None = None,
blocking_startup_refresh: bool = False,
) -> None:
self._source_engine = source_engine
use_memory = IN_MEMORY if in_memory is None else in_memory
self._dialect = dialect if dialect is not None else SQL_DIALECT
self._refresh_interval = (
refresh_interval if refresh_interval is not None else REFRESH_INTERVAL_SECONDS
)
self._cache = CacheManager(
CACHE_DB_PATH, BACKUP_INTERVAL_SECONDS, in_memory=use_memory
Path(cache_db_path) if cache_db_path is not None else CACHE_DB_PATH,
backup_interval if backup_interval is not None else BACKUP_INTERVAL_SECONDS,
in_memory=use_memory,
dialect=self._dialect,
fetch_batch=fetch_batch if fetch_batch is not None else FETCH_BATCH_SIZE,
)
self._registry = ColumnRegistry(self._cache.connection)
self._stats = StatsCollector()
self._refresh_interval = REFRESH_INTERVAL_SECONDS
self._delta = self._resolve_delta(delta or {})
self._ttl = dict(ttl or {})
self._index_columns = self._register_indexes(indexes or {})
@@ -54,8 +96,13 @@ class CachingEngine:
)
if self._delta or self._ttl:
self._run_refresh() # catch up tables restored from disk
self._start_refresh_thread()
# The startup catch-up (deltas/TTL reloads for tables restored from
# disk) can take a while on a cold start. By default it runs on the
# background thread so it never blocks application startup; callers
# who need the cache fully fresh before serving can opt back in.
if blocking_startup_refresh:
self._run_refresh()
self._start_refresh_thread(initial_catch_up=not blocking_startup_refresh)
logger.info("CachingEngine initialized.")
@@ -97,12 +144,18 @@ class CachingEngine:
@property
def stats(self) -> Stats:
states = self._cache.get_states()
errors = self._cache.get_errors()
with self._cache._lock:
base = self._stats.snapshot(self._cache.connection, states)
return replace(base, tables={n: self._enrich(n, t) for n, t in base.tables.items()})
base = replace(base, errors=self._cache.error_total)
return replace(
base, tables={n: self._enrich(n, t, errors) for n, t in base.tables.items()}
)
def _enrich(self, name: str, table_stats: TableStats) -> TableStats:
"""Annotate a TableStats with how it is refreshed and TTL staleness."""
def _enrich(
self, name: str, table_stats: TableStats, errors: dict[str, TableError]
) -> TableStats:
"""Annotate a TableStats with refresh tracking, TTL staleness and errors."""
if name in self._delta:
tracking = "delta"
elif name in self._ttl:
@@ -115,22 +168,37 @@ class CachingEngine:
age = self._cache.seconds_since_refresh(name)
if age is not None and age > self._ttl[name]:
state = TableState.STALE
err = errors.get(name)
if err is not None:
return replace(
table_stats,
tracking=tracking,
state=state,
last_error=err.message,
last_error_at=err.at,
consecutive_failures=err.consecutive,
)
return replace(table_stats, tracking=tracking, state=state)
def execute(self, sql: str, params: Params = None) -> list[dict]:
parsed = parse(sql, params)
with self._source_engine.connect() as sa_conn:
raw_conn = cast(sqlite3.Connection, sa_conn.connection.dbapi_connection)
parsed = parse(sql, params, dialect=self._dialect)
# The source connection is opened lazily — a pure cache hit never touches
# the source and never occupies a pool slot.
source = _LazySource(self._source_engine)
try:
executor = QueryExecutor(
self._cache,
self._registry,
raw_conn,
source,
self._stats,
self._delta,
self._ttl,
self._index_columns,
)
return executor.execute(parsed)
finally:
source.close()
def refresh(self) -> None:
"""Pull deltas for all delta-tracked tables now (also runs on a timer)."""
@@ -139,13 +207,13 @@ class CachingEngine:
def _run_refresh(self) -> None:
try:
with self._source_engine.connect() as sa_conn:
raw_conn = cast(sqlite3.Connection, sa_conn.connection.dbapi_connection)
raw_conn = sa_conn.connection.dbapi_connection
self._refresher.refresh(raw_conn)
self._refresh_ttl(raw_conn)
except Exception as e:
logger.error(f"Refresh cycle failed: {e}")
def _refresh_ttl(self, source_conn: sqlite3.Connection) -> None:
def _refresh_ttl(self, source_conn: Any) -> None:
"""Proactively full-reload TTL-tracked tables whose cache has expired."""
for table, ttl in self._ttl.items():
if not self._cache.is_table_cached(table):
@@ -161,8 +229,10 @@ class CachingEngine:
except Exception as e:
logger.error(f"TTL refresh failed for {table!r}: {e}")
def _start_refresh_thread(self) -> None:
def _start_refresh_thread(self, initial_catch_up: bool = True) -> None:
def loop() -> None:
if initial_catch_up:
self._run_refresh() # off-main-thread startup catch-up
event = threading.Event()
while not event.wait(self._refresh_interval):
self._run_refresh()
@@ -174,7 +244,7 @@ class CachingEngine:
def invalidate(self, table: str) -> None:
logger.info(f"Manually invalidating cache for table {table!r}")
with self._cache._lock:
self._cache.connection.execute(f"DROP TABLE IF EXISTS {table}")
self._cache.connection.execute(f"DROP TABLE IF EXISTS {quote(table)}")
self._cache.connection.execute(
"DELETE FROM _sqlmem_tables WHERE table_name = ?", (table,)
)
+2 -2
View File
@@ -1,4 +1,4 @@
import sqlite3
from typing import Any
from loguru import logger
@@ -14,7 +14,7 @@ class QueryExecutor:
self,
cache: CacheManager,
registry: ColumnRegistry,
source_conn: sqlite3.Connection,
source_conn: Any, # raw DBAPI connection (pyodbc/sqlite3/…) — only .execute() is used
stats: StatsCollector,
delta: dict[str, ResolvedDelta] | None = None,
ttl: dict[str, int] | None = None,
+2 -2
View File
@@ -25,10 +25,10 @@ class ParsedQuery:
wildcard_tables: set[str] = field(default_factory=set)
def parse(sql: str, params: Params = None) -> ParsedQuery:
def parse(sql: str, params: Params = None, dialect: str = SQL_DIALECT) -> ParsedQuery:
logger.debug(f"Parsing SQL: {sql!r}")
statement = sqlglot.parse_one(sql, dialect=SQL_DIALECT)
statement = sqlglot.parse_one(sql, dialect=dialect)
if isinstance(statement, WRITE_TYPES):
raise ReadOnlyError(
+6
View File
@@ -20,6 +20,11 @@ class TableStats:
last_refresh: str
state: str = TableState.READY
tracking: str = "static" # "delta" | "ttl" | "static"
# Most recent load/refresh failure for this table, if any. ``consecutive_failures``
# resets to 0 on the next success, so > 0 means the table is currently failing.
last_error: str | None = None
last_error_at: str | None = None
consecutive_failures: int = 0
@dataclass(frozen=True)
@@ -28,6 +33,7 @@ class Stats:
misses: int
refetches: int
tables: dict[str, TableStats]
errors: int = 0 # total load/refresh failures since start
class StatsCollector: