# SQLmem/src/sqlmem/executor.py  (file-listing metadata: Python, 175 lines, 7.1 KiB)

from collections.abc import Callable
from typing import Any
from loguru import logger
from .cache import CacheManager
from .delta import ResolvedDelta
from .parser import ParsedQuery
from .registry import ColumnRegistry
from .stats import StatsCollector
class QueryExecutor:
    """Run parsed queries against the cache, loading tables on demand.

    For every table a query references, the executor makes sure the cache
    holds the needed columns (or the whole table for wildcard selects),
    reloading from ``source_conn`` on a miss, when new columns are requested,
    or when the table's TTL has elapsed.  The query itself is then executed
    through :meth:`CacheManager.execute_in_memory`.
    """

    def __init__(
        self,
        cache: CacheManager,
        registry: ColumnRegistry,
        source_conn: Any,  # raw DBAPI connection (pyodbc/sqlite3/…) — only .execute() is used
        stats: StatsCollector,
        delta: dict[str, ResolvedDelta] | None = None,
        ttl: dict[str, int] | None = None,
        index_columns: dict[str, list[str]] | None = None,
    ) -> None:
        """Store the collaborators and per-table configuration.

        Args:
            cache: Cache that holds the loaded tables and runs the queries.
            registry: Tracks which columns have been loaded per table.
            source_conn: Connection handed to the cache when it loads data.
            stats: Receives hit / miss / refetch notifications.
            delta: Per-table delta config; its ``key_columns`` and
                ``change_column`` are always added to the loaded columns.
            ttl: Per-table maximum cache age, compared against
                ``cache.seconds_since_refresh``.  Tables without an entry
                never expire.
            index_columns: Per-table columns that are always added to the
                loaded columns.
        """
        self._cache = cache
        self._registry = registry
        self._source_conn = source_conn
        self._stats = stats
        # ``or {}`` gives every instance its own empty mapping when None is passed.
        self._delta = delta or {}
        self._ttl = ttl or {}
        self._index_columns = index_columns or {}

    def _ttl_expired(self, table: str) -> bool:
        """True if *table* has a TTL and its cached copy is older than that TTL."""
        ttl = self._ttl.get(table)
        if ttl is None:
            # No TTL configured: the table never expires.
            return False
        age = self._cache.seconds_since_refresh(table)
        # A ``None`` age (no refresh recorded) is treated as "not expired".
        return age is not None and age > ttl

    def execute(self, parsed: ParsedQuery) -> list[dict]:
        """Ensure every table of *parsed* is cached, then run the query.

        Returns:
            One dict per result row, keyed by result column name.
        """
        for table in parsed.tables:
            self._ensure_table(table, parsed)
        return self._run_in_memory(parsed)

    def ensure_loaded(self, table: str, columns: list[str] | None) -> None:
        """Preload *table* into the cache without running a query.

        ``columns=None`` loads the whole table (``SELECT *`` semantics); otherwise
        only the listed columns. Reuses the same load path as a real query — delta
        key/change + index columns are augmented, the registry and watermark are
        updated, and double-checked locking skips a copy already fresh in the
        cache — but never materializes any rows (unlike :meth:`execute`).
        """
        if columns is None:
            self._ensure_full(table)
        else:
            self._ensure_columns(table, columns)

    def _ensure_table(self, table: str, parsed: ParsedQuery) -> None:
        """Load *table* in full for wildcard selects, else only the referenced columns."""
        if table in parsed.wildcard_tables:
            self._ensure_full(table)
        else:
            self._ensure_columns(table, parsed.columns_by_table[table])

    def _full_satisfied(self, table: str) -> bool:
        """True if *table* is cached in full and not TTL-expired (a SELECT * hit)."""
        return (
            self._cache.is_table_cached(table)
            and self._cache.is_table_full(table)
            and not self._ttl_expired(table)
        )

    def _columns_satisfied(self, table: str, columns: list[str]) -> bool:
        """True if *table* is cached with all *columns* present and not TTL-expired."""
        if not self._cache.is_table_cached(table) or self._ttl_expired(table):
            return False
        return set(columns).issubset(self._cache.get_table_columns(table))

    def _ensure_full(self, table: str) -> None:
        """Load every column of *table* (SELECT * / t.*), refetching unless already full."""
        cached = self._cache.is_table_cached(table)
        stale = cached and self._ttl_expired(table)

        if cached and self._cache.is_table_full(table) and not stale:
            logger.debug("Cache hit (full): {!r}", table)
            self._stats.record_hit()
            return

        if cached and stale:
            logger.info("Cache expired (ttl) — reloading {!r} in full.", table)
            self._stats.record_refetch()
        elif cached:
            # Cached but only partially: a wildcard select needs every column.
            logger.warning("Re-fetching {!r} in full — SELECT * requested.", table)
            self._stats.record_refetch()
        else:
            self._stats.record_miss()

        columns = self._cache.discover_columns(table, self._source_conn)
        # The predicate ignores the column list: "full and fresh" is all that matters.
        self._load(table, columns, full=True, satisfied=lambda _cols: self._full_satisfied(table))

    def _ensure_columns(self, table: str, columns: list[str]) -> None:
        """Load *table* with at least *columns*, refetching on new columns or TTL expiry."""
        missing = self._registry.needs_refetch(table, columns)
        table_cached = self._cache.is_table_cached(table)
        stale = table_cached and self._ttl_expired(table)

        if table_cached and not missing and not stale:
            logger.debug("Cache hit: {!r} columns={}", table, columns)
            self._stats.record_hit()
            return

        if stale:
            logger.info("Cache expired (ttl) — reloading {!r}.", table)
            self._stats.record_refetch()
        elif table_cached and missing:
            logger.warning(
                "Re-fetching {!r} — new columns requested: {}. "
                "Expanding cache from {} + {}",
                table,
                missing,
                self._registry.get_columns(table),
                missing,
            )
            self._stats.record_refetch()
        else:
            self._stats.record_miss()

        # Reload everything the registry already tracks plus the new columns.
        all_columns = list(self._registry.get_columns(table)) + missing
        # Preserve a fully-cached table's status across a TTL reload.
        full = table_cached and self._cache.is_table_full(table)
        self._load(
            table,
            all_columns,
            full=full,
            satisfied=lambda cols: self._columns_satisfied(table, cols),
        )

    def _load(
        self,
        table: str,
        columns: list[str],
        full: bool,
        satisfied: Callable[[list[str]], bool] | None = None,
    ) -> None:
        """Fetch *table* into cache, adding delta key/timestamp and index columns.

        *satisfied* is the double-checked-locking predicate evaluated under the
        load lock (see :meth:`CacheManager.load_table`); it is given the final,
        augmented column list so a concurrent loader that already produced an
        equivalent (or wider) cache is detected and the redundant reload skipped.

        Args:
            table: Table to load.
            columns: Columns requested by the caller; augmented and
                de-duplicated (first occurrence wins) before loading.
            full: Passed through to ``load_table`` to mark a whole-table load.
            satisfied: Optional predicate over the final column list.
        """
        cfg = self._delta.get(table)
        extra = list(self._index_columns.get(table, []))
        if cfg:
            # The cache must always hold the key (to upsert) and the change column
            # (to compute the watermark), even if no query referenced them.
            extra += [*cfg.key_columns, cfg.change_column]

        # De-duplicate unconditionally, not only when extras exist, so duplicate
        # requested columns never reach load_table / the registry.
        # dict.fromkeys keeps first-seen order.
        columns = list(dict.fromkeys([*columns, *extra]))

        recheck: Callable[[], bool] | None = None
        if satisfied is not None:
            final_columns = columns
            recheck = lambda: satisfied(final_columns)  # noqa: E731

        self._cache.load_table(table, columns, self._source_conn, full=full, recheck=recheck)
        self._registry.update(table, columns)

        if cfg:
            self._cache.create_unique_index(table, cfg.key_columns)
            watermark = self._cache.max_value(table, cfg.change_column)
            self._cache.set_last_synced_at(table, watermark)

    def _run_in_memory(self, parsed: ParsedQuery) -> list[dict]:
        """Execute the query's SQLite SQL against the cache and return rows as dicts."""
        # Deferred (brace-style) formatting: the repr of the parameters is only
        # built when a handler actually accepts DEBUG records.
        logger.debug(
            "Executing in SQLite RAM: {!r} params={!r}", parsed.sqlite_sql, parsed.params
        )
        col_names, rows = self._cache.execute_in_memory(
            parsed.sqlite_sql, parsed.params, parsed.tables
        )
        return [dict(zip(col_names, row)) for row in rows]