from collections.abc import Callable
from typing import Any

from loguru import logger

from .cache import CacheManager
from .delta import ResolvedDelta
from .parser import ParsedQuery
from .registry import ColumnRegistry
from .stats import StatsCollector


class QueryExecutor:
    """Make sure every table a query touches is cached, then run the query in memory.

    For each table the executor decides between a cache hit, a first-time
    load (miss) and a reload (refetch: TTL expiry, newly requested columns,
    or a ``SELECT *`` over a partially cached table), records the outcome
    on the stats collector, and delegates the actual fetch to the cache
    manager.
    """

    def __init__(
        self,
        cache: CacheManager,
        registry: ColumnRegistry,
        source_conn: Any,  # raw DBAPI connection (pyodbc/sqlite3/…) — only .execute() is used
        stats: StatsCollector,
        delta: dict[str, ResolvedDelta] | None = None,
        ttl: dict[str, int] | None = None,
        index_columns: dict[str, list[str]] | None = None,
    ) -> None:
        """Store the collaborators and the optional per-table settings.

        *delta*, *ttl* and *index_columns* are keyed by table name; a
        missing (or empty) mapping is replaced by a fresh empty dict.
        """
        self._cache = cache
        self._registry = registry
        self._source_conn = source_conn
        self._stats = stats
        self._delta = delta if delta else {}
        self._ttl = ttl if ttl else {}
        self._index_columns = index_columns if index_columns else {}

    def _ttl_expired(self, table: str) -> bool:
        """Report whether *table* has a TTL that its cached copy has outlived."""
        limit = self._ttl.get(table)
        if limit is None:
            # No TTL configured for this table: it never expires.
            return False
        elapsed = self._cache.seconds_since_refresh(table)
        if elapsed is None:
            # The cache reports no refresh age, so there is nothing to compare.
            return False
        return elapsed > limit

    def execute(self, parsed: ParsedQuery) -> list[dict]:
        """Ensure each table of *parsed* is cached, then return the query's rows."""
        for name in parsed.tables:
            self._ensure_table(name, parsed)
        return self._run_in_memory(parsed)

    def ensure_loaded(self, table: str, columns: list[str] | None) -> None:
        """Warm the cache for *table* without executing any query.

        Passing ``columns=None`` requests the whole table (``SELECT *``
        semantics); a list requests at least those columns. The work goes
        through the same load path a real query uses, but no rows are
        returned to the caller (unlike :meth:`execute`).
        """
        if columns is not None:
            self._ensure_columns(table, columns)
            return
        self._ensure_full(table)

    def _ensure_table(self, table: str, parsed: ParsedQuery) -> None:
        """Dispatch *table* to a full load or a column load, as *parsed* requires."""
        if table not in parsed.wildcard_tables:
            self._ensure_columns(table, parsed.columns_by_table[table])
            return
        self._ensure_full(table)

    def _full_satisfied(self, table: str) -> bool:
        """Report whether a ``SELECT *`` on *table* can be served from the cache.

        That requires the table to be cached, cached in full, and not
        TTL-expired.
        """
        if not self._cache.is_table_cached(table):
            return False
        if not self._cache.is_table_full(table):
            return False
        return not self._ttl_expired(table)

    def _columns_satisfied(self, table: str, columns: list[str]) -> bool:
        """Report whether the cache holds *table* with every one of *columns*.

        A table that is not cached, or whose TTL has expired, never
        satisfies the request.
        """
        if not self._cache.is_table_cached(table):
            return False
        if self._ttl_expired(table):
            return False
        present = self._cache.get_table_columns(table)
        return set(columns) <= set(present)

    def _ensure_full(self, table: str) -> None:
        """Make sure all columns of *table* are cached (``SELECT *`` / ``t.*``).

        A cached, full, unexpired copy counts as a hit. Anything else is
        (re)loaded with the column list discovered from the source
        connection.
        """
        in_cache = self._cache.is_table_cached(table)
        expired = in_cache and self._ttl_expired(table)

        if not in_cache:
            self._stats.record_miss()
        else:
            complete = self._cache.is_table_full(table)
            if complete and not expired:
                logger.debug(f"Cache hit (full): {table!r}")
                self._stats.record_hit()
                return
            if expired:
                logger.info(f"Cache expired (ttl) — reloading {table!r} in full.")
            else:
                # Cached and fresh, but only partially: the wildcard forces a reload.
                logger.warning(f"Re-fetching {table!r} in full — SELECT * requested.")
            self._stats.record_refetch()

        def already_full(_cols: list[str]) -> bool:
            # The column list is irrelevant here; only "full and fresh" matters.
            return self._full_satisfied(table)

        discovered = self._cache.discover_columns(table, self._source_conn)
        self._load(table, discovered, full=True, satisfied=already_full)

    def _ensure_columns(self, table: str, columns: list[str]) -> None:
        """Make sure *table* is cached with at least *columns*.

        The table is (re)loaded when it is not cached, when its TTL has
        expired, or when the registry reports requested columns it does
        not yet have. A reload always asks for the registry's known columns
        plus the missing ones.
        """
        missing = self._registry.needs_refetch(table, columns)
        in_cache = self._cache.is_table_cached(table)
        expired = in_cache and self._ttl_expired(table)

        if not in_cache:
            self._stats.record_miss()
        elif expired:
            logger.info(f"Cache expired (ttl) — reloading {table!r}.")
            self._stats.record_refetch()
        elif missing:
            logger.warning(
                f"Re-fetching {table!r} — new columns requested: {missing}. "
                f"Expanding cache from {self._registry.get_columns(table)} + {missing}"
            )
            self._stats.record_refetch()
        else:
            logger.debug(f"Cache hit: {table!r} columns={columns}")
            self._stats.record_hit()
            return

        known = list(self._registry.get_columns(table))
        wanted = known + missing
        # A table that was cached in full keeps that status across a TTL reload.
        keep_full = in_cache and self._cache.is_table_full(table)

        def has_columns(cols: list[str]) -> bool:
            return self._columns_satisfied(table, cols)

        self._load(table, wanted, full=keep_full, satisfied=has_columns)

    def _load(
        self,
        table: str,
        columns: list[str],
        full: bool,
        satisfied: Callable[[list[str]], bool] | None = None,
    ) -> None:
        """Fetch *table* into the cache, widening *columns* with bookkeeping columns.

        Index columns configured for the table and, when a delta config
        exists, its key columns and change column are appended to the
        request (duplicates dropped, first occurrence wins). *satisfied*
        is the double-checked-locking predicate evaluated under the load
        lock (see :meth:`CacheManager.load_table`); it receives the final,
        widened column list so that an equivalent or wider cache produced
        by a concurrent loader is detected and the redundant reload is
        skipped.
        """
        delta_cfg = self._delta.get(table)

        augment = list(self._index_columns.get(table, []))
        if delta_cfg:
            # The key is needed to upsert and the change column to compute the
            # watermark, whether or not any query referenced them.
            augment.extend(delta_cfg.key_columns)
            augment.append(delta_cfg.change_column)

        if augment:
            # dict keys preserve insertion order, giving an ordered de-duplication.
            merged = dict.fromkeys(columns)
            merged.update(dict.fromkeys(augment))
            columns = list(merged)

        recheck: Callable[[], bool] | None = None
        if satisfied is not None:
            final_columns = columns

            def _recheck() -> bool:
                return satisfied(final_columns)

            recheck = _recheck

        self._cache.load_table(table, columns, self._source_conn, full=full, recheck=recheck)
        self._registry.update(table, columns)

        if delta_cfg:
            self._cache.create_unique_index(table, delta_cfg.key_columns)
            newest = self._cache.max_value(table, delta_cfg.change_column)
            self._cache.set_last_synced_at(table, newest)

    def _run_in_memory(self, parsed: ParsedQuery) -> list[dict]:
        """Run the rewritten query against the cache; return one dict per row."""
        logger.debug(f"Executing in SQLite RAM: {parsed.sqlite_sql!r} params={parsed.params!r}")
        header, records = self._cache.execute_in_memory(
            parsed.sqlite_sql, parsed.params, parsed.tables
        )
        return [dict(zip(header, record)) for record in records]