import sqlite3

from loguru import logger

from .cache import CacheManager
from .delta import ResolvedDelta
from .parser import ParsedQuery
from .registry import ColumnRegistry
from .stats import StatsCollector


class QueryExecutor:
    """Ensure the tables a parsed query touches are cached, then run the query.

    For every table referenced by a query the executor decides between a cache
    hit, a first-time load (miss) and a re-fetch, reports that decision to the
    stats collector, loads whatever is needed through the cache manager, and
    finally executes the query's SQLite SQL via the cache manager.
    """

    def __init__(
        self,
        cache: CacheManager,
        registry: ColumnRegistry,
        source_conn: sqlite3.Connection,
        stats: StatsCollector,
        delta: dict[str, ResolvedDelta] | None = None,
    ) -> None:
        """Store the collaborators used while serving queries.

        Args:
            cache: Cache manager that loads tables and executes the queries.
            registry: Tracks which columns have been loaded for each table.
            source_conn: Connection handed to the cache manager when it
                discovers columns or loads a table.
            stats: Receives hit / miss / refetch notifications.
            delta: Optional per-table delta configuration, keyed by table
                name. ``None`` (or any falsy value) means no table is tracked.
        """
        self._cache = cache
        self._registry = registry
        self._source_conn = source_conn
        self._stats = stats
        self._delta = delta or {}

    def execute(self, parsed: ParsedQuery) -> list[dict]:
        """Make every table of *parsed* available, then return the result rows.

        Returns:
            One dict per result row, keyed by result column name.
        """
        for table_name in parsed.tables:
            self._ensure_table(table_name, parsed)
        return self._run_in_memory(parsed)

    def _ensure_table(self, table: str, parsed: ParsedQuery) -> None:
        """Dispatch *table* to a column-wise or a full load, as the query requires."""
        if table not in parsed.wildcard_tables:
            self._ensure_columns(table, parsed.columns_by_table[table])
            return
        # The query selects every column of this table, so a partial cache
        # entry is not sufficient.
        self._ensure_full(table)

    def _ensure_full(self, table: str) -> None:
        """Guarantee *table* is cached with all of its columns (SELECT * / t.*).

        A table that is cached and flagged as full counts as a hit. A table
        that is cached but not full is re-fetched; an uncached table is a miss.
        """
        cache = self._cache
        if cache.is_table_cached(table) and cache.is_table_full(table):
            logger.debug(f"Cache hit (full): {table!r}")
            self._stats.record_hit()
            return

        if not cache.is_table_cached(table):
            self._stats.record_miss()
        else:
            logger.warning(f"Re-fetching {table!r} in full — SELECT * requested.")
            self._stats.record_refetch()

        every_column = cache.discover_columns(table, self._source_conn)
        self._load(table, every_column, full=True)

    def _ensure_columns(self, table: str, columns: list[str]) -> None:
        """Guarantee *table* is cached with at least *columns*.

        Nothing is fetched when the table is cached and the registry reports
        no missing columns. Otherwise the table is (re)loaded with the columns
        the registry already knows plus the missing ones.
        """
        missing = self._registry.needs_refetch(table, columns)

        if self._cache.is_table_cached(table):
            if not missing:
                logger.debug(f"Cache hit: {table!r} columns={columns}")
                self._stats.record_hit()
                return
            # Cached, but the query needs columns that were never loaded.
            logger.warning(
                f"Re-fetching {table!r} — new columns requested: {missing}. "
                f"Expanding cache from {self._registry.get_columns(table)} + {missing}"
            )
            self._stats.record_refetch()
        else:
            self._stats.record_miss()

        known = list(self._registry.get_columns(table))
        self._load(table, known + missing, full=False)

    def _load(self, table: str, columns: list[str], full: bool) -> None:
        """Load *table* into the cache and record the loaded columns.

        For a table with a delta configuration, the key columns and the change
        column are appended to *columns* (duplicates removed, first occurrence
        wins). After loading, a unique index is created on the key columns and
        the maximum of the change column is stored as the last-synced value.
        """
        tracking = self._delta.get(table)
        if tracking:
            # Key and change columns are needed for the index and watermark
            # below, whether or not the query itself referenced them.
            wanted = list(columns)
            wanted.extend(tracking.key_columns)
            wanted.append(tracking.change_column)
            columns = list(dict.fromkeys(wanted))

        self._cache.load_table(table, columns, self._source_conn, full=full)
        self._registry.update(table, columns)

        if not tracking:
            return

        self._cache.create_unique_index(table, tracking.key_columns)
        newest = self._cache.max_value(table, tracking.change_column)
        self._cache.set_last_synced_at(table, newest)

    def _run_in_memory(self, parsed: ParsedQuery) -> list[dict]:
        """Execute the query's SQLite SQL via the cache and return rows as dicts."""
        logger.debug(f"Executing in SQLite RAM: {parsed.sqlite_sql!r} params={parsed.params!r}")
        header, records = self._cache.execute_in_memory(parsed.sqlite_sql, parsed.params)
        return [dict(zip(header, record)) for record in records]