# File: SQLmem/src/sqlmem/executor.py (Python, 96 lines, 3.7 KiB)

import sqlite3
from loguru import logger
from .cache import CacheManager
from .delta import ResolvedDelta
from .parser import ParsedQuery
from .registry import ColumnRegistry
from .stats import StatsCollector
class QueryExecutor:
    """Run parsed queries against the in-memory cache, loading tables on demand.

    Before a query runs, every table it references is checked against the
    cache and the column registry; whatever is absent is fetched through the
    cache manager using the source connection. Hits, misses and re-fetches
    are reported to the stats collector.
    """

    def __init__(
        self,
        cache: CacheManager,
        registry: ColumnRegistry,
        source_conn: sqlite3.Connection,
        stats: StatsCollector,
        delta: dict[str, ResolvedDelta] | None = None,
    ) -> None:
        """Store the collaborators used while executing queries.

        Args:
            cache: Loads tables and executes SQL in memory.
            registry: Tracks which columns are recorded for each table.
            source_conn: Connection handed to the cache whenever it loads or
                inspects a source table.
            stats: Receives hit / miss / refetch notifications.
            delta: Optional per-table delta configuration keyed by table
                name; a falsy value is replaced by an empty dict.
        """
        self._delta = delta if delta else {}
        self._stats = stats
        self._source_conn = source_conn
        self._registry = registry
        self._cache = cache

    def execute(self, parsed: ParsedQuery) -> list[dict]:
        """Make every table of *parsed* available in the cache, then run it.

        Returns:
            One dict per result row, keyed by column name.
        """
        for table_name in parsed.tables:
            self._ensure_table(table_name, parsed)
        return self._run_in_memory(parsed)

    def _ensure_table(self, table: str, parsed: ParsedQuery) -> None:
        """Dispatch *table* to a full load or a column-limited load."""
        if table not in parsed.wildcard_tables:
            # Only the columns the query names for this table are required.
            self._ensure_columns(table, parsed.columns_by_table[table])
            return
        self._ensure_full(table)

    def _ensure_full(self, table: str) -> None:
        """Guarantee all columns of *table* are cached (SELECT * / t.*).

        A table that is cached and already marked full is a hit; a cached
        but not-full table is re-fetched; an uncached one is a miss.
        """
        store = self._cache
        if store.is_table_cached(table) and store.is_table_full(table):
            logger.debug(f"Cache hit (full): {table!r}")
            self._stats.record_hit()
            return

        if not store.is_table_cached(table):
            self._stats.record_miss()
        else:
            logger.warning(f"Re-fetching {table!r} in full — SELECT * requested.")
            self._stats.record_refetch()

        discovered = store.discover_columns(table, self._source_conn)
        self._load(table, discovered, full=True)

    def _ensure_columns(self, table: str, columns: list[str]) -> None:
        """Guarantee *table* is cached with at least *columns*.

        Nothing is fetched when the table is cached and the registry reports
        no absent columns; otherwise the table is (re)loaded with the
        registry's recorded columns plus the absent ones.
        """
        absent = self._registry.needs_refetch(table, columns)
        is_cached = self._cache.is_table_cached(table)

        if is_cached and not absent:
            logger.debug(f"Cache hit: {table!r} columns={columns}")
            self._stats.record_hit()
            return

        if not is_cached:
            self._stats.record_miss()
        else:
            # The hit case returned above, so a cached table reaching this
            # point necessarily has absent columns.
            logger.warning(
                f"Re-fetching {table!r} — new columns requested: {absent}. "
                f"Expanding cache from {self._registry.get_columns(table)} + {absent}"
            )
            self._stats.record_refetch()

        recorded = list(self._registry.get_columns(table))
        self._load(table, recorded + absent, full=False)

    def _load(self, table: str, columns: list[str], full: bool) -> None:
        """Fetch *table* into the cache and record its columns in the registry.

        For a table with delta configuration, the key columns and the change
        column are appended to the requested columns before loading, and the
        unique index and last-synced watermark are set afterwards.
        """
        tracking = self._delta.get(table)
        if tracking:
            # The cache needs the key (to upsert) and the change column (to
            # compute the watermark) even when no query referenced them.
            # dict.fromkeys drops duplicates while keeping first-seen order.
            requested = [*columns, *tracking.key_columns, tracking.change_column]
            columns = list(dict.fromkeys(requested))

        self._cache.load_table(table, columns, self._source_conn, full=full)
        self._registry.update(table, columns)

        if not tracking:
            return
        self._cache.create_unique_index(table, tracking.key_columns)
        high_water = self._cache.max_value(table, tracking.change_column)
        self._cache.set_last_synced_at(table, high_water)

    def _run_in_memory(self, parsed: ParsedQuery) -> list[dict]:
        """Execute the query's SQLite SQL in the cache and return rows as dicts."""
        logger.debug(f"Executing in SQLite RAM: {parsed.sqlite_sql!r} params={parsed.params!r}")
        header, rows = self._cache.execute_in_memory(parsed.sqlite_sql, parsed.params)
        return [{name: value for name, value in zip(header, row)} for row in rows]