175 lines
7.1 KiB
Python
175 lines
7.1 KiB
Python
from collections.abc import Callable
from functools import partial
from typing import Any

from loguru import logger

from .cache import CacheManager
from .delta import ResolvedDelta
from .parser import ParsedQuery
from .registry import ColumnRegistry
from .stats import StatsCollector
|
|
|
|
|
|
class QueryExecutor:
    """Run parsed queries against the cache, loading tables on demand.

    For every table a query references, the executor first makes sure the
    cache holds a sufficient copy (all columns for wildcard tables, otherwise
    at least the referenced columns), reloading from the source connection
    when the copy is missing, too narrow, or older than its TTL. The query
    itself is then executed by the cache via ``execute_in_memory``.
    """

    def __init__(
        self,
        cache: CacheManager,
        registry: ColumnRegistry,
        source_conn: Any,  # raw DBAPI connection (pyodbc/sqlite3/…) — only .execute() is used
        stats: StatsCollector,
        delta: dict[str, ResolvedDelta] | None = None,
        ttl: dict[str, int] | None = None,
        index_columns: dict[str, list[str]] | None = None,
    ) -> None:
        """Store the collaborators; optional per-table mappings default to empty.

        Args:
            cache: Holds the cached tables and executes the in-memory SQL.
            registry: Tracks which columns have been loaded per table.
            source_conn: Connection handed to the cache when it discovers
                columns or loads a table.
            stats: Receives hit / miss / refetch notifications.
            delta: Per-table delta config; its ``key_columns`` and
                ``change_column`` are always added to a load.
            ttl: Per-table maximum age, compared against the cache's
                ``seconds_since_refresh``.
            index_columns: Per-table columns that are always added to a load.
        """
        self._cache = cache
        self._registry = registry
        self._source_conn = source_conn
        self._stats = stats
        self._delta = delta or {}
        self._ttl = ttl or {}
        self._index_columns = index_columns or {}

    def _ttl_expired(self, table: str) -> bool:
        """True if *table* has a TTL and its cached copy is older than that TTL."""
        ttl = self._ttl.get(table)
        if ttl is None:
            return False
        age = self._cache.seconds_since_refresh(table)
        # An unknown age (None) is treated as "not expired".
        return age is not None and age > ttl

    def execute(self, parsed: ParsedQuery) -> list[dict]:
        """Ensure every table in *parsed* is cached, then run the query.

        Returns one dict per result row, keyed by result column name.
        """
        for table in parsed.tables:
            self._ensure_table(table, parsed)
        return self._run_in_memory(parsed)

    def ensure_loaded(self, table: str, columns: list[str] | None) -> None:
        """Preload *table* into the cache without running a query.

        ``columns=None`` loads the whole table (``SELECT *`` semantics); otherwise
        only the listed columns. Reuses the same load path as a real query — delta
        key/change + index columns are augmented, the registry and watermark are
        updated, and double-checked locking skips a copy already fresh in the
        cache — but never materializes any rows (unlike :meth:`execute`).
        """
        if columns is None:
            self._ensure_full(table)
        else:
            self._ensure_columns(table, columns)

    def _ensure_table(self, table: str, parsed: ParsedQuery) -> None:
        """Dispatch *table* to a full load (wildcard) or a column-subset load."""
        if table in parsed.wildcard_tables:
            self._ensure_full(table)
        else:
            self._ensure_columns(table, parsed.columns_by_table[table])

    def _full_satisfied(self, table: str) -> bool:
        """True if *table* is cached in full and not TTL-expired (a SELECT * hit)."""
        return (
            self._cache.is_table_cached(table)
            and self._cache.is_table_full(table)
            and not self._ttl_expired(table)
        )

    def _columns_satisfied(self, table: str, columns: list[str]) -> bool:
        """True if *table* is cached with all *columns* present and not TTL-expired."""
        if not self._cache.is_table_cached(table) or self._ttl_expired(table):
            return False
        return set(columns).issubset(self._cache.get_table_columns(table))

    def _ensure_full(self, table: str) -> None:
        """Load every column of *table* (SELECT * / t.*), refetching unless already full."""
        cached = self._cache.is_table_cached(table)
        # Staleness is only meaningful for a table that is actually cached.
        stale = cached and self._ttl_expired(table)

        if cached and self._cache.is_table_full(table) and not stale:
            logger.debug(f"Cache hit (full): {table!r}")
            self._stats.record_hit()
            return

        if stale:
            logger.info(f"Cache expired (ttl) — reloading {table!r} in full.")
            self._stats.record_refetch()
        elif cached:
            # Cached and fresh, but only a column subset is present.
            logger.warning(f"Re-fetching {table!r} in full — SELECT * requested.")
            self._stats.record_refetch()
        else:
            self._stats.record_miss()

        columns = self._cache.discover_columns(table, self._source_conn)
        self._load(table, columns, full=True, satisfied=lambda cols: self._full_satisfied(table))

    def _ensure_columns(self, table: str, columns: list[str]) -> None:
        """Load *table* with at least *columns*, refetching on new columns or TTL expiry."""
        missing = self._registry.needs_refetch(table, columns)
        table_cached = self._cache.is_table_cached(table)
        stale = table_cached and self._ttl_expired(table)

        if table_cached and not missing and not stale:
            logger.debug(f"Cache hit: {table!r} columns={columns}")
            self._stats.record_hit()
            return

        if stale:
            logger.info(f"Cache expired (ttl) — reloading {table!r}.")
            self._stats.record_refetch()
        elif table_cached and missing:
            logger.warning(
                f"Re-fetching {table!r} — new columns requested: {missing}. "
                f"Expanding cache from {self._registry.get_columns(table)} + {missing}"
            )
            self._stats.record_refetch()
        else:
            self._stats.record_miss()

        # Reload everything the registry already knows about plus the new columns,
        # so a reload never narrows the cached table.
        all_columns = list(self._registry.get_columns(table)) + missing
        # Preserve a fully-cached table's status across a TTL reload.
        full = table_cached and self._cache.is_table_full(table)
        self._load(
            table,
            all_columns,
            full=full,
            satisfied=lambda cols: self._columns_satisfied(table, cols),
        )

    def _load(
        self,
        table: str,
        columns: list[str],
        full: bool,
        satisfied: Callable[[list[str]], bool] | None = None,
    ) -> None:
        """Fetch *table* into cache, adding delta key/timestamp and index columns.

        *satisfied* is the double-checked-locking predicate evaluated under the
        load lock (see :meth:`CacheManager.load_table`); it is given the final,
        augmented column list so a concurrent loader that already produced an
        equivalent (or wider) cache is detected and the redundant reload skipped.
        """
        cfg = self._delta.get(table)
        # Copy so the configured index-column list is never mutated below.
        extra = list(self._index_columns.get(table, []))
        if cfg:
            # The cache must always hold the key (to upsert) and the change column
            # (to compute the watermark), even if no query referenced them.
            extra += [*cfg.key_columns, cfg.change_column]
        if extra:
            # dict.fromkeys de-duplicates while keeping first-seen order.
            columns = list(dict.fromkeys([*columns, *extra]))

        # Bind the final column list now; the cache calls recheck() with no arguments.
        recheck: Callable[[], bool] | None = (
            partial(satisfied, columns) if satisfied is not None else None
        )
        self._cache.load_table(table, columns, self._source_conn, full=full, recheck=recheck)
        self._registry.update(table, columns)

        if cfg:
            self._cache.create_unique_index(table, cfg.key_columns)
            # The watermark is the largest change-column value now in the cache.
            watermark = self._cache.max_value(table, cfg.change_column)
            self._cache.set_last_synced_at(table, watermark)

    def _run_in_memory(self, parsed: ParsedQuery) -> list[dict]:
        """Execute the query's SQLite SQL in the cache and return rows as dicts."""
        logger.debug(f"Executing in SQLite RAM: {parsed.sqlite_sql!r} params={parsed.params!r}")
        col_names, rows = self._cache.execute_in_memory(
            parsed.sqlite_sql, parsed.params, parsed.tables
        )
        return [dict(zip(col_names, row)) for row in rows]
|