Add incremental delta refresh and fix Decimal/datetime cache binding

This commit is contained in:
Jan Doubravský
2026-06-05 11:09:16 +02:00
parent 530c2618cf
commit 33aa126ff6
13 changed files with 798 additions and 53 deletions
+22 -11
View File
@@ -3,6 +3,7 @@ import sqlite3
from loguru import logger
from .cache import CacheManager
from .delta import ResolvedDelta
from .parser import ParsedQuery
from .registry import ColumnRegistry
from .stats import StatsCollector
@@ -15,11 +16,13 @@ class QueryExecutor:
registry: ColumnRegistry,
source_conn: sqlite3.Connection,
stats: StatsCollector,
delta: dict[str, ResolvedDelta] | None = None,
) -> None:
self._cache = cache
self._registry = registry
self._source_conn = source_conn
self._stats = stats
self._delta = delta or {}
def execute(self, parsed: ParsedQuery) -> list[dict]:
for table in parsed.tables:
@@ -46,8 +49,7 @@ class QueryExecutor:
self._stats.record_miss()
columns = self._cache.discover_columns(table, self._source_conn)
self._cache.load_table(table, columns, self._source_conn, full=True)
self._registry.update(table, columns)
self._load(table, columns, full=True)
def _ensure_columns(self, table: str, columns: list[str]) -> None:
"""Load *table* with at least *columns*, refetching only when columns are missing."""
@@ -69,16 +71,25 @@ class QueryExecutor:
self._stats.record_miss()
all_columns = list(self._registry.get_columns(table)) + missing
self._cache.load_table(table, all_columns, self._source_conn)
self._registry.update(table, all_columns)
self._load(table, all_columns, full=False)
def _load(self, table: str, columns: list[str], full: bool) -> None:
    """Populate the cache for *table* and record the loaded columns in the registry.

    When *table* has a delta configuration, its key columns and change column
    are appended to *columns* (duplicates dropped, first occurrence kept)
    before loading. After the load, a unique index is created on the key
    columns and the largest change-column value found in the cache is stored
    as the table's last-synced watermark.
    """
    delta_cfg = self._delta.get(table)
    if not delta_cfg:
        # Untracked table: load exactly the requested columns, nothing more.
        self._cache.load_table(table, columns, self._source_conn, full=full)
        self._registry.update(table, columns)
        return

    # Upserts need the key and the watermark needs the change column, so both
    # are fetched even when no query referenced them.
    requested = [*columns, *delta_cfg.key_columns, delta_cfg.change_column]
    tracked_columns = list(dict.fromkeys(requested))

    self._cache.load_table(table, tracked_columns, self._source_conn, full=full)
    self._registry.update(table, tracked_columns)

    self._cache.create_unique_index(table, delta_cfg.key_columns)
    self._cache.set_last_synced_at(
        table, self._cache.max_value(table, delta_cfg.change_column)
    )
def _run_in_memory(self, parsed: ParsedQuery) -> list[dict]:
    """Run *parsed* against the in-memory cache and return each row as a dict.

    Execution is delegated entirely to the cache's ``execute_in_memory``, which
    receives the SQLite statement and its parameters and returns the column
    names together with the fetched rows. The statement is therefore executed
    exactly once, instead of once directly on the cache connection and again
    through the cache.

    NOTE(review): parameter adaptation (e.g. Decimal/datetime values) is
    presumably handled inside ``execute_in_memory`` - confirm in CacheManager.

    Returns:
        One ``{column_name: value}`` dict per result row, in result order.
    """
    logger.debug(f"Executing in SQLite RAM: {parsed.sqlite_sql!r} params={parsed.params!r}")
    col_names, rows = self._cache.execute_in_memory(parsed.sqlite_sql, parsed.params)
    return [dict(zip(col_names, row)) for row in rows]