Batch large-table loads to bound memory and add per-table state to stats

This commit is contained in:
Jan Doubravský
2026-06-05 14:44:07 +02:00
parent 85bb84a1a6
commit 286a5f207d
11 changed files with 436 additions and 29 deletions
+22 -2
View File
@@ -1,5 +1,6 @@
import sqlite3
import threading
from dataclasses import replace
from typing import cast
from loguru import logger
@@ -12,7 +13,7 @@ from .delta import DeltaConfig, DeltaRefresher, ResolvedDelta
from .executor import QueryExecutor
from .parser import Params, parse
from .registry import ColumnRegistry
from .stats import Stats, StatsCollector
from .stats import Stats, StatsCollector, TableState, TableStats
class CachingEngine:
@@ -68,8 +69,26 @@ class CachingEngine:
@property
def stats(self) -> Stats:
states = self._cache.get_states()
with self._cache._lock:
return self._stats.snapshot(self._cache.connection)
base = self._stats.snapshot(self._cache.connection, states)
return replace(base, tables={n: self._enrich(n, t) for n, t in base.tables.items()})
def _enrich(self, name: str, table_stats: TableStats) -> TableStats:
    """Return a copy of ``table_stats`` with refresh mode and staleness filled in.

    ``tracking`` becomes ``"delta"`` when ``name`` is in ``self._delta``,
    ``"ttl"`` when it is only in ``self._ttl``, and ``"static"`` otherwise.

    ``state`` is carried over from ``table_stats`` unless it is
    ``TableState.READY`` for a table listed in ``self._ttl`` whose age, as
    reported by ``self._cache.seconds_since_refresh``, is greater than the
    configured TTL; in that case it is reported as ``TableState.STALE``.
    An age of ``None`` leaves the state untouched.
    """
    # Delta membership takes precedence over TTL when naming the refresh mode.
    if name in self._delta:
        refresh_mode = "delta"
    else:
        refresh_mode = "ttl" if name in self._ttl else "static"

    effective_state = table_stats.state
    # Only a READY table with a TTL entry can be downgraded to STALE here.
    if effective_state == TableState.READY and name in self._ttl:
        elapsed = self._cache.seconds_since_refresh(name)
        expired = elapsed is not None and elapsed > self._ttl[name]
        if expired:
            effective_state = TableState.STALE

    return replace(table_stats, tracking=refresh_mode, state=effective_state)
def execute(self, sql: str, params: Params = None) -> list[dict]:
parsed = parse(sql, params)
@@ -130,6 +149,7 @@ class CachingEngine:
"DELETE FROM _sqlmem_columns WHERE table_name = ?", (table,)
)
self._cache.connection.commit()
self._cache.clear_state(table)
def reset(self) -> None:
"""Wipe the whole cache (RAM + cache.db). Use after structural source changes."""