Fix cache stampede with double-checked locking in load_table

This commit is contained in:
Jan Doubravský
2026-06-11 13:03:22 +02:00
parent a68b8994e3
commit 46370fe651
7 changed files with 139 additions and 7 deletions
+41 -5
View File
@@ -1,3 +1,4 @@
from collections.abc import Callable
from typing import Any
from loguru import logger
@@ -47,6 +48,20 @@ class QueryExecutor:
else:
self._ensure_columns(table, parsed.columns_by_table[table])
def _full_satisfied(self, table: str) -> bool:
"""True if *table* is cached in full and not TTL-expired (a SELECT * hit)."""
return (
self._cache.is_table_cached(table)
and self._cache.is_table_full(table)
and not self._ttl_expired(table)
)
def _columns_satisfied(self, table: str, columns: list[str]) -> bool:
"""True if *table* is cached with all *columns* present and not TTL-expired."""
if not self._cache.is_table_cached(table) or self._ttl_expired(table):
return False
return set(columns).issubset(self._cache.get_table_columns(table))
def _ensure_full(self, table: str) -> None:
"""Load every column of *table* (SELECT * / t.*), refetching unless already full."""
cached = self._cache.is_table_cached(table)
@@ -67,7 +82,7 @@ class QueryExecutor:
self._stats.record_miss()
columns = self._cache.discover_columns(table, self._source_conn)
self._load(table, columns, full=True)
self._load(table, columns, full=True, satisfied=lambda cols: self._full_satisfied(table))
def _ensure_columns(self, table: str, columns: list[str]) -> None:
"""Load *table* with at least *columns*, refetching on new columns or TTL expiry."""
@@ -95,10 +110,27 @@ class QueryExecutor:
all_columns = list(self._registry.get_columns(table)) + missing
# Preserve a fully-cached table's status across a TTL reload.
full = table_cached and self._cache.is_table_full(table)
self._load(table, all_columns, full=full)
self._load(
table,
all_columns,
full=full,
satisfied=lambda cols: self._columns_satisfied(table, cols),
)
def _load(self, table: str, columns: list[str], full: bool) -> None:
"""Fetch *table* into cache, adding delta key/timestamp and index columns."""
def _load(
self,
table: str,
columns: list[str],
full: bool,
satisfied: Callable[[list[str]], bool] | None = None,
) -> None:
"""Fetch *table* into cache, adding delta key/timestamp and index columns.
*satisfied* is the double-checked-locking predicate evaluated under the
load lock (see :meth:`CacheManager.load_table`); it is given the final,
augmented column list so a concurrent loader that already produced an
equivalent (or wider) cache is detected and the redundant reload skipped.
"""
cfg = self._delta.get(table)
extra = list(self._index_columns.get(table, []))
if cfg:
@@ -108,7 +140,11 @@ class QueryExecutor:
if extra:
columns = list(dict.fromkeys([*columns, *extra]))
self._cache.load_table(table, columns, self._source_conn, full=full)
recheck: Callable[[], bool] | None = None
if satisfied is not None:
final_columns = columns
recheck = lambda: satisfied(final_columns) # noqa: E731
self._cache.load_table(table, columns, self._source_conn, full=full, recheck=recheck)
self._registry.update(table, columns)
if cfg: