Fix cache stampede with double-checked locking in load_table

2026-06-11 13:03:22 +02:00
parent a68b8994e3
commit 46370fe651
7 changed files with 139 additions and 7 deletions
@@ -1,3 +1,4 @@
+from collections.abc import Callable
 from typing import Any

 from loguru import logger
@@ -47,6 +48,20 @@ class QueryExecutor:
        else:
            self._ensure_columns(table, parsed.columns_by_table[table])

+    def _full_satisfied(self, table: str) -> bool:
+        """True when *table* is cached, marked full, and not TTL-expired (a SELECT * hit)."""
+        cache = self._cache
+        # Presence and fullness are checked first; the TTL is consulted only if both hold.
+        if not (cache.is_table_cached(table) and cache.is_table_full(table)):
+            return False
+        return not self._ttl_expired(table)
+
+    def _columns_satisfied(self, table: str, columns: list[str]) -> bool:
+        """Return whether *table* is cached, unexpired, and holds all of *columns*."""
+        if self._cache.is_table_cached(table) and not self._ttl_expired(table):
+            return set(self._cache.get_table_columns(table)).issuperset(columns)
+        return False
+
    def _ensure_full(self, table: str) -> None:
        """Load every column of *table* (SELECT * / t.*), refetching unless already full."""
        cached = self._cache.is_table_cached(table)
@@ -67,7 +82,7 @@ class QueryExecutor:
            self._stats.record_miss()

        columns = self._cache.discover_columns(table, self._source_conn)
-        self._load(table, columns, full=True)
+        self._load(table, columns, full=True, satisfied=lambda cols: self._full_satisfied(table))

    def _ensure_columns(self, table: str, columns: list[str]) -> None:
        """Load *table* with at least *columns*, refetching on new columns or TTL expiry."""
@@ -95,10 +110,27 @@ class QueryExecutor:
        all_columns = list(self._registry.get_columns(table)) + missing
        # Preserve a fully-cached table's status across a TTL reload.
        full = table_cached and self._cache.is_table_full(table)
-        self._load(table, all_columns, full=full)
+        self._load(
+            table,
+            all_columns,
+            full=full,
+            satisfied=lambda cols: self._columns_satisfied(table, cols),
+        )

-    def _load(self, table: str, columns: list[str], full: bool) -> None:
-        """Fetch *table* into cache, adding delta key/timestamp and index columns."""
+    def _load(
+        self,
+        table: str,
+        columns: list[str],
+        full: bool,
+        satisfied: Callable[[list[str]], bool] | None = None,
+    ) -> None:
+        """Fetch *table* into cache, adding delta key/timestamp and index columns.
+
+        *satisfied* is the double-checked-locking predicate evaluated under the
+        load lock (see :meth:`CacheManager.load_table`); it is given the final,
+        augmented column list so a concurrent loader that already produced an
+        equivalent (or wider) cache is detected and the redundant reload skipped.
+        """
        cfg = self._delta.get(table)
        extra = list(self._index_columns.get(table, []))
        if cfg:
@@ -108,7 +140,11 @@ class QueryExecutor:
        if extra:
            columns = list(dict.fromkeys([*columns, *extra]))

-        self._cache.load_table(table, columns, self._source_conn, full=full)
+        recheck: Callable[[], bool] | None = None
+        if satisfied is not None:
+            final_columns = columns
+            recheck = lambda: satisfied(final_columns)  # noqa: E731
+        self._cache.load_table(table, columns, self._source_conn, full=full, recheck=recheck)
        self._registry.update(table, columns)

        if cfg: