Fix cache stampede with double-checked locking in load_table
This commit is contained in:
+41
-5
@@ -1,3 +1,4 @@
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
@@ -47,6 +48,20 @@ class QueryExecutor:
|
||||
else:
|
||||
self._ensure_columns(table, parsed.columns_by_table[table])
|
||||
|
||||
def _full_satisfied(self, table: str) -> bool:
    """Report whether a ``SELECT *`` on *table* can be answered from cache.

    The table must be cached, flagged as holding every column, and must
    not have passed its TTL. Checks run in that order and stop at the
    first one that fails.
    """
    cache = self._cache
    if not cache.is_table_cached(table):
        return False
    if not cache.is_table_full(table):
        return False
    # A full, cached table is still a miss once its TTL has lapsed.
    return not self._ttl_expired(table)
|
||||
|
||||
def _columns_satisfied(self, table: str, columns: list[str]) -> bool:
    """Report whether the cached copy of *table* already covers *columns*.

    Returns False when the table is not cached or its TTL has expired;
    otherwise True only if every requested column is among the cached
    table's columns.
    """
    usable = self._cache.is_table_cached(table) and not self._ttl_expired(table)
    if not usable:
        return False
    # Any requested column absent from the cache means the request is not satisfied.
    missing = set(columns).difference(self._cache.get_table_columns(table))
    return not missing
|
||||
|
||||
def _ensure_full(self, table: str) -> None:
|
||||
"""Load every column of *table* (SELECT * / t.*), refetching unless already full."""
|
||||
cached = self._cache.is_table_cached(table)
|
||||
@@ -67,7 +82,7 @@ class QueryExecutor:
|
||||
self._stats.record_miss()
|
||||
|
||||
columns = self._cache.discover_columns(table, self._source_conn)
|
||||
self._load(table, columns, full=True)
|
||||
self._load(table, columns, full=True, satisfied=lambda cols: self._full_satisfied(table))
|
||||
|
||||
def _ensure_columns(self, table: str, columns: list[str]) -> None:
|
||||
"""Load *table* with at least *columns*, refetching on new columns or TTL expiry."""
|
||||
@@ -95,10 +110,27 @@ class QueryExecutor:
|
||||
all_columns = list(self._registry.get_columns(table)) + missing
|
||||
# Preserve a fully-cached table's status across a TTL reload.
|
||||
full = table_cached and self._cache.is_table_full(table)
|
||||
self._load(table, all_columns, full=full)
|
||||
self._load(
|
||||
table,
|
||||
all_columns,
|
||||
full=full,
|
||||
satisfied=lambda cols: self._columns_satisfied(table, cols),
|
||||
)
|
||||
|
||||
def _load(self, table: str, columns: list[str], full: bool) -> None:
|
||||
"""Fetch *table* into cache, adding delta key/timestamp and index columns."""
|
||||
def _load(
|
||||
self,
|
||||
table: str,
|
||||
columns: list[str],
|
||||
full: bool,
|
||||
satisfied: Callable[[list[str]], bool] | None = None,
|
||||
) -> None:
|
||||
"""Fetch *table* into cache, adding delta key/timestamp and index columns.
|
||||
|
||||
*satisfied* is the double-checked-locking predicate evaluated under the
|
||||
load lock (see :meth:`CacheManager.load_table`); it is given the final,
|
||||
augmented column list so a concurrent loader that already produced an
|
||||
equivalent (or wider) cache is detected and the redundant reload skipped.
|
||||
"""
|
||||
cfg = self._delta.get(table)
|
||||
extra = list(self._index_columns.get(table, []))
|
||||
if cfg:
|
||||
@@ -108,7 +140,11 @@ class QueryExecutor:
|
||||
if extra:
|
||||
columns = list(dict.fromkeys([*columns, *extra]))
|
||||
|
||||
self._cache.load_table(table, columns, self._source_conn, full=full)
|
||||
recheck: Callable[[], bool] | None = None
|
||||
if satisfied is not None:
|
||||
final_columns = columns
|
||||
recheck = lambda: satisfied(final_columns) # noqa: E731
|
||||
self._cache.load_table(table, columns, self._source_conn, full=full, recheck=recheck)
|
||||
self._registry.update(table, columns)
|
||||
|
||||
if cfg:
|
||||
|
||||
Reference in New Issue
Block a user