Add secondary indexes to accelerate cache lookups

This commit is contained in:
Jan Doubravský
2026-06-05 18:17:55 +02:00
parent 286a5f207d
commit 757a8f4eba
8 changed files with 213 additions and 4 deletions
+33
View File
@@ -2,6 +2,7 @@ import atexit
import signal
import sqlite3
import threading
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
@@ -15,6 +16,12 @@ from .stats import TableState
SCHEMA_VERSION = 3
@dataclass(frozen=True)
class _Index:
    """Immutable description of one secondary index registered for a cached table."""

    name: str  # index identifier; used in CREATE INDEX and to detect duplicates
    columns: tuple[str, ...]  # indexed column names, in the order they are joined into the SQL
class CacheManager:
def __init__(self, db_path: Path, backup_interval: int) -> None:
self._db_path = db_path
@@ -23,6 +30,7 @@ class CacheManager:
self._lock = threading.Lock() # serializes connection access
self._load_lock = threading.Lock() # serializes full table loads
self._states: dict[str, str] = {} # table → live processing state
self._index_defs: dict[str, list[_Index]] = {} # table → secondary indexes
self._closed = False
self._ensure_meta_tables()
@@ -190,6 +198,30 @@ class CacheManager:
def clear_state(self, table: str) -> None:
self._states.pop(table, None)
def add_index(self, table: str, columns: list[str]) -> None:
"""Register a secondary index to (re)create on *columns* after each load."""
name = "sqlmem_idx_" + "_".join([table, *columns])
defs = self._index_defs.setdefault(table, [])
if all(d.name != name for d in defs):
defs.append(_Index(name=name, columns=tuple(columns)))
def _create_indexes(self, table: str, available: list[str]) -> None:
"""Create the registered secondary indexes whose columns are all cached."""
available_set = set(available)
for idx in self._index_defs.get(table, []):
if not set(idx.columns) <= available_set:
logger.warning(
f"Skipping index {idx.name!r}: columns {idx.columns} not all cached."
)
continue
cols = ", ".join(idx.columns)
with self._lock:
self._mem_conn.execute(
f"CREATE INDEX IF NOT EXISTS {idx.name} ON {table} ({cols})"
)
self._mem_conn.commit()
logger.debug(f"Index {idx.name!r} ready on {table} ({cols})")
def load_table(
self,
table: str,
@@ -243,6 +275,7 @@ class CacheManager:
self.set_state(table, TableState.ERROR)
raise
self._create_indexes(table, columns)
self.mark_table_refreshed(table, total, full)
self.set_state(table, TableState.READY)
logger.info(f"Table {table!r} cached ({total} rows, columns: {columns})")