Add secondary indexes to accelerate cache lookups
This commit is contained in:
@@ -2,6 +2,7 @@ import atexit
|
||||
import signal
|
||||
import sqlite3
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
@@ -15,6 +16,12 @@ from .stats import TableState
|
||||
SCHEMA_VERSION = 3
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class _Index:
|
||||
name: str
|
||||
columns: tuple[str, ...]
|
||||
|
||||
|
||||
class CacheManager:
|
||||
def __init__(self, db_path: Path, backup_interval: int) -> None:
|
||||
self._db_path = db_path
|
||||
@@ -23,6 +30,7 @@ class CacheManager:
|
||||
self._lock = threading.Lock() # serializes connection access
|
||||
self._load_lock = threading.Lock() # serializes full table loads
|
||||
self._states: dict[str, str] = {} # table → live processing state
|
||||
self._index_defs: dict[str, list[_Index]] = {} # table → secondary indexes
|
||||
self._closed = False
|
||||
|
||||
self._ensure_meta_tables()
|
||||
@@ -190,6 +198,30 @@ class CacheManager:
|
||||
def clear_state(self, table: str) -> None:
    """Forget the live processing state recorded for *table*, if any."""
    states = self._states
    # Unknown tables are ignored rather than treated as an error.
    if table in states:
        del states[table]
|
||||
|
||||
def add_index(self, table: str, columns: list[str]) -> None:
    """Register a secondary index to (re)create on *columns* after each load.

    The index name is derived from *table* and *columns*; registering the
    same combination again for a table is a no-op.

    Args:
        table: Name of the cached table the index belongs to.
        columns: Column names to index, in index order.  A bare string is
            treated as a single column name.

    Raises:
        ValueError: If *columns* is empty.
    """
    # A bare string would otherwise be unpacked character by character.
    if isinstance(columns, str):
        columns = [columns]

    # Materialise once so a one-shot iterable is not consumed by the name
    # construction and then stored as an empty tuple.
    cols = tuple(columns)
    if not cols:
        # Fail at registration time: an empty column list would only surface
        # later as a SQL syntax error when the CREATE INDEX statement runs.
        raise ValueError(f"Index on {table!r} needs at least one column.")

    name = "sqlmem_idx_" + "_".join((table, *cols))
    defs = self._index_defs.setdefault(table, [])
    if all(d.name != name for d in defs):
        defs.append(_Index(name=name, columns=cols))
|
||||
|
||||
def _create_indexes(self, table: str, available: list[str]) -> None:
|
||||
"""Create the registered secondary indexes whose columns are all cached."""
|
||||
available_set = set(available)
|
||||
for idx in self._index_defs.get(table, []):
|
||||
if not set(idx.columns) <= available_set:
|
||||
logger.warning(
|
||||
f"Skipping index {idx.name!r}: columns {idx.columns} not all cached."
|
||||
)
|
||||
continue
|
||||
cols = ", ".join(idx.columns)
|
||||
with self._lock:
|
||||
self._mem_conn.execute(
|
||||
f"CREATE INDEX IF NOT EXISTS {idx.name} ON {table} ({cols})"
|
||||
)
|
||||
self._mem_conn.commit()
|
||||
logger.debug(f"Index {idx.name!r} ready on {table} ({cols})")
|
||||
|
||||
def load_table(
|
||||
self,
|
||||
table: str,
|
||||
@@ -243,6 +275,7 @@ class CacheManager:
|
||||
self.set_state(table, TableState.ERROR)
|
||||
raise
|
||||
|
||||
self._create_indexes(table, columns)
|
||||
self.mark_table_refreshed(table, total, full)
|
||||
self.set_state(table, TableState.READY)
|
||||
logger.info(f"Table {table!r} cached ({total} rows, columns: {columns})")
|
||||
|
||||
Reference in New Issue
Block a user