diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cec7d2..15685c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,26 @@ All notable changes to this project will be documented in this file. --- +## [1.1.0] - 2026-06-03 + +### Added +- `Stats` and `TableStats` frozen dataclasses — snapshot of runtime cache statistics (hit/miss/refetch counts, per-table row count, columns, last refresh timestamp) +- `StatsCollector` — internal thread-safe counter; increments on every cache hit, miss, and re-fetch +- `engine.stats` property — returns a `Stats` snapshot at any point in time +- `Stats` and `TableStats` exported from the public API + +### Changed +- `pyproject.toml` — bumped version to `1.1.0` + +--- + +## [1.0.0] - 2026-06-03 + +### Changed +- `pyproject.toml` — bumped version to `1.0.0` + +--- + ## [0.4.0] - 2026-06-03 ### Added diff --git a/README.md b/README.md index 7671e4d..ce21d3b 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,18 @@ from sqlmem import ReadOnlyError, UnsupportedQueryError ## Logging -SQLmem uses [loguru](https://github.com/Delgan/loguru). Set `SQLMEM_DEBUG=true` for verbose output (every query, cache hit/miss, backup events). Default level is INFO. +SQLmem is silent by default. Call `add_sink()` to opt in: + +```python +import sys +from sqlmem import add_sink + +add_sink(sys.stderr) # INFO by default +add_sink(sys.stderr, level="DEBUG") # verbose: every query, cache hit/miss, backup +add_sink("sqlmem.log", rotation="10 MB") # to a file +``` + +Set `SQLMEM_DEBUG=true` in `.env` to make the default level DEBUG when no explicit `level` is passed to `add_sink()`. ## Limitations diff --git a/pyproject.toml b/pyproject.toml index 6c7ec15..ca54a78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sqlmem" -version = "0.4.0" +version = "1.1.0" description = "" authors = [ {name = "jan.doubravsky@gmail.com"} diff --git a/src/sqlmem/__init__.py b/src/sqlmem/__init__.py index 4c21084..a621848 100644 --- a/src/sqlmem/__init__.py +++ b/src/sqlmem/__init__.py @@ -5,6 +5,7 @@ from loguru import logger from .config import DEBUG from .engine import CachingEngine from .exceptions import ReadOnlyError, UnsupportedQueryError +from .stats import Stats, TableStats _DEFAULT_FORMAT = ( "{time:YYYY-MM-DD HH:mm:ss} | " @@ -34,4 +35,4 @@ def add_sink(sink: Any, *, level: str | None = None, **kwargs: Any) -> None: logger.add(sink, level=level or ("DEBUG" if DEBUG else "INFO"), filter="sqlmem", **kwargs) -__all__ = ["CachingEngine", "ReadOnlyError", "UnsupportedQueryError", "add_sink"] +__all__ = ["CachingEngine", "ReadOnlyError", "UnsupportedQueryError", "Stats", "TableStats", "add_sink"] diff --git a/src/sqlmem/engine.py b/src/sqlmem/engine.py index 0325756..d1c813c 100644 --- a/src/sqlmem/engine.py +++ b/src/sqlmem/engine.py @@ -8,6 +8,7 @@ from .config import BACKUP_INTERVAL_SECONDS, CACHE_DB_PATH from .executor import QueryExecutor from .parser import parse from .registry import ColumnRegistry +from .stats import Stats, StatsCollector class CachingEngine: @@ -17,13 +18,18 @@ class CachingEngine: self._source_engine = source_engine self._cache = CacheManager(CACHE_DB_PATH, BACKUP_INTERVAL_SECONDS) self._registry = ColumnRegistry(self._cache.connection) + self._stats = StatsCollector() logger.info("CachingEngine initialized.") + @property + def stats(self) -> Stats: + return self._stats.snapshot(self._cache.connection) + def execute(self, sql: str) -> list[dict]: parsed = parse(sql) with self._source_engine.connect() as sa_conn: raw_conn: sqlite3.Connection = sa_conn.connection.dbapi_connection - executor = QueryExecutor(self._cache, self._registry, raw_conn) + executor = QueryExecutor(self._cache, self._registry, raw_conn, self._stats) return executor.execute(parsed) def invalidate(self, table: str) -> None: diff --git a/src/sqlmem/executor.py b/src/sqlmem/executor.py index 23c7ea4..7f08b2e 100644 --- a/src/sqlmem/executor.py +++ b/src/sqlmem/executor.py @@ -5,13 +5,21 @@ from loguru import logger from .cache import CacheManager from .parser import ParsedQuery from .registry import ColumnRegistry +from .stats import StatsCollector class QueryExecutor: - def __init__(self, cache: CacheManager, registry: ColumnRegistry, source_conn: sqlite3.Connection) -> None: + def __init__( + self, + cache: CacheManager, + registry: ColumnRegistry, + source_conn: sqlite3.Connection, + stats: StatsCollector, + ) -> None: self._cache = cache self._registry = registry self._source_conn = source_conn + self._stats = stats def execute(self, parsed: ParsedQuery) -> list[dict]: table = parsed.table @@ -26,11 +34,15 @@ class QueryExecutor: f"Re-fetching {table!r} — new columns requested: {missing}. " f"Expanding cache from {self._registry.get_columns(table)} + {missing}" ) + self._stats.record_refetch() + else: + self._stats.record_miss() all_columns = list(self._registry.get_columns(table)) + missing self._cache.load_table(table, all_columns, self._source_conn) self._registry.update(table, all_columns) else: logger.debug(f"Cache hit: {table!r} columns={columns}") + self._stats.record_hit() return self._run_in_memory(parsed) diff --git a/src/sqlmem/stats.py b/src/sqlmem/stats.py new file mode 100644 index 0000000..a64affb --- /dev/null +++ b/src/sqlmem/stats.py @@ -0,0 +1,61 @@ +import sqlite3 +import threading +from dataclasses import dataclass + + +@dataclass(frozen=True) +class TableStats: + rows: int + columns: list[str] + last_refresh: str + + +@dataclass(frozen=True) +class Stats: + hits: int + misses: int + refetches: int + tables: dict[str, TableStats] + + +class StatsCollector: + def __init__(self) -> None: + self._lock = threading.Lock() + self.hits = 0 + self.misses = 0 + self.refetches = 0 + + def record_hit(self) -> None: + with self._lock: + self.hits += 1 + + def record_miss(self) -> None: + with self._lock: + self.misses += 1 + + def record_refetch(self) -> None: + with self._lock: + self.refetches += 1 + + def snapshot(self, conn: sqlite3.Connection) -> Stats: + with self._lock: + hits, misses, refetches = self.hits, self.misses, self.refetches + + tables: dict[str, TableStats] = {} + for table_name, row_count, last_refresh in conn.execute( + "SELECT table_name, row_count, last_refresh_at FROM _sqlmem_tables" + ).fetchall(): + columns = [ + r[0] + for r in conn.execute( + "SELECT column_name FROM _sqlmem_columns WHERE table_name = ? ORDER BY column_name", + (table_name,), + ).fetchall() + ] + tables[table_name] = TableStats( + rows=row_count or 0, + columns=columns, + last_refresh=last_refresh, + ) + + return Stats(hits=hits, misses=misses, refetches=refetches, tables=tables)