Wire datetime_columns through query params and reads; add db_size and vacuum guard
This commit is contained in:
@@ -3,6 +3,7 @@ from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from ._coerce import to_sqlite_datetime as datetime_to_epoch_us
|
||||
from .config import DEBUG
|
||||
from .delta import DeltaConfig
|
||||
from .engine import CachingEngine
|
||||
@@ -63,4 +64,5 @@ __all__ = [
|
||||
"Stats",
|
||||
"TableStats",
|
||||
"add_sink",
|
||||
"datetime_to_epoch_us",
|
||||
]
|
||||
|
||||
+57
-3
@@ -10,6 +10,7 @@ left untouched.
|
||||
|
||||
import datetime
|
||||
import decimal
|
||||
import re
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
@@ -17,6 +18,10 @@ Params = tuple | list | dict | None
|
||||
|
||||
_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
|
||||
|
||||
# A string that *starts* with an ISO date+time (``2026-05-01T00:00:00`` or
|
||||
# space-separated). Used to spot a datetime passed as a string in a query param.
|
||||
_ISO_DATETIME_RE = re.compile(r"^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}")
|
||||
|
||||
|
||||
def to_sqlite(value: Any) -> Any:
|
||||
if isinstance(value, decimal.Decimal):
|
||||
@@ -53,13 +58,62 @@ def to_sqlite_datetime(value: Any) -> int | None:
|
||||
return None
|
||||
|
||||
|
||||
def from_sqlite_datetime(value: Any) -> Any:
|
||||
"""Inverse of :func:`to_sqlite_datetime`: INTEGER µs-since-epoch → UTC datetime.
|
||||
|
||||
Non-integers (a ``NULL`` value, or a column that isn't datetime-typed) pass
|
||||
through unchanged.
|
||||
"""
|
||||
if isinstance(value, bool) or not isinstance(value, int):
|
||||
return value
|
||||
return _EPOCH + datetime.timedelta(microseconds=value)
|
||||
|
||||
|
||||
def coerce_row(row: tuple) -> tuple:
|
||||
return tuple(to_sqlite(v) for v in row)
|
||||
|
||||
|
||||
def coerce_params(params: Params) -> tuple | dict | None:
|
||||
def _coerce_param(value: Any, dt_table: bool) -> Any:
|
||||
"""Coerce a single query parameter.
|
||||
|
||||
When the query touches a table that stores datetime columns as INTEGER µs
|
||||
(*dt_table*), a datetime object or an ISO-datetime string is converted to
|
||||
epoch µs so a ``WHERE`` comparison matches the stored INTEGER instead of
|
||||
comparing INTEGER against TEXT (which SQLite affinity makes always false).
|
||||
Otherwise the default stringifying coercion applies, unchanged.
|
||||
"""
|
||||
if dt_table and (
|
||||
isinstance(value, datetime.datetime)
|
||||
or (isinstance(value, str) and _ISO_DATETIME_RE.match(value))
|
||||
):
|
||||
result = to_sqlite_datetime(value)
|
||||
if result is not None:
|
||||
return result
|
||||
return to_sqlite(value)
|
||||
|
||||
|
||||
def coerce_params(params: Params, dt_table: bool = False) -> tuple | dict | None:
|
||||
if params is None:
|
||||
return None
|
||||
if isinstance(params, dict):
|
||||
return {key: to_sqlite(val) for key, val in params.items()}
|
||||
return tuple(to_sqlite(val) for val in params)
|
||||
return {key: _coerce_param(val, dt_table) for key, val in params.items()}
|
||||
return tuple(_coerce_param(val, dt_table) for val in params)
|
||||
|
||||
|
||||
def reverse_coerce_rows(
|
||||
rows: list[tuple], col_names: list[str], dt_cols: set[str]
|
||||
) -> list[tuple]:
|
||||
"""Turn INTEGER µs back into ``datetime`` for result columns in *dt_cols*.
|
||||
|
||||
A no-op when no result column is a datetime column, so non-datetime queries
|
||||
pay nothing.
|
||||
"""
|
||||
if not dt_cols:
|
||||
return rows
|
||||
dt_idx = {i for i, c in enumerate(col_names) if c in dt_cols}
|
||||
if not dt_idx:
|
||||
return rows
|
||||
return [
|
||||
tuple(from_sqlite_datetime(v) if i in dt_idx else v for i, v in enumerate(row))
|
||||
for row in rows
|
||||
]
|
||||
|
||||
+58
-8
@@ -9,7 +9,13 @@ from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
import sqlmem._meta as _meta
|
||||
from ._coerce import coerce_params, coerce_row, to_sqlite, to_sqlite_datetime
|
||||
from ._coerce import (
|
||||
coerce_params,
|
||||
coerce_row,
|
||||
reverse_coerce_rows,
|
||||
to_sqlite,
|
||||
to_sqlite_datetime,
|
||||
)
|
||||
from ._sql import quote, quote_list, quote_source
|
||||
from .config import FETCH_BATCH_SIZE, SQL_DIALECT
|
||||
from .stats import TableState
|
||||
@@ -42,6 +48,7 @@ class CacheManager:
|
||||
fetch_batch: int = FETCH_BATCH_SIZE,
|
||||
pragmas: dict[str, str | int] | None = None,
|
||||
datetime_columns: dict[str, list[str]] | None = None,
|
||||
return_datetime: bool = True,
|
||||
) -> None:
|
||||
self._db_path = db_path
|
||||
self._backup_interval = backup_interval
|
||||
@@ -51,6 +58,7 @@ class CacheManager:
|
||||
self._pragmas = dict(pragmas or {}) # extra read/layout PRAGMAs (disk mode)
|
||||
# table → columns stored as INTEGER µs-since-epoch instead of ISO TEXT
|
||||
self._datetime_columns = {t: list(c) for t, c in (datetime_columns or {}).items()}
|
||||
self._return_datetime = return_datetime # reverse-coerce reads back to datetime
|
||||
self._lock = threading.Lock() # serializes connection access
|
||||
self._load_lock = threading.Lock() # serializes full table loads
|
||||
self._states: dict[str, str] = {} # table → live processing state
|
||||
@@ -498,16 +506,38 @@ class CacheManager:
|
||||
self._read_conns.append(conn)
|
||||
return conn
|
||||
|
||||
def _query_datetime_cols(self, tables: list[str] | None) -> set[str]:
|
||||
"""Datetime columns (stored as INTEGER µs) belonging to *tables*.
|
||||
|
||||
Empty when no table is known/configured, so a query that touches no
|
||||
datetime column pays nothing and behaves exactly as before.
|
||||
"""
|
||||
if not self._datetime_columns or not tables:
|
||||
return set()
|
||||
cols: set[str] = set()
|
||||
for table in tables:
|
||||
cols.update(self._datetime_columns.get(table, ()))
|
||||
return cols
|
||||
|
||||
def execute_in_memory(
|
||||
self, sql: str, params: tuple | list | dict | None = None
|
||||
self,
|
||||
sql: str,
|
||||
params: tuple | list | dict | None = None,
|
||||
tables: list[str] | None = None,
|
||||
) -> tuple[list[str], list[tuple]]:
|
||||
"""Run a read query against the cache.
|
||||
|
||||
In-memory mode serializes with writers on the single connection. Disk mode
|
||||
reads from a per-thread WAL connection, so reads run concurrently with
|
||||
writers and each other (see :meth:`_read_conn`).
|
||||
|
||||
When *tables* names a table with ``datetime_columns``, ISO/datetime query
|
||||
params are coerced to epoch µs so a ``WHERE`` matches the stored INTEGER,
|
||||
and (unless ``return_datetime=False``) those columns are returned as real
|
||||
:class:`~datetime.datetime` objects rather than raw integers.
|
||||
"""
|
||||
bound = coerce_params(params)
|
||||
dt_cols = self._query_datetime_cols(tables)
|
||||
bound = coerce_params(params, dt_table=bool(dt_cols))
|
||||
if self._in_memory:
|
||||
with self._lock:
|
||||
cursor = (
|
||||
@@ -517,12 +547,14 @@ class CacheManager:
|
||||
)
|
||||
col_names = [desc[0] for desc in cursor.description]
|
||||
rows = cursor.fetchall()
|
||||
return col_names, rows
|
||||
else:
|
||||
conn = self._read_conn()
|
||||
cursor = conn.execute(sql) if bound is None else conn.execute(sql, bound)
|
||||
col_names = [desc[0] for desc in cursor.description]
|
||||
rows = cursor.fetchall()
|
||||
|
||||
conn = self._read_conn()
|
||||
cursor = conn.execute(sql) if bound is None else conn.execute(sql, bound)
|
||||
col_names = [desc[0] for desc in cursor.description]
|
||||
rows = cursor.fetchall()
|
||||
if self._return_datetime and dt_cols:
|
||||
rows = reverse_coerce_rows(rows, col_names, dt_cols)
|
||||
return col_names, rows
|
||||
|
||||
# --- delta refresh support ---------------------------------------------
|
||||
@@ -585,6 +617,15 @@ class CacheManager:
|
||||
row = self._conn.execute(f"SELECT COUNT(*) FROM {quote(table)}").fetchone()
|
||||
return int(row[0]) if row else 0
|
||||
|
||||
def db_size_bytes(self) -> int:
|
||||
"""On-disk size of the cache file in bytes (0 in memory mode / if absent)."""
|
||||
if self._in_memory:
|
||||
return 0
|
||||
try:
|
||||
return self._db_path.stat().st_size
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Wipe the entire cache — every cached table plus the on-disk data
|
||||
(the file is deleted in memory mode, VACUUMed in place in disk mode)."""
|
||||
@@ -676,6 +717,15 @@ class CacheManager:
|
||||
logger.debug("vacuum() called in memory mode — no-op.")
|
||||
return
|
||||
if incremental:
|
||||
av = self._conn.execute("PRAGMA auto_vacuum").fetchone()[0]
|
||||
if av != 2: # 0 = NONE, 1 = FULL, 2 = INCREMENTAL
|
||||
logger.warning(
|
||||
f"vacuum(incremental=True) called but auto_vacuum={av} (not "
|
||||
"INCREMENTAL) — no pages will be reclaimed. Rebuild the cache "
|
||||
"with pragmas={'auto_vacuum': 'INCREMENTAL'} via hard_reset(), "
|
||||
"or run vacuum(incremental=False) for a full VACUUM."
|
||||
)
|
||||
return
|
||||
with self._lock:
|
||||
self._conn.execute(f"PRAGMA incremental_vacuum({pages})")
|
||||
self._conn.commit()
|
||||
|
||||
@@ -67,6 +67,7 @@ class CachingEngine:
|
||||
dialect: str | None = None,
|
||||
pragmas: dict[str, str | int] | None = None,
|
||||
datetime_columns: dict[str, list[str]] | None = None,
|
||||
return_datetime: bool = True,
|
||||
blocking_startup_refresh: bool = False,
|
||||
) -> None:
|
||||
self._source_engine = source_engine
|
||||
@@ -83,6 +84,7 @@ class CachingEngine:
|
||||
fetch_batch=fetch_batch if fetch_batch is not None else FETCH_BATCH_SIZE,
|
||||
pragmas=pragmas,
|
||||
datetime_columns=datetime_columns,
|
||||
return_datetime=return_datetime,
|
||||
)
|
||||
self._registry = ColumnRegistry(self._cache.connection)
|
||||
self._stats = StatsCollector()
|
||||
@@ -152,7 +154,11 @@ class CachingEngine:
|
||||
last_runs = self._cache.get_last_runs()
|
||||
with self._cache._lock:
|
||||
base = self._stats.snapshot(self._cache.connection, states)
|
||||
base = replace(base, errors=self._cache.error_total)
|
||||
base = replace(
|
||||
base,
|
||||
errors=self._cache.error_total,
|
||||
db_size_bytes=self._cache.db_size_bytes(),
|
||||
)
|
||||
return replace(
|
||||
base,
|
||||
tables={n: self._enrich(n, t, errors, last_runs) for n, t in base.tables.items()},
|
||||
|
||||
@@ -118,5 +118,7 @@ class QueryExecutor:
|
||||
|
||||
def _run_in_memory(self, parsed: ParsedQuery) -> list[dict]:
|
||||
logger.debug(f"Executing in SQLite RAM: {parsed.sqlite_sql!r} params={parsed.params!r}")
|
||||
col_names, rows = self._cache.execute_in_memory(parsed.sqlite_sql, parsed.params)
|
||||
col_names, rows = self._cache.execute_in_memory(
|
||||
parsed.sqlite_sql, parsed.params, parsed.tables
|
||||
)
|
||||
return [dict(zip(col_names, row)) for row in rows]
|
||||
|
||||
@@ -40,6 +40,7 @@ class Stats:
|
||||
refetches: int
|
||||
tables: dict[str, TableStats]
|
||||
errors: int = 0 # total load/refresh failures since start
|
||||
db_size_bytes: int = 0 # on-disk cache file size (0 in memory mode)
|
||||
|
||||
|
||||
class StatsCollector:
|
||||
|
||||
Reference in New Issue
Block a user