diff --git a/.gitignore b/.gitignore index 4e4d970..759d192 100644 --- a/.gitignore +++ b/.gitignore @@ -40,7 +40,12 @@ coverage.xml .DS_Store Thumbs.db +# Environment +.env +.env.* + # Agents AGENTS.md CLAUDE.md -DESIGN_DOCUMENT_MODULE.md \ No newline at end of file +DESIGN_DOCUMENT_MODULE.md +.claude \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 8008650..0cec7d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,31 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +--- + +## [0.4.0] - 2026-06-03 + +### Added +- `add_sink(sink, *, level, **kwargs)` — public API for routing sqlmem log records to any loguru-compatible sink (stream, file, callable); supports all loguru `logger.add()` kwargs including `rotation`, `retention`, etc. + ### Changed -- `_meta.py` now reads version dynamically from installed package metadata via `importlib.metadata` instead of a hardcoded string -- Bumped version to `0.2.0` in `pyproject.toml` -- `CHANGELOG.md` restructured with `[0.2.0]` release section +- `pyproject.toml` — bumped version to `0.4.0` +- `config.py` — replaced destructive `logger.remove()` + forced default sink with `logger.disable("sqlmem")`; sqlmem is now silent by default and does not interfere with the host application's logging setup + +--- + +## [0.3.0] - 2026-06-03 + +### Added +- `README.md` — full project documentation: architecture overview, quick start, cache behaviour, persistence, configuration, exceptions, logging, and limitations + +### Changed +- `pyproject.toml` — bumped version to `0.3.0` +- `parser.py` — `_extract_columns` now deduplicates column names while preserving order +- `.gitignore` — added `.env` and `.env.*` to prevent accidental commit of environment files + +### Security +- Removed `.env` from git tracking (`git rm --cached`) --- diff --git a/README.md b/README.md index d491501..7671e4d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,126 @@ # SQLmem +Transparent 
in-memory cache layer between SQLAlchemy and your database. Drop it in front of any SQLAlchemy engine — SELECT queries are served from a fast in-memory SQLite cache; write statements (INSERT/UPDATE/DELETE) are rejected with `ReadOnlyError`. + +## How it works + +``` +Application (SQLAlchemy) + │ + ▼ + [ SQLmem Proxy ] + ┌──────────────────────────────┐ + │ SQL Parser │ → detects SELECT vs. write + │ Column Registry │ → tracks which columns are cached per table + │ Cache Manager (SQLite RAM) │ → stores data in memory + │ Query Executor │ → cache hit / miss logic + └──────────────────────────────┘ + │ + ▼ + Database (via original SQLAlchemy engine) +``` + +On the first SELECT for a table, SQLmem fetches the required rows from the database and stores them in an in-memory SQLite instance. Subsequent queries for the same columns hit the in-memory cache with no database round-trip. When a query requests a column not yet in cache, SQLmem re-fetches the table with the expanded column set. + +## Installation + +```bash +pip install sqlmem +# or with Poetry +poetry add sqlmem +``` + +Requires Python 3.14. + +## Quick start + +```python +from sqlmem import CachingEngine +from sqlalchemy import create_engine, text + +base_engine = create_engine("postgresql://user:pass@host/db") +engine = CachingEngine(base_engine) + +# Use exactly like a regular SQLAlchemy engine: +results = engine.execute("SELECT id, name FROM users WHERE status = 'active'") +for row in results: + print(row["id"], row["name"]) +``` + +`execute()` returns a list of dicts. Results are compatible with standard iteration patterns. 
+ +## Cache behaviour + +**Column accumulation** — SQLmem learns which columns your app needs at runtime, no upfront configuration required: + +``` +Query 1: SELECT a, b FROM orders → cache miss → fetch orders(a, b) from DB +Query 2: SELECT a, d FROM orders → new column d → re-fetch orders(a, b, d) +Query 3: SELECT b FROM orders → cache hit, no DB query +Query 4: SELECT * FROM orders → UnsupportedQueryError (wildcard not supported) +Query 5: SELECT a FROM orders JOIN … → UnsupportedQueryError (JOIN not supported) +``` + +**Writes are blocked** — INSERT, UPDATE, and DELETE raise `ReadOnlyError`. SQLmem is a read-only cache. + +## Persistence + +The in-memory cache is optionally persisted to `cache.db` on disk: + +- **On startup**: if `cache.db` exists, it is loaded into memory. +- **Hourly**: a background thread writes a snapshot to disk. +- **On shutdown**: a final flush via `atexit` and SIGTERM handler. + +Schema version is checked on load — if it does not match, the stale file is discarded and the cache is rebuilt from the database. + +## Manual cache invalidation + +```python +engine.invalidate("orders") # drops the table from cache; next query re-fetches from DB +engine.close() # flush to disk and shut down background thread +``` + +## Configuration + +Set via environment variables or a `.env` file: + +| Variable | Default | Description | +|---|---|---| +| `SQLMEM_DEBUG` | `false` | `true` enables DEBUG-level logging | +| `SQLMEM_CACHE_DB` | `cache.db` | Path to the on-disk persistence file | +| `SQLMEM_BACKUP_INTERVAL` | `3600` | Backup interval in seconds | + +## Exceptions + +| Exception | When raised | +|---|---| +| `ReadOnlyError` | INSERT, UPDATE, or DELETE statement | +| `UnsupportedQueryError` | `SELECT *` or any JOIN | + +```python +from sqlmem import ReadOnlyError, UnsupportedQueryError +``` + +## Logging + +SQLmem uses [loguru](https://github.com/Delgan/loguru) and is silent by default; call `sqlmem.add_sink(sink)` (for example `add_sink(sys.stderr)`) to receive its log records. Set `SQLMEM_DEBUG=true` for verbose output (every query, cache hit/miss, backup events). 
def add_sink(sink: Any, *, level: str | int | None = None, **kwargs: Any) -> int:
    """Route sqlmem log records to *sink*.

    Accepts any sink supported by loguru (file path, stream, callable, …).
    Extra keyword arguments are forwarded to :func:`loguru.logger.add`.

    Args:
        sink: Destination handed to ``logger.add``.
        level: Minimum level to emit. When ``None`` it defaults to ``DEBUG``
            if ``SQLMEM_DEBUG=true``, otherwise ``INFO``.
        **kwargs: Passed through to ``logger.add`` (``rotation``,
            ``retention``, …). ``format`` and ``filter`` fall back to the
            sqlmem defaults only when the caller does not supply them; a
            caller-supplied ``filter`` replaces the default ``"sqlmem"``
            filter.

    Returns:
        The handler id returned by ``logger.add``; pass it to
        ``logger.remove`` to detach this sink again.

    Example::

        import sys
        from sqlmem import add_sink
        add_sink(sys.stderr)
        add_sink("sqlmem.log", rotation="10 MB")
    """
    # Compare against None explicitly so a falsy-but-valid level such as the
    # numeric level 0 is not silently replaced by the default.
    if level is None:
        level = "DEBUG" if DEBUG else "INFO"

    kwargs.setdefault("format", _DEFAULT_FORMAT)
    # setdefault instead of a hard-coded keyword: passing filter= explicitly
    # next to **kwargs raised "got multiple values for keyword argument".
    kwargs.setdefault("filter", "sqlmem")
    # colorize is deliberately not forced to True: loguru decides per sink
    # (TTY-aware), so file and callable sinks do not receive ANSI escapes.

    # config.py disables the "sqlmem" logger at import time; re-enable it so
    # records reach the new handler.
    logger.enable("sqlmem")
    return logger.add(sink, level=level, **kwargs)
def _extract_columns(statement: exp.Select) -> list[str]:
    """Return the distinct column names found in *statement*, in first-seen order.

    Every ``exp.Column`` node yielded by ``statement.find_all`` contributes its
    ``name``; repeated names are kept only at their first occurrence.

    Raises:
        UnsupportedQueryError: if no column node is found.
    """
    # dict keys are unique and keep insertion order, which gives an
    # order-preserving de-duplication in a single pass.
    unique_names = list(
        dict.fromkeys(column.name for column in statement.find_all(exp.Column))
    )
    if unique_names:
        return unique_names
    raise UnsupportedQueryError("Could not extract column names from query.")
test_backup_interval_custom(monkeypatch): + _reload(monkeypatch, SQLMEM_BACKUP_INTERVAL="7200") + assert cfg.BACKUP_INTERVAL_SECONDS == 7200 diff --git a/tests/test_engine.py b/tests/test_engine.py new file mode 100644 index 0000000..f90a9a7 --- /dev/null +++ b/tests/test_engine.py @@ -0,0 +1,248 @@ +import sqlite3 +from pathlib import Path + +import pytest +from sqlalchemy import create_engine + +import sqlmem.engine as eng_mod +from sqlmem import CachingEngine, ReadOnlyError, UnsupportedQueryError + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def source_db(tmp_path): + """File-based SQLite source with two pre-populated tables.""" + db_path = tmp_path / "source.db" + conn = sqlite3.connect(db_path) + conn.execute("CREATE TABLE products (id TEXT, name TEXT, price TEXT)") + conn.executemany( + "INSERT INTO products VALUES (?, ?, ?)", + [("1", "Widget", "9.99"), ("2", "Gadget", "19.99"), ("3", "Doohickey", "4.99")], + ) + conn.execute("CREATE TABLE orders (order_id TEXT, product_id TEXT, qty TEXT)") + conn.executemany( + "INSERT INTO orders VALUES (?, ?, ?)", + [("101", "1", "2"), ("102", "2", "1")], + ) + conn.commit() + conn.close() + return db_path + + +@pytest.fixture +def source_engine(source_db): + engine = create_engine(f"sqlite:///{source_db}") + yield engine + engine.dispose() + + +@pytest.fixture +def cache_path(tmp_path): + return tmp_path / "cache.db" + + +@pytest.fixture +def engine(source_engine, cache_path, monkeypatch): + """CachingEngine pointed at a temp cache DB.""" + monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path) + monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999) + ce = CachingEngine(source_engine) + yield ce + ce.close() + + +# --------------------------------------------------------------------------- +# Basic SELECT execution (in-memory) +# 
--------------------------------------------------------------------------- + +def test_select_returns_list_of_dicts(engine): + rows = engine.execute("SELECT id, name FROM products") + assert isinstance(rows, list) + assert all(isinstance(r, dict) for r in rows) + + +def test_select_correct_row_count(engine): + assert len(engine.execute("SELECT id, name FROM products")) == 3 + + +def test_select_correct_values(engine): + rows = engine.execute("SELECT id, name FROM products") + assert {r["name"] for r in rows} == {"Widget", "Gadget", "Doohickey"} + + +def test_select_with_where_clause(engine): + rows = engine.execute("SELECT id, price FROM products WHERE id = '1'") + assert len(rows) == 1 + assert rows[0]["price"] == "9.99" + + +def test_select_with_order_and_limit(engine): + rows = engine.execute("SELECT id, name FROM products ORDER BY id LIMIT 2") + assert len(rows) == 2 + assert rows[0]["id"] == "1" + + +def test_select_different_table(engine): + rows = engine.execute("SELECT order_id, qty FROM orders") + assert len(rows) == 2 + + +def test_where_on_non_selected_column(engine): + """WHERE references a column not in SELECT — parser must extract it for the cache.""" + rows = engine.execute("SELECT name FROM products WHERE price = '9.99'") + assert len(rows) == 1 + assert rows[0]["name"] == "Widget" + + +# --------------------------------------------------------------------------- +# In-memory caching behaviour +# --------------------------------------------------------------------------- + +def test_cache_hit_survives_source_deletion(engine, source_db): + engine.execute("SELECT id, name FROM products") + # Wipe source — cache must still answer + conn = sqlite3.connect(source_db) + conn.execute("DELETE FROM products") + conn.commit() + conn.close() + rows = engine.execute("SELECT id, name FROM products") + assert len(rows) == 3 + + +def test_new_column_triggers_refetch(engine): + engine.execute("SELECT id FROM products") + rows = engine.execute("SELECT id, name FROM 
def test_reload_from_disk_file(source_engine, cache_path, monkeypatch):
    """New CachingEngine picks up table cached by a previous instance."""
    monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
    monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)

    # First instance populates the cache; close() is what persists it to
    # cache_path, so it must run even if the query fails.
    ce1 = CachingEngine(source_engine)
    try:
        ce1.execute("SELECT id, name FROM products")
    finally:
        ce1.close()

    # Second instance is built against the same cache file. Close it in a
    # finally block so a failing assertion does not leave the engine open.
    ce2 = CachingEngine(source_engine)
    try:
        assert ce2._cache.is_table_cached("products") is True
    finally:
        ce2.close()
id, name FROM products") + engine.invalidate("products") + rows = engine.execute("SELECT id, name FROM products") + assert len(rows) == 3 + + +def test_invalidate_unknown_table_is_noop(engine): + engine.invalidate("nonexistent_table") # must not raise