diff --git a/.gitignore b/.gitignore
index 4e4d970..759d192 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,7 +40,12 @@ coverage.xml
.DS_Store
Thumbs.db
+# Environment
+.env
+.env.*
+
# Agents
AGENTS.md
CLAUDE.md
-DESIGN_DOCUMENT_MODULE.md
\ No newline at end of file
+DESIGN_DOCUMENT_MODULE.md
+.claude
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8008650..0cec7d2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,10 +4,31 @@ All notable changes to this project will be documented in this file.
## [Unreleased]
+---
+
+## [0.4.0] - 2026-06-03
+
+### Added
+- `add_sink(sink, *, level, **kwargs)` — public API for routing sqlmem log records to any loguru-compatible sink (stream, file, callable); extra kwargs such as `rotation` and `retention` are forwarded to loguru `logger.add()`, except `filter`, which is always set to `"sqlmem"`
+
### Changed
-- `_meta.py` now reads version dynamically from installed package metadata via `importlib.metadata` instead of a hardcoded string
-- Bumped version to `0.2.0` in `pyproject.toml`
-- `CHANGELOG.md` restructured with `[0.2.0]` release section
+- `pyproject.toml` — bumped version to `0.4.0`
+- `config.py` — replaced destructive `logger.remove()` + forced default sink with `logger.disable("sqlmem")`; sqlmem is now silent by default and does not interfere with the host application's logging setup
+
+---
+
+## [0.3.0] - 2026-06-03
+
+### Added
+- `README.md` — full project documentation: architecture overview, quick start, cache behaviour, persistence, configuration, exceptions, logging, and limitations
+
+### Changed
+- `pyproject.toml` — bumped version to `0.3.0`
+- `parser.py` — `_extract_columns` now deduplicates column names while preserving order
+- `.gitignore` — added `.env` and `.env.*` to prevent accidental commit of environment files
+
+### Security
+- Removed `.env` from git tracking (`git rm --cached`)
---
diff --git a/README.md b/README.md
index d491501..7671e4d 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,126 @@
# SQLmem
+Transparent in-memory cache layer between SQLAlchemy and your database. Drop it in front of any SQLAlchemy engine — SELECT queries are served from a fast in-memory SQLite cache, and write statements (INSERT, UPDATE, DELETE) are rejected with `ReadOnlyError`.
+
+## How it works
+
+```
+Application (SQLAlchemy)
+ │
+ ▼
+ [ SQLmem Proxy ]
+ ┌──────────────────────────────┐
+ │ SQL Parser │ → detects SELECT vs. write
+ │ Column Registry │ → tracks which columns are cached per table
+ │ Cache Manager (SQLite RAM) │ → stores data in memory
+ │ Query Executor │ → cache hit / miss logic
+ └──────────────────────────────┘
+ │
+ ▼
+ Database (via original SQLAlchemy engine)
+```
+
+On the first SELECT for a table, SQLmem fetches the columns that the query references from the database and stores them in an in-memory SQLite instance. Subsequent queries for the same columns hit the in-memory cache with no database round-trip. When a query requests a column not yet in cache, SQLmem re-fetches the table with the expanded column set.
+
+## Installation
+
+```bash
+pip install sqlmem
+# or with Poetry
+poetry add sqlmem
+```
+
+Requires Python 3.14.
+
+## Quick start
+
+```python
+from sqlmem import CachingEngine
+from sqlalchemy import create_engine, text
+
+base_engine = create_engine("postgresql://user:pass@host/db")
+engine = CachingEngine(base_engine)
+
+# Pass a SQL string to execute(); unlike a SQLAlchemy 2.x Engine, no connection or text() wrapper is needed:
+results = engine.execute("SELECT id, name FROM users WHERE status = 'active'")
+for row in results:
+ print(row["id"], row["name"])
+```
+
+`execute()` returns a list of dicts. Results are compatible with standard iteration patterns.
+
+## Cache behaviour
+
+**Column accumulation** — SQLmem learns which columns your app needs at runtime, no upfront configuration required:
+
+```
+Query 1: SELECT a, b FROM orders → cache miss → fetch orders(a, b) from DB
+Query 2: SELECT a, d FROM orders → new column d → re-fetch orders(a, b, d)
+Query 3: SELECT b FROM orders → cache hit, no DB query
+Query 4: SELECT * FROM orders → UnsupportedQueryError (wildcard not supported)
+Query 5: SELECT a FROM orders JOIN … → UnsupportedQueryError (JOIN not supported)
+```
+
+**Writes are blocked** — INSERT, UPDATE, and DELETE raise `ReadOnlyError`. SQLmem is a read-only cache.
+
+## Persistence
+
+The in-memory cache is optionally persisted to `cache.db` on disk:
+
+- **On startup**: if `cache.db` exists, it is loaded into memory.
+- **Hourly**: a background thread writes a snapshot to disk.
+- **On shutdown**: a final flush via `atexit` and SIGTERM handler.
+
+Schema version is checked on load — if it does not match, the stale file is discarded and the cache is rebuilt from the database.
+
+## Manual cache invalidation
+
+```python
+engine.invalidate("orders") # drops the table from cache; next query re-fetches from DB
+engine.close() # flush to disk and shut down background thread
+```
+
+## Configuration
+
+Set via environment variables or a `.env` file:
+
+| Variable | Default | Description |
+|---|---|---|
+| `SQLMEM_DEBUG` | `false` | `true` makes DEBUG (instead of INFO) the default level for sinks registered with `add_sink()` |
+| `SQLMEM_CACHE_DB` | `cache.db` | Path to the on-disk persistence file |
+| `SQLMEM_BACKUP_INTERVAL` | `3600` | Backup interval in seconds |
+
+## Exceptions
+
+| Exception | When raised |
+|---|---|
+| `ReadOnlyError` | INSERT, UPDATE, or DELETE statement |
+| `UnsupportedQueryError` | `SELECT *` or any JOIN |
+
+```python
+from sqlmem import ReadOnlyError, UnsupportedQueryError
+```
+
+## Logging
+
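+SQLmem uses [loguru](https://github.com/Delgan/loguru) and is silent by default. Call `sqlmem.add_sink(sys.stderr)` (or pass any other loguru sink) to receive its log records. The sink level defaults to INFO; set `SQLMEM_DEBUG=true` to default to DEBUG for verbose output (every query, cache hit/miss, backup events).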
+
+## Limitations
+
+- `SELECT *` and JOIN queries are not supported.
+- No distributed cache backend (Redis etc.).
+- No transactional consistency guarantees.
+- Write operations (INSERT/UPDATE/DELETE) are always blocked.
+
+## Dependencies
+
+| Layer | Library |
+|---|---|
+| SQL parsing | `sqlglot` |
+| Cache storage | `sqlite3` (stdlib) |
+| Integration | SQLAlchemy 2.x |
+| Logging / configuration | `loguru`, `python-dotenv` |
+
+## License
+
+MIT
diff --git a/pyproject.toml b/pyproject.toml
index bbbe1b7..6c7ec15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "sqlmem"
-version = "0.2.0"
+version = "0.4.0"
description = ""
authors = [
{name = "jan.doubravsky@gmail.com"}
diff --git a/src/sqlmem/__init__.py b/src/sqlmem/__init__.py
index 3e8a707..4c21084 100644
--- a/src/sqlmem/__init__.py
+++ b/src/sqlmem/__init__.py
@@ -1,4 +1,37 @@
+from typing import Any
+
+from loguru import logger
+
+from .config import DEBUG
from .engine import CachingEngine
from .exceptions import ReadOnlyError, UnsupportedQueryError
-__all__ = ["CachingEngine", "ReadOnlyError", "UnsupportedQueryError"]
+_DEFAULT_FORMAT = (
+ "{time:YYYY-MM-DD HH:mm:ss} | "
+ "{level: <8} | "
+ "{name}:{line} - "
+ "{message}"
+)
+
+
+def add_sink(sink: Any, *, level: str | None = None, **kwargs: Any) -> None:
+ """Route sqlmem log records to *sink*.
+
+ Accepts any sink supported by loguru (file path, stream, callable, …).
+ *level* defaults to ``DEBUG`` when ``SQLMEM_DEBUG=true``, otherwise ``INFO``.
+ Extra keyword arguments are forwarded to :func:`loguru.logger.add`.
+
+ Example::
+
+ import sys
+ from sqlmem import add_sink
+ add_sink(sys.stderr)
+ add_sink("sqlmem.log", rotation="10 MB")
+ """
+ logger.enable("sqlmem")
+ kwargs.setdefault("format", _DEFAULT_FORMAT)
+ kwargs.setdefault("colorize", True)
+ logger.add(sink, level=level or ("DEBUG" if DEBUG else "INFO"), filter="sqlmem", **kwargs)
+
+
+__all__ = ["CachingEngine", "ReadOnlyError", "UnsupportedQueryError", "add_sink"]
diff --git a/src/sqlmem/config.py b/src/sqlmem/config.py
index f8b7771..25fc91f 100644
--- a/src/sqlmem/config.py
+++ b/src/sqlmem/config.py
@@ -10,10 +10,5 @@ DEBUG = os.getenv("SQLMEM_DEBUG", "false").lower() == "true"
CACHE_DB_PATH = Path(os.getenv("SQLMEM_CACHE_DB", "cache.db"))
BACKUP_INTERVAL_SECONDS = int(os.getenv("SQLMEM_BACKUP_INTERVAL", "3600"))
-logger.remove()
-logger.add(
- sink=lambda msg: print(msg, end=""),
- level="DEBUG" if DEBUG else "INFO",
- format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{line} - {message}",
- colorize=True,
-)
+# Silent by default — callers opt in via add_sink().
+logger.disable("sqlmem")
diff --git a/src/sqlmem/parser.py b/src/sqlmem/parser.py
index 2316654..5066fcd 100644
--- a/src/sqlmem/parser.py
+++ b/src/sqlmem/parser.py
@@ -63,9 +63,13 @@ def _extract_table(statement: exp.Select) -> str:
def _extract_columns(statement: exp.Select) -> list[str]:
- columns = []
+ seen: set[str] = set()
+ columns: list[str] = []
for col in statement.find_all(exp.Column):
- columns.append(col.name)
+ name = col.name
+ if name not in seen:
+ seen.add(name)
+ columns.append(name)
if not columns:
raise UnsupportedQueryError("Could not extract column names from query.")
return columns
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..42cc474
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,54 @@
+import importlib
+
+import pytest
+
+import sqlmem.config as cfg
+
+
+def _reload(monkeypatch, **env_vars):
+ for key in ("SQLMEM_DEBUG", "SQLMEM_CACHE_DB", "SQLMEM_BACKUP_INTERVAL"):
+ monkeypatch.delenv(key, raising=False)
+ for key, val in env_vars.items():
+ monkeypatch.setenv(key, val)
+ importlib.reload(cfg)
+
+
+def test_debug_defaults_to_false(monkeypatch):
+ _reload(monkeypatch)
+ assert cfg.DEBUG is False
+
+
+def test_debug_true(monkeypatch):
+ _reload(monkeypatch, SQLMEM_DEBUG="true")
+ assert cfg.DEBUG is True
+
+
+def test_debug_case_insensitive(monkeypatch):
+ _reload(monkeypatch, SQLMEM_DEBUG="TRUE")
+ assert cfg.DEBUG is True
+
+
+def test_debug_explicit_false(monkeypatch):
+ _reload(monkeypatch, SQLMEM_DEBUG="false")
+ assert cfg.DEBUG is False
+
+
+def test_cache_db_default_name(monkeypatch):
+ _reload(monkeypatch)
+ assert cfg.CACHE_DB_PATH.name == "cache.db"
+
+
+def test_cache_db_custom_path(monkeypatch, tmp_path):
+ custom = str(tmp_path / "my_cache.db")
+ _reload(monkeypatch, SQLMEM_CACHE_DB=custom)
+ assert cfg.CACHE_DB_PATH == tmp_path / "my_cache.db"
+
+
+def test_backup_interval_default(monkeypatch):
+ _reload(monkeypatch)
+ assert cfg.BACKUP_INTERVAL_SECONDS == 3600
+
+
+def test_backup_interval_custom(monkeypatch):
+ _reload(monkeypatch, SQLMEM_BACKUP_INTERVAL="7200")
+ assert cfg.BACKUP_INTERVAL_SECONDS == 7200
diff --git a/tests/test_engine.py b/tests/test_engine.py
new file mode 100644
index 0000000..f90a9a7
--- /dev/null
+++ b/tests/test_engine.py
@@ -0,0 +1,248 @@
+import sqlite3
+from pathlib import Path
+
+import pytest
+from sqlalchemy import create_engine
+
+import sqlmem.engine as eng_mod
+from sqlmem import CachingEngine, ReadOnlyError, UnsupportedQueryError
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def source_db(tmp_path):
+ """File-based SQLite source with two pre-populated tables."""
+ db_path = tmp_path / "source.db"
+ conn = sqlite3.connect(db_path)
+ conn.execute("CREATE TABLE products (id TEXT, name TEXT, price TEXT)")
+ conn.executemany(
+ "INSERT INTO products VALUES (?, ?, ?)",
+ [("1", "Widget", "9.99"), ("2", "Gadget", "19.99"), ("3", "Doohickey", "4.99")],
+ )
+ conn.execute("CREATE TABLE orders (order_id TEXT, product_id TEXT, qty TEXT)")
+ conn.executemany(
+ "INSERT INTO orders VALUES (?, ?, ?)",
+ [("101", "1", "2"), ("102", "2", "1")],
+ )
+ conn.commit()
+ conn.close()
+ return db_path
+
+
+@pytest.fixture
+def source_engine(source_db):
+ engine = create_engine(f"sqlite:///{source_db}")
+ yield engine
+ engine.dispose()
+
+
+@pytest.fixture
+def cache_path(tmp_path):
+ return tmp_path / "cache.db"
+
+
+@pytest.fixture
+def engine(source_engine, cache_path, monkeypatch):
+ """CachingEngine pointed at a temp cache DB."""
+ monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
+ monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
+ ce = CachingEngine(source_engine)
+ yield ce
+ ce.close()
+
+
+# ---------------------------------------------------------------------------
+# Basic SELECT execution (in-memory)
+# ---------------------------------------------------------------------------
+
+def test_select_returns_list_of_dicts(engine):
+ rows = engine.execute("SELECT id, name FROM products")
+ assert isinstance(rows, list)
+ assert all(isinstance(r, dict) for r in rows)
+
+
+def test_select_correct_row_count(engine):
+ assert len(engine.execute("SELECT id, name FROM products")) == 3
+
+
+def test_select_correct_values(engine):
+ rows = engine.execute("SELECT id, name FROM products")
+ assert {r["name"] for r in rows} == {"Widget", "Gadget", "Doohickey"}
+
+
+def test_select_with_where_clause(engine):
+ rows = engine.execute("SELECT id, price FROM products WHERE id = '1'")
+ assert len(rows) == 1
+ assert rows[0]["price"] == "9.99"
+
+
+def test_select_with_order_and_limit(engine):
+ rows = engine.execute("SELECT id, name FROM products ORDER BY id LIMIT 2")
+ assert len(rows) == 2
+ assert rows[0]["id"] == "1"
+
+
+def test_select_different_table(engine):
+ rows = engine.execute("SELECT order_id, qty FROM orders")
+ assert len(rows) == 2
+
+
+def test_where_on_non_selected_column(engine):
+ """WHERE references a column not in SELECT — parser must extract it for the cache."""
+ rows = engine.execute("SELECT name FROM products WHERE price = '9.99'")
+ assert len(rows) == 1
+ assert rows[0]["name"] == "Widget"
+
+
+# ---------------------------------------------------------------------------
+# In-memory caching behaviour
+# ---------------------------------------------------------------------------
+
+def test_cache_hit_survives_source_deletion(engine, source_db):
+ engine.execute("SELECT id, name FROM products")
+ # Wipe source — cache must still answer
+ conn = sqlite3.connect(source_db)
+ conn.execute("DELETE FROM products")
+ conn.commit()
+ conn.close()
+ rows = engine.execute("SELECT id, name FROM products")
+ assert len(rows) == 3
+
+
+def test_new_column_triggers_refetch(engine):
+ engine.execute("SELECT id FROM products")
+ rows = engine.execute("SELECT id, name FROM products")
+ assert "Widget" in {r["name"] for r in rows}
+
+
+def test_second_query_same_columns_is_cache_hit(engine):
+ engine.execute("SELECT id, name FROM products")
+ assert engine._cache.is_table_cached("products") is True
+ rows = engine.execute("SELECT id, name FROM products")
+ assert len(rows) == 3
+
+
+# ---------------------------------------------------------------------------
+# SQLite file creation — backup to disk
+# ---------------------------------------------------------------------------
+
+def test_close_creates_sql_file(engine, cache_path):
+ engine.execute("SELECT id, name FROM products")
+ engine.close()
+ assert cache_path.exists()
+
+
+def test_sql_file_is_valid_sqlite(engine, cache_path):
+ engine.execute("SELECT id, name FROM products")
+ engine.close()
+ conn = sqlite3.connect(cache_path)
+ tables = {t[0] for t in conn.execute(
+ "SELECT name FROM sqlite_master WHERE type='table'"
+ ).fetchall()}
+ conn.close()
+ assert "_sqlmem_tables" in tables
+ assert "products" in tables
+
+
+def test_sql_file_contains_cached_rows(engine, cache_path):
+ engine.execute("SELECT id, name FROM products")
+ engine.close()
+ conn = sqlite3.connect(cache_path)
+ rows = conn.execute("SELECT id, name FROM products").fetchall()
+ conn.close()
+ assert len(rows) == 3
+
+
+def test_sql_file_meta_table_present(engine, cache_path):
+ engine.execute("SELECT id FROM products")
+ engine.close()
+ conn = sqlite3.connect(cache_path)
+ row = conn.execute(
+ "SELECT value FROM _sqlmem_meta WHERE key = 'schema_version'"
+ ).fetchone()
+ conn.close()
+ assert row is not None
+ assert int(row[0]) >= 1
+
+
+def test_reload_from_disk_file(source_engine, cache_path, monkeypatch):
+ """New CachingEngine picks up table cached by a previous instance."""
+ monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
+ monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
+
+ ce1 = CachingEngine(source_engine)
+ ce1.execute("SELECT id, name FROM products")
+ ce1.close()
+
+ ce2 = CachingEngine(source_engine)
+ assert ce2._cache.is_table_cached("products") is True
+ ce2.close()
+
+
+def test_reload_data_intact_after_restart(source_engine, cache_path, monkeypatch):
+ monkeypatch.setattr(eng_mod, "CACHE_DB_PATH", cache_path)
+ monkeypatch.setattr(eng_mod, "BACKUP_INTERVAL_SECONDS", 9999)
+
+ ce1 = CachingEngine(source_engine)
+ ce1.execute("SELECT id, name FROM products")
+ ce1.close()
+
+ ce2 = CachingEngine(source_engine)
+ rows = ce2.execute("SELECT id, name FROM products")
+ ce2.close()
+ assert {r["name"] for r in rows} == {"Widget", "Gadget", "Doohickey"}
+
+
+# ---------------------------------------------------------------------------
+# Error handling
+# ---------------------------------------------------------------------------
+
+def test_insert_raises_readonly(engine):
+ with pytest.raises(ReadOnlyError):
+ engine.execute("INSERT INTO products VALUES ('4', 'New', '1.00')")
+
+
+def test_update_raises_readonly(engine):
+ with pytest.raises(ReadOnlyError):
+ engine.execute("UPDATE products SET price = '0' WHERE id = '1'")
+
+
+def test_delete_raises_readonly(engine):
+ with pytest.raises(ReadOnlyError):
+ engine.execute("DELETE FROM products WHERE id = '1'")
+
+
+def test_join_raises_unsupported(engine):
+ with pytest.raises(UnsupportedQueryError):
+ engine.execute(
+ "SELECT p.name, o.qty FROM products p JOIN orders o ON p.id = o.product_id"
+ )
+
+
+def test_select_star_raises_unsupported(engine):
+ with pytest.raises(UnsupportedQueryError):
+ engine.execute("SELECT * FROM products")
+
+
+# ---------------------------------------------------------------------------
+# Cache invalidation
+# ---------------------------------------------------------------------------
+
+def test_invalidate_marks_table_absent(engine):
+ engine.execute("SELECT id, name FROM products")
+ engine.invalidate("products")
+ assert engine._cache.is_table_cached("products") is False
+
+
+def test_invalidate_then_refetch_works(engine):
+ engine.execute("SELECT id, name FROM products")
+ engine.invalidate("products")
+ rows = engine.execute("SELECT id, name FROM products")
+ assert len(rows) == 3
+
+
+def test_invalidate_unknown_table_is_noop(engine):
+ engine.invalidate("nonexistent_table") # must not raise