Store named datetime columns as INTEGER microseconds (datetime_columns)

This commit is contained in:
Jan Doubravský
2026-06-09 18:18:38 +02:00
parent a21b5a2a04
commit 8e46ee3547
11 changed files with 255 additions and 22 deletions
+75 -1
View File
@@ -4,7 +4,7 @@ import uuid
import pytest
from sqlmem._coerce import coerce_params, to_sqlite
from sqlmem._coerce import coerce_params, to_sqlite, to_sqlite_datetime
from sqlmem.cache import CacheManager
@@ -91,6 +91,80 @@ def test_coerce_params_none():
assert coerce_params(None) is None
# --- to_sqlite_datetime (INTEGER µs storage, 1.12.0) ------------------------
def test_datetime_to_epoch_micros():
    """An aware UTC datetime converts to integer microseconds since the epoch."""
    moment = datetime.datetime(2026, 6, 1, 10, 0, 0, tzinfo=datetime.timezone.utc)
    # 2026-06-01T10:00:00Z expressed as microseconds since the Unix epoch.
    epoch_micros = int(moment.timestamp() * 1_000_000)
    assert to_sqlite_datetime(moment) == epoch_micros
def test_datetime_naive_treated_as_utc():
    """A naive datetime converts to the same value as that wall time tagged UTC."""
    without_tz = datetime.datetime(2026, 6, 1, 10, 0, 0)
    with_utc = without_tz.replace(tzinfo=datetime.timezone.utc)
    naive_micros = to_sqlite_datetime(without_tz)
    aware_micros = to_sqlite_datetime(with_utc)
    assert naive_micros == aware_micros
def test_datetime_micros_are_exact():
    """The stored microsecond count rebuilds the original instant exactly."""
    original = datetime.datetime(
        2026, 6, 5, 14, 54, 24, 823000, tzinfo=datetime.timezone.utc
    )
    micros = to_sqlite_datetime(original)
    # Epoch + stored offset must land on the same instant, with no rounding loss.
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    offset = datetime.timedelta(microseconds=micros)
    assert epoch + offset == original
def test_datetime_none_passes_through():
    """``None`` comes back as ``None`` instead of being converted."""
    result = to_sqlite_datetime(None)
    assert result is None
def test_datetime_iso_string_parsed():
    """An ISO-8601 string converts to the same value as the equal aware datetime."""
    from_text = to_sqlite_datetime("2026-06-01T10:00:00+00:00")
    from_datetime = to_sqlite_datetime(
        datetime.datetime(2026, 6, 1, 10, 0, 0, tzinfo=datetime.timezone.utc)
    )
    assert from_text == from_datetime
def test_datetime_unparseable_is_none():
    """A string that is not a date converts to ``None``."""
    result = to_sqlite_datetime("not a date")
    assert result is None
# --- integration: datetime_columns are stored as INTEGER --------------------
def test_datetime_column_stored_as_integer(tmp_path):
    """A column named in ``datetime_columns`` is declared INTEGER and holds µs-since-epoch.

    Loads one row whose ``changed`` value is an aware UTC datetime, then checks
    the declared column types and the value read back from the cache.
    """
    c = CacheManager(
        db_path=tmp_path / "cache.db",
        backup_interval=9999,
        datetime_columns={"t": ["changed"]},
    )
    # try/finally: close the cache even when an assertion below fails, so a
    # failing test does not leave the database open.
    try:
        dt = datetime.datetime(2026, 6, 1, 10, 0, 0, tzinfo=datetime.timezone.utc)
        c.load_table("t", ["id", "changed"], FakeSource([("1", dt)]))
        # Column declared INTEGER, value stored as µs-since-epoch.
        # In PRAGMA table_info rows, index 1 is the column name and index 2 its
        # declared type.
        coltype = c.connection.execute("PRAGMA table_info(t)").fetchall()
        types = {row[1]: row[2] for row in coltype}
        assert types["changed"] == "INTEGER"
        assert types["id"] == "TEXT"
        _, out = c.execute_in_memory("SELECT changed FROM t")
        assert out == [(to_sqlite_datetime(dt),)]
    finally:
        c.close()
def test_non_datetime_columns_unaffected_by_datetime_columns(tmp_path):
    """Columns not listed in ``datetime_columns`` keep their usual coercion.

    The table is loaded without the declared ``changed`` column; the Decimal
    ``price`` value must still read back as its string form.
    """
    c = CacheManager(
        db_path=tmp_path / "cache.db",
        backup_interval=9999,
        datetime_columns={"t": ["changed"]},
    )
    # try/finally: close the cache even when the assertion below fails.
    try:
        c.load_table("t", ["id", "price"], FakeSource([("1", decimal.Decimal("9.99"))]))
        _, out = c.execute_in_memory("SELECT id, price FROM t")
        assert out == [("1", "9.99")]  # still TEXT/ISO coercion
    finally:
        c.close()
# --- integration: values reach the cache through coercion -------------------
+53 -1
View File
@@ -1,6 +1,6 @@
import sqlite3
import threading
from datetime import datetime
from datetime import datetime, timezone
from types import SimpleNamespace
import pytest
@@ -140,6 +140,18 @@ def test_bind_watermark_passes_through_non_datetime():
assert _bind_watermark("12345") == "12345"
# --- INTEGER µs watermark binding (datetime_columns, 1.12.0) ----------------
def test_bind_watermark_epoch_us_reconstructs_datetime():
    """With ``epoch_us=True`` a microsecond watermark binds back to its UTC datetime."""
    instant = datetime(2026, 6, 5, 14, 54, 24, 823000, tzinfo=timezone.utc)
    micros = int(instant.timestamp() * 1_000_000)
    # Whether the watermark is an int or its digit string (it round-trips through
    # the TEXT last_synced_at column), it binds back to the same UTC datetime.
    for watermark in (micros, str(micros)):
        assert _bind_watermark(watermark, epoch_us=True) == instant
class _SpyCursor:
def __init__(self, rows):
self._rows = list(rows)
@@ -174,6 +186,46 @@ def test_refresh_binds_watermark_as_datetime(env):
assert params == (datetime(2026, 6, 5, 14, 54, 24, 823000),)
class _RowSource:
    """Returns fixed rows for any query (for loading datetime-typed source data)."""

    def __init__(self, rows):
        """Keep ``rows`` to hand back from every ``execute`` call."""
        self._rows = rows

    def execute(self, sql, params=()):
        """Ignore ``sql`` and ``params``; return a ``_SpyCursor`` over the fixed rows."""
        fixed_rows = self._rows
        return _SpyCursor(fixed_rows)
def test_datetime_column_watermark_stored_as_int_and_bound_back(tmp_path):
    """A change column declared in datetime_columns is stored as INTEGER µs; the
    watermark is bound back to a real datetime for the source query."""
    cache = CacheManager(
        db_path=tmp_path / "c.db",
        backup_interval=9999,
        datetime_columns={"products": ["changed"]},
    )
    # try/finally: close the cache even when an assertion below fails, so a
    # failing test does not leave the database open.
    try:
        dt1 = datetime(2026, 6, 1, 10, 0, 0, tzinfo=timezone.utc)
        dt2 = datetime(2026, 6, 1, 10, 5, 0, tzinfo=timezone.utc)
        cache.load_table(
            "products", ["id", "changed"], _RowSource([("1", dt1), ("2", dt2)])
        )
        cache.create_unique_index("products", ["id"])
        cache.set_last_synced_at("products", cache.max_value("products", "changed"))

        # Watermark persisted as the max INTEGER µs (digit string out of the TEXT col).
        wm = cache.get_last_synced_at("products")
        assert wm == str(int(dt2.timestamp() * 1_000_000))

        refresher = DeltaRefresher(
            cache, {"products": ResolvedDelta("changed", ["id"])}
        )
        spy = _SpySource(rows=[])  # no new rows — just capture the bound watermark
        refresher.refresh(spy)

        assert spy.bound, "source query was never issued"
        # Only the parameters of the most recent source query matter here.
        _, params = spy.bound[-1]
        assert params == (dt2,)  # bound back as datetime, not an int/string
    finally:
        cache.close()
# ---------------------------------------------------------------------------
# Refresh failures are recorded (4.3) so a stuck delta is visible in stats
# ---------------------------------------------------------------------------