Files
DECNET/decnet/web/db/sqlite/repository.py
anti fee697694d feat(ttp): E.3.3 repository — insert_tags + listing rollups (dual backend)
Dialect-split: portable rollup queries on TTPMixin; bulk insert with
ON CONFLICT DO NOTHING / INSERT IGNORE in the per-dialect repos.
Confidence-floor (< 0.3) drop applied at mixin layer before the
dialect hook. BaseRepository now declares the six TTP methods abstract.

Tests in tests/web/db/test_ttp_repo.py flipped from pytest.fail stubs
to real dual-backend behavioral tests; tests/ttp/test_confidence.py
drop-below-floor xfail removed.
2026-05-01 08:04:46 -04:00

123 lines
5.4 KiB
Python

from typing import Any, List, Optional
from sqlalchemy import func, select, text, literal_column
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from decnet.config import _ROOT
from decnet.web.db.models import Log, TTPTag
from decnet.web.db.sqlite.database import get_async_engine
from decnet.web.db.sqlmodel_repo import SQLModelRepository
class SQLiteRepository(SQLModelRepository):
    """SQLite backend — uses ``aiosqlite``.

    Supplies the SQLite-specific pieces of the repository:

    * legacy-schema migrations gated on ``PRAGMA table_info``
      (``attackers`` and ``session_profile`` tables),
    * JSON field comparison via ``json_extract``,
    * bulk TTP-tag insert using ``ON CONFLICT DO NOTHING``,
    * the log-histogram bucket expression (``strftime`` + ``unixepoch``).
    """

    def __init__(self, db_path: str = str(_ROOT / "decnet.db")) -> None:
        """Create the async engine and session factory for *db_path*."""
        self.db_path = db_path
        self.engine = get_async_engine(db_path)
        self.session_factory = async_sessionmaker(
            self.engine, class_=AsyncSession, expire_on_commit=False
        )

    async def _migrate_attackers_table(self) -> None:
        """Drop the old attackers table if it lacks the uuid column (pre-UUID schema).

        Also adds the GeoIP columns (``country_code``, ``country_source``)
        to existing tables that predate them. SQLite's
        ``ALTER TABLE ADD COLUMN`` raises when the column already exists,
        so every addition is gated on ``PRAGMA table_info`` to keep the
        migration idempotent.
        """
        async with self.engine.begin() as conn:
            rows = (await conn.execute(text("PRAGMA table_info(attackers)"))).fetchall()
            if not rows:
                return  # table absent; create_all() handles it.

            # Column 1 of each PRAGMA table_info row is the column name.
            existing_cols = {r[1] for r in rows}
            if "uuid" not in existing_cols:
                await conn.execute(text("DROP TABLE attackers"))
                return  # create_all() rebuilds fresh — no need to patch columns.

            if "country_code" not in existing_cols:
                await conn.execute(text(
                    "ALTER TABLE attackers ADD COLUMN country_code VARCHAR(2)"
                ))
                await conn.execute(text(
                    "CREATE INDEX IF NOT EXISTS ix_attackers_country_code "
                    "ON attackers (country_code)"
                ))
            if "country_source" not in existing_cols:
                await conn.execute(text(
                    "ALTER TABLE attackers ADD COLUMN country_source VARCHAR(16)"
                ))

    async def _migrate_session_profile_table(self) -> None:
        """Add DEBT-036 keystroke-dynamics columns (start-of-action latency,
        three-bucket pause histogram, top-bigrams JSON) to existing tables.

        SQLite's ``ALTER TABLE ADD COLUMN`` fails if the column already
        exists, so gate on ``PRAGMA table_info`` to stay idempotent.
        """
        async with self.engine.begin() as conn:
            rows = (
                await conn.execute(text("PRAGMA table_info(session_profile)"))
            ).fetchall()
            if not rows:
                return  # table absent; create_all() handles it.

            existing_cols = {r[1] for r in rows}
            additions = [
                ("kd_top_bigrams", "TEXT"),
                ("kd_start_of_action_latency", "REAL"),
                ("kd_pause_hist_burst", "INTEGER"),
                ("kd_pause_hist_think", "INTEGER"),
                ("kd_pause_hist_distracted", "INTEGER"),
            ]
            for col_name, col_type in additions:
                if col_name in existing_cols:
                    continue
                # Names and types come from the fixed list above, never from input.
                await conn.execute(text(
                    f"ALTER TABLE session_profile ADD COLUMN {col_name} {col_type}"
                ))

    def _json_field_equals(self, key: str) -> Any:
        """Return a SQL text clause comparing JSON field *key* to ``:val``.

        The caller is responsible for binding the ``val`` parameter.
        """
        # SQLite stores JSON as text; json_extract is the canonical accessor.
        # NOTE(review): ``key`` is spliced into the SQL text, not bound as a
        # parameter — confirm every caller sanitises it before calling.
        return text(f"json_extract(fields, '$.{key}') = :val")

    async def _insert_tags_or_ignore(self, rows: list[TTPTag]) -> int:
        """Bulk-insert with SQLite's ``ON CONFLICT DO NOTHING`` on the
        ``uuid`` PK.

        Returns the rowcount reported for the statement (``0`` when the
        driver reports none); skipped duplicates are not meant to count.
        An empty *rows* list short-circuits without opening a session.
        """
        if not rows:
            return 0
        payload = [r.model_dump() for r in rows]
        stmt = sqlite_insert(TTPTag.__table__).values(payload)  # type: ignore[attr-defined]
        stmt = stmt.on_conflict_do_nothing(index_elements=["uuid"])
        async with self._session() as session:
            result = await session.execute(stmt)
            await session.commit()
            return int(result.rowcount or 0)

    async def get_log_histogram(
        self,
        search: Optional[str] = None,
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        interval_minutes: int = 15,
    ) -> List[dict]:
        """Count logs per fixed-width time bucket.

        Args:
            search: Optional search expression forwarded to ``_apply_filters``.
            start_time: Optional lower time bound forwarded to ``_apply_filters``.
            end_time: Optional upper time bound forwarded to ``_apply_filters``.
            interval_minutes: Bucket width in minutes; values below 1 are
                clamped to 1.

        Returns:
            A list of ``{"time": ..., "count": ...}`` dicts ordered by
            bucket time.
        """
        # The width is interpolated into raw SQL below, so force a real int:
        # a float would switch SQLite to floating-point division and break
        # the floor-to-bucket arithmetic, and nothing but digits may reach
        # the SQL text.
        bucket_seconds = max(int(interval_minutes), 1) * 60
        bucket_expr: Any = literal_column(
            f"datetime((strftime('%s', timestamp) / {bucket_seconds}) * {bucket_seconds}, 'unixepoch')"
        ).label("bucket_time")

        statement: Any = select(bucket_expr, func.count().label("count")).select_from(Log)
        statement = self._apply_filters(statement, search, start_time, end_time)
        statement = statement.group_by(literal_column("bucket_time")).order_by(
            literal_column("bucket_time")
        )

        async with self._session() as session:
            results = await session.execute(statement)
            return [{"time": r[0], "count": r[1]} for r in results.all()]