diff --git a/decnet/web/db/models/attackers.py b/decnet/web/db/models/attackers.py index 9c20c19d..e8e083a1 100644 --- a/decnet/web/db/models/attackers.py +++ b/decnet/web/db/models/attackers.py @@ -141,10 +141,43 @@ class SessionProfile(SQLModel, table=True): # Fixed-width BINARY(8) rather than BLOB: MySQL can't index BLOB/TEXT # columns without a prefix length, and SimHashes are always exactly 8 # bytes so a variable-length type gains nothing here. + # + # PII discipline: the simhash is computed over keystroke CHARACTERS + # (digraph bigrams), never over the raw content of the input stream — + # attacker passwords typed over SSH must never land in this column. kd_digraph_simhash: Optional[bytes] = Field( default=None, sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True), ) + # Top-N most-common digraphs with their mean IAT, as JSON. + # Complements kd_digraph_simhash: the simhash answers "same typist?", + # this answers "same typist IN THE SAME MENTAL STATE?" (tired vs rested + # vs distracted shifts bigram-specific IATs measurably). Shape: + # [["th", 47, 0.082], ["in", 31, 0.091], ...] (bigram, count, mean_iat_s) + # Same PII discipline as kd_digraph_simhash: bigram CHARACTERS only, + # no content. Bounded by the ingester to N≤32 to cap row width. + kd_top_bigrams: Optional[str] = Field( + default=None, sa_column=Column("kd_top_bigrams", Text, nullable=True), + ) + # IAT of the first keystroke following an idle gap > 1s (or the + # session-start gap before the first keystroke ever). Separates + # "initiating a command" from "executing a remembered one" — real + # humans have measurable start-of-action latency, bots don't. Median + # across all such initiations in the session, seconds. 
+ kd_start_of_action_latency: Optional[float] = None + # Three-bucket pause-length histogram, counts (not ratios — raw counts + # preserve the total-keystrokes denominator in the column itself): + # burst : IAT < 0.2s (muscle-memory digraphs) + # think : 0.2s ≤ IAT < 1.5s (semantic boundary, context switch) + # distracted: IAT ≥ 1.5s (went to look something up, got paged, + # actively reading another window) + # More discriminating than the flat burst_ratio/think_ratio pair: + # C2 operators concentrate in the burst bucket with a thin tail; + # opportunistic humans have a fat think bucket plus a long distracted + # tail. Nulls indicate "ingester hasn't run yet", not "zero events". + kd_pause_hist_burst: Optional[int] = None + kd_pause_hist_think: Optional[int] = None + kd_pause_hist_distracted: Optional[int] = None # Derived totals. total_keystrokes: Optional[int] = None session_duration_s: Optional[float] = None diff --git a/decnet/web/db/mysql/repository.py b/decnet/web/db/mysql/repository.py index 8547061a..b069c3e8 100644 --- a/decnet/web/db/mysql/repository.py +++ b/decnet/web/db/mysql/repository.py @@ -96,6 +96,35 @@ class MySQLRepository(SQLModelRepository): f"ALTER TABLE `{table_name}` MODIFY COLUMN `{col_name}` {spec}" )) + async def _migrate_session_profile_table(self) -> None: + """Add DEBT-036 keystroke-dynamics columns (start-of-action latency, + three-bucket pause histogram, top-bigrams JSON) to existing tables. + + MySQL's ``ALTER TABLE ADD COLUMN`` fails if the column already + exists, so gate on ``information_schema.COLUMNS`` to stay + idempotent. 
+ """ + async with self.engine.begin() as conn: + rows = (await conn.execute(text( + "SELECT COLUMN_NAME FROM information_schema.COLUMNS " + "WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'session_profile'" + ))).fetchall() + if not rows: + return + existing_cols = {r[0] for r in rows} + additions = [ + ("kd_top_bigrams", "TEXT NULL"), + ("kd_start_of_action_latency", "DOUBLE NULL"), + ("kd_pause_hist_burst", "INT NULL"), + ("kd_pause_hist_think", "INT NULL"), + ("kd_pause_hist_distracted", "INT NULL"), + ] + for col_name, col_spec in additions: + if col_name not in existing_cols: + await conn.execute(text( + f"ALTER TABLE session_profile ADD COLUMN {col_name} {col_spec}" + )) + async def initialize(self) -> None: """Create tables and run all MySQL-specific migrations. @@ -108,6 +137,7 @@ class MySQLRepository(SQLModelRepository): await lock_conn.execute(text("SELECT GET_LOCK('decnet_schema_init', 30)")) try: await self._migrate_attackers_table() + await self._migrate_session_profile_table() await self._migrate_column_types() async with self.engine.begin() as conn: await conn.run_sync(SQLModel.metadata.create_all) diff --git a/decnet/web/db/sqlite/repository.py b/decnet/web/db/sqlite/repository.py index e920e94f..372820f5 100644 --- a/decnet/web/db/sqlite/repository.py +++ b/decnet/web/db/sqlite/repository.py @@ -54,6 +54,31 @@ class SQLiteRepository(SQLModelRepository): "ALTER TABLE attackers ADD COLUMN country_source VARCHAR(16)" )) + async def _migrate_session_profile_table(self) -> None: + """Add DEBT-036 keystroke-dynamics columns (start-of-action latency, + three-bucket pause histogram, top-bigrams JSON) to existing tables. + + SQLite's ``ALTER TABLE ADD COLUMN`` fails if the column already + exists, so gate on ``PRAGMA table_info`` to stay idempotent. + """ + async with self.engine.begin() as conn: + rows = (await conn.execute(text("PRAGMA table_info(session_profile)"))).fetchall() + if not rows: + return # table absent; create_all() handles it. 
+ existing_cols = {r[1] for r in rows} + additions = [ + ("kd_top_bigrams", "TEXT"), + ("kd_start_of_action_latency", "REAL"), + ("kd_pause_hist_burst", "INTEGER"), + ("kd_pause_hist_think", "INTEGER"), + ("kd_pause_hist_distracted", "INTEGER"), + ] + for col_name, col_type in additions: + if col_name not in existing_cols: + await conn.execute(text( + f"ALTER TABLE session_profile ADD COLUMN {col_name} {col_type}" + )) + def _json_field_equals(self, key: str): # SQLite stores JSON as text; json_extract is the canonical accessor. return text(f"json_extract(fields, '$.{key}') = :val") diff --git a/decnet/web/db/sqlmodel_repo.py b/decnet/web/db/sqlmodel_repo.py index d52c858e..83758fc0 100644 --- a/decnet/web/db/sqlmodel_repo.py +++ b/decnet/web/db/sqlmodel_repo.py @@ -142,6 +142,7 @@ class SQLModelRepository(BaseRepository): """Create tables if absent and seed the admin user.""" from sqlmodel import SQLModel await self._migrate_attackers_table() + await self._migrate_session_profile_table() async with self.engine.begin() as conn: await conn.run_sync(SQLModel.metadata.create_all) await self._ensure_admin_user() @@ -181,6 +182,11 @@ class SQLModelRepository(BaseRepository): """Legacy-schema cleanup. Override per dialect (DDL introspection is non-portable).""" return None + async def _migrate_session_profile_table(self) -> None: + """Add DEBT-036 keystroke-dynamics columns to existing session_profile + tables. 
Override per dialect — DDL introspection is non-portable.""" + return None + # ---------------------------------------------------------------- logs @staticmethod diff --git a/tests/db/mysql/test_mysql_migration.py b/tests/db/mysql/test_mysql_migration.py index 7182c2f4..febe9cab 100644 --- a/tests/db/mysql/test_mysql_migration.py +++ b/tests/db/mysql/test_mysql_migration.py @@ -191,7 +191,9 @@ async def test_migrate_column_types_default_clause_per_column(): @pytest.mark.asyncio async def test_mysql_initialize_calls_migrate_column_types(): - """MySQLRepository.initialize() must invoke _migrate_column_types after _migrate_attackers_table.""" + """MySQLRepository.initialize() must invoke every migration helper in + the right order: attackers first, then session_profile (DEBT-036), + then column types, then seed the admin user.""" repo = _make_repo() call_order: list[str] = [] @@ -199,15 +201,19 @@ async def test_mysql_initialize_calls_migrate_column_types(): async def fake_migrate_attackers(): call_order.append("migrate_attackers") + async def fake_migrate_session_profile(): + call_order.append("migrate_session_profile") + async def fake_migrate_column_types(): call_order.append("migrate_column_types") async def fake_ensure_admin(): call_order.append("ensure_admin") - repo._migrate_attackers_table = fake_migrate_attackers - repo._migrate_column_types = fake_migrate_column_types - repo._ensure_admin_user = fake_ensure_admin + repo._migrate_attackers_table = fake_migrate_attackers + repo._migrate_session_profile_table = fake_migrate_session_profile + repo._migrate_column_types = fake_migrate_column_types + repo._ensure_admin_user = fake_ensure_admin # Stub engine.begin() so create_all is a no-op fake_conn = AsyncMock() @@ -220,5 +226,9 @@ async def test_mysql_initialize_calls_migrate_column_types(): await repo.initialize() - assert call_order == ["migrate_attackers", "migrate_column_types", "ensure_admin"], \ - f"Unexpected call order: {call_order}" + assert call_order 
== [ + "migrate_attackers", + "migrate_session_profile", + "migrate_column_types", + "ensure_admin", + ], f"Unexpected call order: {call_order}"