feat(db): extend SessionProfile schema with DEBT-036 keystroke features
Adds the three keystroke signals (five columns in total) motivated by
the manual keystroke analysis in DEBT-036 directly to the SessionProfile
table. We are pre-v1, so the schema is modified in place — Alembic
migrations arrive at v1.
Columns:
- kd_top_bigrams (TEXT) — JSON of top-N most-common digraphs with
mean IAT per bigram. Complements kd_digraph_simhash ("same typist?")
with "same typist in same mental state?" (tired / rested / distracted
shifts bigram-specific IATs measurably).
- kd_start_of_action_latency (REAL/DOUBLE) — median IAT of the first
keystroke after an idle gap > 1s. Separates "initiating a command"
from "executing a remembered one"; real humans have measurable
start-of-action latency, bots don't.
- kd_pause_hist_burst / _think / _distracted (INT) — three-bucket
histogram of inter-arrival times (counts; <0.2s / 0.2s to <1.5s /
≥1.5s). More discriminating than the existing flat burst_ratio /
think_ratio pair: C2 operators concentrate in burst with a thin tail;
opportunistic humans have a fat think bucket and a long distracted tail.
Both backends get an idempotent ADD COLUMN migration
(_migrate_session_profile_table) wired into initialize() alongside
the existing _migrate_attackers_table path — guards on PRAGMA
table_info (SQLite) / information_schema.COLUMNS (MySQL) so reruns
are safe.
PII discipline comment on kd_digraph_simhash and kd_top_bigrams:
both operate on bigram CHARACTERS, never on raw input stream content.
Attacker passwords typed over SSH must not land here.
Test updated for the MySQL initialize() migration-order contract.
This commit is contained in:
@@ -141,10 +141,43 @@ class SessionProfile(SQLModel, table=True):
|
||||
# Fixed-width BINARY(8) rather than BLOB: MySQL can't index BLOB/TEXT
|
||||
# columns without a prefix length, and SimHashes are always exactly 8
|
||||
# bytes so a variable-length type gains nothing here.
|
||||
#
|
||||
# PII discipline: the simhash is computed over keystroke CHARACTERS
|
||||
# (digraph bigrams), never over the raw content of the input stream —
|
||||
# attacker passwords typed over SSH must never land in this column.
|
||||
kd_digraph_simhash: Optional[bytes] = Field(
|
||||
default=None,
|
||||
sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
|
||||
)
|
||||
# Top-N most-common digraphs with their mean IAT, as JSON.
|
||||
# Complements kd_digraph_simhash: the simhash answers "same typist?",
|
||||
# this answers "same typist IN THE SAME MENTAL STATE?" (tired vs rested
|
||||
# vs distracted shifts bigram-specific IATs measurably). Shape:
|
||||
# [["th", 47, 0.082], ["in", 31, 0.091], ...] (bigram, count, mean_iat_s)
|
||||
# Same PII discipline as kd_digraph_simhash: bigram CHARACTERS only,
|
||||
# no content. Bounded by the ingester to N≤32 to cap row width.
|
||||
kd_top_bigrams: Optional[str] = Field(
|
||||
default=None, sa_column=Column("kd_top_bigrams", Text, nullable=True),
|
||||
)
|
||||
# IAT of the first keystroke following an idle gap > 1s (or the
|
||||
# session-start gap before the first keystroke ever). Separates
|
||||
# "initiating a command" from "executing a remembered one" — real
|
||||
# humans have measurable start-of-action latency, bots don't. Median
|
||||
# across all such initiations in the session, seconds.
|
||||
kd_start_of_action_latency: Optional[float] = None
|
||||
# Three-bucket pause-length histogram, counts (not ratios — raw counts
|
||||
# preserve the total-keystrokes denominator in the column itself):
|
||||
# burst : IAT < 0.2s (muscle-memory digraphs)
|
||||
# think : 0.2s ≤ IAT < 1.5s (semantic boundary, context switch)
|
||||
# distracted: IAT ≥ 1.5s (went to look something up, got paged,
|
||||
# actively reading another window)
|
||||
# More discriminating than the flat burst_ratio/think_ratio pair:
|
||||
# C2 operators concentrate in the burst bucket with a thin tail;
|
||||
# opportunistic humans have a fat think bucket plus a long distracted
|
||||
# tail. Nulls indicate "ingester hasn't run yet", not "zero events".
|
||||
kd_pause_hist_burst: Optional[int] = None
|
||||
kd_pause_hist_think: Optional[int] = None
|
||||
kd_pause_hist_distracted: Optional[int] = None
|
||||
# Derived totals.
|
||||
total_keystrokes: Optional[int] = None
|
||||
session_duration_s: Optional[float] = None
|
||||
|
||||
@@ -96,6 +96,35 @@ class MySQLRepository(SQLModelRepository):
|
||||
f"ALTER TABLE `{table_name}` MODIFY COLUMN `{col_name}` {spec}"
|
||||
))
|
||||
|
||||
async def _migrate_session_profile_table(self) -> None:
    """Backfill the DEBT-036 keystroke-dynamics columns on ``session_profile``.

    Covers the top-bigrams JSON, the start-of-action latency and the
    three pause-histogram buckets. MySQL rejects ``ALTER TABLE ADD COLUMN``
    for a column that already exists, so the current column set is read
    from ``information_schema.COLUMNS`` first and only the absent columns
    are added. That makes the migration safe to rerun.

    Returns without issuing any DDL when the lookup yields no rows
    (presumably the table has not been created yet).
    """
    # (column name, MySQL column spec), in the order they are added.
    wanted = (
        ("kd_top_bigrams", "TEXT NULL"),
        ("kd_start_of_action_latency", "DOUBLE NULL"),
        ("kd_pause_hist_burst", "INT NULL"),
        ("kd_pause_hist_think", "INT NULL"),
        ("kd_pause_hist_distracted", "INT NULL"),
    )
    async with self.engine.begin() as conn:
        result = await conn.execute(text(
            "SELECT COLUMN_NAME FROM information_schema.COLUMNS "
            "WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'session_profile'"
        ))
        present = {row[0] for row in result.fetchall()}
        if not present:
            # No columns reported for session_profile: nothing to alter.
            return
        missing = [(name, spec) for name, spec in wanted if name not in present]
        for name, spec in missing:
            await conn.execute(text(
                f"ALTER TABLE session_profile ADD COLUMN {name} {spec}"
            ))
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Create tables and run all MySQL-specific migrations.
|
||||
|
||||
@@ -108,6 +137,7 @@ class MySQLRepository(SQLModelRepository):
|
||||
await lock_conn.execute(text("SELECT GET_LOCK('decnet_schema_init', 30)"))
|
||||
try:
|
||||
await self._migrate_attackers_table()
|
||||
await self._migrate_session_profile_table()
|
||||
await self._migrate_column_types()
|
||||
async with self.engine.begin() as conn:
|
||||
await conn.run_sync(SQLModel.metadata.create_all)
|
||||
|
||||
@@ -54,6 +54,31 @@ class SQLiteRepository(SQLModelRepository):
|
||||
"ALTER TABLE attackers ADD COLUMN country_source VARCHAR(16)"
|
||||
))
|
||||
|
||||
async def _migrate_session_profile_table(self) -> None:
    """Backfill the DEBT-036 keystroke-dynamics columns on ``session_profile``.

    Covers the top-bigrams JSON, the start-of-action latency and the
    three pause-histogram buckets. SQLite rejects ``ALTER TABLE ADD COLUMN``
    for a column that already exists, so the current column set is read
    via ``PRAGMA table_info`` first and only the absent columns are added.
    That makes the migration safe to rerun.
    """
    # (column name, SQLite type), in the order they are added.
    wanted = (
        ("kd_top_bigrams", "TEXT"),
        ("kd_start_of_action_latency", "REAL"),
        ("kd_pause_hist_burst", "INTEGER"),
        ("kd_pause_hist_think", "INTEGER"),
        ("kd_pause_hist_distracted", "INTEGER"),
    )
    async with self.engine.begin() as conn:
        result = await conn.execute(text("PRAGMA table_info(session_profile)"))
        # Index 1 of each table_info row is read as the column name.
        present = {row[1] for row in result.fetchall()}
        if not present:
            return  # table absent; create_all() handles it.
        missing = [(name, sql_type) for name, sql_type in wanted if name not in present]
        for name, sql_type in missing:
            await conn.execute(text(
                f"ALTER TABLE session_profile ADD COLUMN {name} {sql_type}"
            ))
|
||||
|
||||
def _json_field_equals(self, key: str):
    """Return a SQL text clause comparing JSON key *key* of ``fields`` to ``:val``.

    The caller is expected to bind ``:val`` when executing the clause.
    """
    # SQLite keeps JSON as plain text, so json_extract is the canonical
    # way to read a single key out of the ``fields`` column.
    # NOTE(review): *key* is interpolated into the JSON path verbatim —
    # confirm callers only pass trusted key names.
    predicate = f"json_extract(fields, '$.{key}') = :val"
    return text(predicate)
|
||||
|
||||
@@ -142,6 +142,7 @@ class SQLModelRepository(BaseRepository):
|
||||
"""Create tables if absent and seed the admin user."""
|
||||
from sqlmodel import SQLModel
|
||||
await self._migrate_attackers_table()
|
||||
await self._migrate_session_profile_table()
|
||||
async with self.engine.begin() as conn:
|
||||
await conn.run_sync(SQLModel.metadata.create_all)
|
||||
await self._ensure_admin_user()
|
||||
@@ -181,6 +182,11 @@ class SQLModelRepository(BaseRepository):
|
||||
"""Legacy-schema cleanup. Override per dialect (DDL introspection is non-portable)."""
|
||||
return None
|
||||
|
||||
async def _migrate_session_profile_table(self) -> None:
|
||||
"""Add DEBT-036 keystroke-dynamics columns to existing session_profile
|
||||
rows. Override per dialect — DDL introspection is non-portable."""
|
||||
return None
|
||||
|
||||
# ---------------------------------------------------------------- logs
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -191,7 +191,9 @@ async def test_migrate_column_types_default_clause_per_column():
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_mysql_initialize_calls_migrate_column_types():
|
||||
"""MySQLRepository.initialize() must invoke _migrate_column_types after _migrate_attackers_table."""
|
||||
"""MySQLRepository.initialize() must invoke every migration helper in
|
||||
the right order: attackers first, then session_profile (DEBT-036),
|
||||
then column types, then seed the admin user."""
|
||||
repo = _make_repo()
|
||||
|
||||
call_order: list[str] = []
|
||||
@@ -199,6 +201,9 @@ async def test_mysql_initialize_calls_migrate_column_types():
|
||||
async def fake_migrate_attackers():
|
||||
call_order.append("migrate_attackers")
|
||||
|
||||
async def fake_migrate_session_profile():
|
||||
call_order.append("migrate_session_profile")
|
||||
|
||||
async def fake_migrate_column_types():
|
||||
call_order.append("migrate_column_types")
|
||||
|
||||
@@ -206,6 +211,7 @@ async def test_mysql_initialize_calls_migrate_column_types():
|
||||
call_order.append("ensure_admin")
|
||||
|
||||
repo._migrate_attackers_table = fake_migrate_attackers
|
||||
repo._migrate_session_profile_table = fake_migrate_session_profile
|
||||
repo._migrate_column_types = fake_migrate_column_types
|
||||
repo._ensure_admin_user = fake_ensure_admin
|
||||
|
||||
@@ -220,5 +226,9 @@ async def test_mysql_initialize_calls_migrate_column_types():
|
||||
|
||||
await repo.initialize()
|
||||
|
||||
assert call_order == ["migrate_attackers", "migrate_column_types", "ensure_admin"], \
|
||||
f"Unexpected call order: {call_order}"
|
||||
assert call_order == [
|
||||
"migrate_attackers",
|
||||
"migrate_session_profile",
|
||||
"migrate_column_types",
|
||||
"ensure_admin",
|
||||
], f"Unexpected call order: {call_order}"
|
||||
|
||||
Reference in New Issue
Block a user