diff --git a/decnet/web/db/models/__init__.py b/decnet/web/db/models/__init__.py index cda33950..ea32a0fc 100644 --- a/decnet/web/db/models/__init__.py +++ b/decnet/web/db/models/__init__.py @@ -48,7 +48,6 @@ from .attackers import ( AttackerFingerprintState, AttackerIdentity, AttackersResponse, - SessionProfile, SmtpTarget, ) from .attacker_intel import ( @@ -255,7 +254,6 @@ __all__ = [ "AttackersResponse", "ObservationRow", "ObservedAttachment", - "SessionProfile", "SmtpTarget", # campaigns "Campaign", diff --git a/decnet/web/db/models/attacker_intel.py b/decnet/web/db/models/attacker_intel.py index 99be69df..97f580ee 100644 --- a/decnet/web/db/models/attacker_intel.py +++ b/decnet/web/db/models/attacker_intel.py @@ -24,7 +24,7 @@ class AttackerIntel(SQLModel, table=True): ``schema_version`` is committed to storage from day one — federation gossip in v2/v3 requires cross-operator compatibility, and retrofitting a version column after rows exist is painful. Mirrors - the rationale on :class:`SessionProfile`. + the rationale on :class:`AttackerIdentity`'s ``schema_version``. """ __tablename__ = "attacker_intel" diff --git a/decnet/web/db/models/attackers.py b/decnet/web/db/models/attackers.py index 061e5be5..bf5dfbe7 100644 --- a/decnet/web/db/models/attackers.py +++ b/decnet/web/db/models/attackers.py @@ -1,4 +1,13 @@ -"""Attacker core + per-attacker behavioral and per-session profile rows.""" +"""Attacker core + per-attacker behavioral rows. + +Per-session keystroke-dynamics fingerprints have moved out of this +module: the column-zoo ``SessionProfile`` shipped here pre-v0 was +superseded by the BEHAVE-SHELL ``observations`` table +(``decnet/web/db/models/observations.py``), which mirrors the BEHAVE +``Observation`` envelope and accepts every primitive the extractor +emits. See ``development/BEHAVE-INTEGRATION.md`` for the design and +``DEBT-036`` (stale) → ``DEBT-050`` for the paydown trail. +""" from datetime import datetime, timezone from typing import Any, List, Optional @@ -9,26 +18,6 @@ from sqlmodel import Field, SQLModel from ._base import _BIG_TEXT -# ─── Keystroke-dynamics tuning constants ────────────────────────────────────── -# -# These are the semantic thresholds the session-profile ingester (DEBT-036) -# uses to bucket IATs and decide what "started a new action" means. Keeping -# them here (not inline in the ingester) so that: -# * the schema docstrings below can reference exact boundaries instead of -# copy-pasted magic numbers, and -# * a future calibration pass against real honeypot session data only has -# to touch one place. -# All values in seconds. - -KD_PAUSE_BURST_MAX_S: float = 0.2 # IAT < this = muscle-memory digraph -KD_PAUSE_THINK_MAX_S: float = 1.5 # IAT < this = semantic / context-switch pause - # everything ≥ this lands in the distracted bucket -KD_START_OF_ACTION_IDLE_S: float = 2.0 # idle gap that counts as "new action" - # raised from 1s — 1s still catches a lot of - # mid-command hesitation, 2s is closer to - # empirical "meaningfully new action" - - class Attacker(SQLModel, table=True): """ Per-IP **observation** row. Every distinct source IP we observe gets @@ -208,10 +197,15 @@ class AttackerIdentity(SQLModel, table=True): c2_endpoints: Optional[str] = Field( default=None, sa_column=Column("c2_endpoints", Text, nullable=True) ) - # V2 keystroke-dynamics hook. Same shape as - # SessionProfile.kd_digraph_simhash; this is the centroid (or - # majority vote) across the identity's sessions. BINARY(8) so - # MySQL can index without a prefix length, same as session_profile. + # V2 keystroke-dynamics hook. Per-identity centroid (or majority + # vote) across the identity's session-level digraph SimHashes. + # The per-session SimHashes themselves now ride as BEHAVE + # observations (``cognitive.*`` digraph primitive — see + # ``development/BEHAVE-INTEGRATION.md`` and the BEHAVE-SHELL + # registry); this column is the rollup the (future) attribution + # engine will write into so the federation gossip layer + # has one identity-level fingerprint to compare across operators. + # BINARY(8) so MySQL can index without a prefix length. kd_digraph_simhash: Optional[bytes] = Field( default=None, sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True), @@ -281,139 +275,6 @@ class AttackerBehavior(SQLModel, table=True): ) -class SessionProfile(SQLModel, table=True): - """ - Per-session keystroke-dynamics fingerprint. - - One row per recorded interactive session. Pre-v1 the ingestion job - that populates these columns is not yet built (tracked as gap #2 in - SIGNAL_CAPTURE_AUDIT.md); the table ships empty so that: - * downstream correlation/federation work can target a stable schema, and - * `schema_version` is committed to storage from day one — federation - gossip in v2 requires cross-operator compatibility, and retrofitting - a version column after rows exist is painful. - - All feature columns are nullable so the empty write path (one row per - closed session) is valid without the behavioral analyzer online yet. - """ - __tablename__ = "session_profile" - sid: str = Field(primary_key=True) # session UUID - log_id: Optional[int] = Field( - default=None, foreign_key="logs.id", index=True - ) - schema_version: int = Field(default=1) - # ────────────────────────────────────────────────────────────────────── - # Keystroke-dynamics feature columns (kd_*). - # - # Intended use: session clustering and tooling attribution - # ("is this the same typist?" / "is this a known C2 - # framework's paste cadence?"). - # Explicitly NOT for: attribution to named individuals, access or - # admission decisions, any ML-driven identity lookup, - # or biometric-login-style user identification. Those - # framings push into legal/ethics territory we don't - # want this project walking into by accident. - # PII discipline: every kd_* column aggregates CHARACTERS and TIMING - # only — never raw input-stream content. Attacker - # passwords typed over SSH must not land here. - # Nulls semantic: a null means "ingester hasn't run on this session - # yet", not "zero events". Consumers should treat - # null as absent, not as a computed zero. - # ────────────────────────────────────────────────────────────────────── - # Inter-key interval timing moments (seconds). - kd_iki_mean: Optional[float] = None - kd_iki_stdev: Optional[float] = None - kd_iki_p50: Optional[float] = None - kd_iki_p95: Optional[float] = None - kd_enter_latency_p50: Optional[float] = None - kd_enter_latency_p95: Optional[float] = None - # Cadence ratios. - kd_burst_ratio: Optional[float] = None - kd_think_ratio: Optional[float] = None - # Control-character rates (events per keystroke). - kd_ctrl_backspace: Optional[float] = None - kd_ctrl_wkill: Optional[float] = None - kd_ctrl_ukill: Optional[float] = None - kd_ctrl_abort: Optional[float] = None - kd_ctrl_eof: Optional[float] = None - kd_arrow_rate: Optional[float] = None - kd_tab_rate: Optional[float] = None - # 8-byte SimHash over keystroke digraphs — Hamming-comparable across sessions. - # Fixed-width BINARY(8) rather than BLOB: MySQL can't index BLOB/TEXT - # columns without a prefix length, and SimHashes are always exactly 8 - # bytes so a variable-length type gains nothing here. - # - # PII discipline: the simhash is computed over keystroke CHARACTERS - # (digraph bigrams), never over the raw content of the input stream — - # attacker passwords typed over SSH must never land in this column. - kd_digraph_simhash: Optional[bytes] = Field( - default=None, - sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True), - ) - # Top-N most-common digraphs with their mean IAT, as JSON. - # Complements kd_digraph_simhash: the simhash answers "same typist?", - # this answers "same typist IN THE SAME MENTAL STATE?" (tired vs rested - # vs distracted shifts bigram-specific IATs measurably). Shape: - # [["th", 47, 0.082], ["in", 31, 0.091], ...] (bigram, count, mean_iat_s) - # Bounded by the ingester to N≤32 to cap row width. - # - # TODO(DEBT-036 upgrade path): JSON-in-TEXT is fine for v1's - # "surface the typist's top digraphs on the attacker page" use - # case, but every similarity query (e.g. "find sessions where the - # 'th' digraph mean IAT is within 20 ms of this one") has to pull - # the string, parse JSON, compare — O(sessions) with a constant - # overhead per row. If that query shape becomes hot, promote to a - # dedicated `session_bigram_stats(sid, bigram, count, mean_iat_s)` - # table with a (bigram, mean_iat_s) index, or a JSONB column on - # Postgres with a GIN index. Either is straightforward, neither - # changes the write-side ingester materially. - kd_top_bigrams: Optional[str] = Field( - default=None, sa_column=Column("kd_top_bigrams", Text, nullable=True), - ) - # IAT of the first keystroke following an idle gap > - # KD_START_OF_ACTION_IDLE_S (or the session-start gap before the - # very first keystroke). Separates "initiating a command" from - # "executing a remembered one" — real humans have measurable - # start-of-action latency, bots don't. Median across all such - # initiations in the session, seconds. - # - # Prompt-agnostic on purpose: PS1 / multi-line prompts / sudo - # password prompts make prompt-anchored detection fragile. The - # idle-gap approach conflates post-prompt action-start with - # mid-session think-and-resume — acceptable for a single median - # field; if we later want to split them, feed the concurrent - # output-stream prompt-pattern into the ingester and fall back to - # time-only detection when it misses. - kd_start_of_action_latency: Optional[float] = None - # Three-bucket pause-length histogram, counts (not ratios — raw - # counts preserve the total-keystrokes denominator in the column - # itself). Bucket edges are the KD_PAUSE_* module constants: - # burst : IAT < KD_PAUSE_BURST_MAX_S (muscle-memory digraphs) - # think : KD_PAUSE_BURST_MAX_S ≤ IAT < KD_PAUSE_THINK_MAX_S - # (semantic boundary, context switch) - # distracted: IAT ≥ KD_PAUSE_THINK_MAX_S (went to look something - # up, got paged, reading another window) - # More discriminating than the flat burst_ratio / think_ratio pair: - # C2 operators concentrate in the burst bucket with a thin tail; - # opportunistic humans have a fat think bucket and a long - # distracted tail. - kd_pause_hist_burst: Optional[int] = None - kd_pause_hist_think: Optional[int] = None - kd_pause_hist_distracted: Optional[int] = None - # Longest IAT in the session, seconds. The distracted-bucket count - # alone can't tell "one 3-second pause" from "three 60-second - # pauses" — both contribute 1-3 to the distracted bucket but - # represent different behaviours (brief think vs actual - # disengagement). max_pause_gap carries that signal in one scalar. - kd_max_pause_gap: Optional[float] = None - # Derived totals. - total_keystrokes: Optional[int] = None - session_duration_s: Optional[float] = None - created_at: datetime = Field( - default_factory=lambda: datetime.now(timezone.utc) - ) - - class SmtpTarget(SQLModel, table=True): """ Per-attacker list of victim domains observed via the SMTP honeypots. diff --git a/decnet/web/db/mysql/repository.py b/decnet/web/db/mysql/repository.py index 2e5accaa..6b39bfc6 100644 --- a/decnet/web/db/mysql/repository.py +++ b/decnet/web/db/mysql/repository.py @@ -97,35 +97,6 @@ class MySQLRepository(SQLModelRepository): f"ALTER TABLE `{table_name}` MODIFY COLUMN `{col_name}` {spec}" )) - async def _migrate_session_profile_table(self) -> None: - """Add DEBT-036 keystroke-dynamics columns (start-of-action latency, - three-bucket pause histogram, top-bigrams JSON) to existing tables. - - MySQL's ``ALTER TABLE ADD COLUMN`` fails if the column already - exists, so gate on ``information_schema.COLUMNS`` to stay - idempotent. - """ - async with self.engine.begin() as conn: - rows = (await conn.execute(text( - "SELECT COLUMN_NAME FROM information_schema.COLUMNS " - "WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'session_profile'" - ))).fetchall() - if not rows: - return - existing_cols = {r[0] for r in rows} - additions = [ - ("kd_top_bigrams", "TEXT NULL"), - ("kd_start_of_action_latency", "DOUBLE NULL"), - ("kd_pause_hist_burst", "INT NULL"), - ("kd_pause_hist_think", "INT NULL"), - ("kd_pause_hist_distracted", "INT NULL"), - ] - for col_name, col_spec in additions: - if col_name not in existing_cols: - await conn.execute(text( - f"ALTER TABLE session_profile ADD COLUMN {col_name} {col_spec}" - )) - async def initialize(self) -> None: """Create tables and run all MySQL-specific migrations. @@ -138,7 +109,6 @@ class MySQLRepository(SQLModelRepository): await lock_conn.execute(text("SELECT GET_LOCK('decnet_schema_init', 30)")) try: await self._migrate_attackers_table() - await self._migrate_session_profile_table() await self._migrate_column_types() async with self.engine.begin() as conn: await conn.run_sync(SQLModel.metadata.create_all) diff --git a/decnet/web/db/repository.py b/decnet/web/db/repository.py index 79ed017d..399c8105 100644 --- a/decnet/web/db/repository.py +++ b/decnet/web/db/repository.py @@ -303,16 +303,6 @@ class BaseRepository(ABC): """Bulk-fetch behavior rows keyed by attacker IP (JOIN to attackers).""" pass - @abstractmethod - async def upsert_session_profile(self, sid: str, data: dict[str, Any]) -> None: - """Insert or update the keystroke-dynamics profile row for a session.""" - pass - - @abstractmethod - async def get_session_profile(self, sid: str) -> Optional[dict[str, Any]]: - """Retrieve the keystroke-dynamics profile row for a session.""" - pass - # ─── BEHAVE-SHELL observations ───────────────────────────────────── # See development/BEHAVE-INTEGRATION.md §"Storage" for the full # schema rationale. Every observation envelope emitted by the diff --git a/decnet/web/db/sqlite/repository.py b/decnet/web/db/sqlite/repository.py index 247345e1..37d048e4 100644 --- a/decnet/web/db/sqlite/repository.py +++ b/decnet/web/db/sqlite/repository.py @@ -55,31 +55,6 @@ class SQLiteRepository(SQLModelRepository): "ALTER TABLE attackers ADD COLUMN country_source VARCHAR(16)" )) - async def _migrate_session_profile_table(self) -> None: - """Add DEBT-036 keystroke-dynamics columns (start-of-action latency, - three-bucket pause histogram, top-bigrams JSON) to existing tables. - - SQLite's ``ALTER TABLE ADD COLUMN`` fails if the column already - exists, so gate on ``PRAGMA table_info`` to stay idempotent. - """ - async with self.engine.begin() as conn: - rows = (await conn.execute(text("PRAGMA table_info(session_profile)"))).fetchall() - if not rows: - return # table absent; create_all() handles it. - existing_cols = {r[1] for r in rows} - additions = [ - ("kd_top_bigrams", "TEXT"), - ("kd_start_of_action_latency", "REAL"), - ("kd_pause_hist_burst", "INTEGER"), - ("kd_pause_hist_think", "INTEGER"), - ("kd_pause_hist_distracted", "INTEGER"), - ] - for col_name, col_type in additions: - if col_name not in existing_cols: - await conn.execute(text( - f"ALTER TABLE session_profile ADD COLUMN {col_name} {col_type}" - )) - def _json_field_equals(self, key: str): # SQLite stores JSON as text; json_extract is the canonical accessor. return text(f"json_extract(fields, '$.{key}') = :val") diff --git a/decnet/web/db/sqlmodel_repo/__init__.py b/decnet/web/db/sqlmodel_repo/__init__.py index cceda4c0..6944cd86 100644 --- a/decnet/web/db/sqlmodel_repo/__init__.py +++ b/decnet/web/db/sqlmodel_repo/__init__.py @@ -97,7 +97,6 @@ class SQLModelRepository( """Create tables if absent and seed the admin user.""" from sqlmodel import SQLModel await self._migrate_attackers_table() - await self._migrate_session_profile_table() async with self.engine.begin() as conn: await conn.run_sync(SQLModel.metadata.create_all) await self._ensure_admin_user() @@ -137,11 +136,6 @@ class SQLModelRepository( """Legacy-schema cleanup. Override per dialect (DDL introspection is non-portable).""" return None - async def _migrate_session_profile_table(self) -> None: - """Add DEBT-036 keystroke-dynamics columns to existing session_profile - rows. Override per dialect — DDL introspection is non-portable.""" - return None - async def get_deckies(self) -> List[dict]: _state = await asyncio.to_thread(load_state) return [_d.model_dump() for _d in _state[0].deckies] if _state else [] diff --git a/decnet/web/db/sqlmodel_repo/attackers/__init__.py b/decnet/web/db/sqlmodel_repo/attackers/__init__.py index cb0b5b08..92383afd 100644 --- a/decnet/web/db/sqlmodel_repo/attackers/__init__.py +++ b/decnet/web/db/sqlmodel_repo/attackers/__init__.py @@ -1,9 +1,9 @@ """Attacker repository methods. -The full domain spans ~500 lines of methods across attacker rows, -behavior signals, session profiles, SMTP victim tracking, and -log-derived activity views. Each concern lives in its own submixin; -``AttackersMixin`` composes them. +Per-concern submixins composed onto ``AttackersMixin``. The legacy +``SessionProfilesMixin`` was dropped when the BEHAVE-SHELL +``observations`` table replaced the ``session_profile`` column-zoo +(see DEBT-050 → ``decnet/web/db/sqlmodel_repo/observations.py``). ``_deserialize_attacker`` lives on ``AttackersCoreMixin`` and is reached from ``IdentitiesMixin.list_observations_for_identity`` via ``self.`` — @@ -15,14 +15,12 @@ from __future__ import annotations from decnet.web.db.sqlmodel_repo.attackers._core import AttackersCoreMixin from decnet.web.db.sqlmodel_repo.attackers.activity import AttackerActivityMixin from decnet.web.db.sqlmodel_repo.attackers.behavior import AttackerBehaviorMixin -from decnet.web.db.sqlmodel_repo.attackers.sessions import SessionProfilesMixin from decnet.web.db.sqlmodel_repo.attackers.smtp import SmtpTargetsMixin class AttackersMixin( AttackerActivityMixin, AttackerBehaviorMixin, - SessionProfilesMixin, SmtpTargetsMixin, AttackersCoreMixin, ): diff --git a/decnet/web/db/sqlmodel_repo/attackers/sessions.py b/decnet/web/db/sqlmodel_repo/attackers/sessions.py deleted file mode 100644 index 07787ce6..00000000 --- a/decnet/web/db/sqlmodel_repo/attackers/sessions.py +++ /dev/null @@ -1,51 +0,0 @@ -"""Per-session profile rows (keystroke-dynamics features land here at -ingestion-time post-V2).""" -from __future__ import annotations - -from typing import Any, Optional - -from sqlalchemy import select - -from decnet.web.db.models import SessionProfile - - -from decnet.web.db.sqlmodel_repo._helpers import _MixinBase - -class SessionProfilesMixin(_MixinBase): - async def upsert_session_profile( - self, - sid: str, - data: dict[str, Any], - ) -> None: - """ - Write (or update) the session_profile row for *sid*. - - Pre-v1, the typical call is the empty-write path at session close: - `upsert_session_profile(sid, {"log_id": })` — all keystroke - feature columns stay NULL until the V2 ingestion job populates them. - """ - async with self._session() as session: - result = await session.execute( - select(SessionProfile).where(SessionProfile.sid == sid) - ) - existing = result.scalar_one_or_none() - if existing: - for k, v in data.items(): - setattr(existing, k, v) - session.add(existing) - else: - session.add(SessionProfile(sid=sid, **data)) - await session.commit() - - async def get_session_profile( - self, - sid: str, - ) -> Optional[dict[str, Any]]: - async with self._session() as session: - result = await session.execute( - select(SessionProfile).where(SessionProfile.sid == sid) - ) - row = result.scalar_one_or_none() - if not row: - return None - return row.model_dump(mode="json") diff --git a/decnet/web/router/attackers/api_get_attacker_detail.py b/decnet/web/router/attackers/api_get_attacker_detail.py index f7263142..04670207 100644 --- a/decnet/web/router/attackers/api_get_attacker_detail.py +++ b/decnet/web/router/attackers/api_get_attacker_detail.py @@ -41,4 +41,13 @@ async def get_attacker_detail( # a ROTATION DETECTED badge when the count crosses a threshold. attacker["ip_leaks"] = await repo.get_attacker_ip_leaks(uuid, limit=10) attacker["ip_leaks_total"] = await repo.count_attacker_ip_leaks(uuid) + # BEHAVE-SHELL observations — latest value per primitive for this + # attacker. Empty dict (rendered as empty list) until the + # extractor (DEBT-050) lands and starts writing rows. The frontend + # panel that consumes this ships in BEHAVE-INTEGRATION.md Phase 5. + latest_per_primitive = await repo.latest_observation_per_primitive(uuid) + attacker["observations"] = [ + {"primitive": primitive, **payload} + for primitive, payload in sorted(latest_per_primitive.items()) + ] return attacker diff --git a/tests/db/mysql/test_mysql_migration.py b/tests/db/mysql/test_mysql_migration.py index febe9cab..5d50e506 100644 --- a/tests/db/mysql/test_mysql_migration.py +++ b/tests/db/mysql/test_mysql_migration.py @@ -191,9 +191,13 @@ async def test_migrate_column_types_default_clause_per_column(): @pytest.mark.asyncio async def test_mysql_initialize_calls_migrate_column_types(): - """MySQLRepository.initialize() must invoke every migration helper in - the right order: attackers first, then session_profile (DEBT-036), - then column types, then seed the admin user.""" + """MySQLRepository.initialize() must invoke every migration helper + in the right order: attackers first, then column types, then seed + the admin user. + + The legacy ``_migrate_session_profile_table`` step (DEBT-036) was + dropped when SessionProfile was deleted in favour of the + ``observations`` table — see DEBT-050 / BEHAVE-INTEGRATION.md.""" repo = _make_repo() call_order: list[str] = [] @@ -201,9 +205,6 @@ async def test_mysql_initialize_calls_migrate_column_types(): async def fake_migrate_attackers(): call_order.append("migrate_attackers") - async def fake_migrate_session_profile(): - call_order.append("migrate_session_profile") - async def fake_migrate_column_types(): call_order.append("migrate_column_types") @@ -211,7 +212,6 @@ async def test_mysql_initialize_calls_migrate_column_types(): call_order.append("ensure_admin") repo._migrate_attackers_table = fake_migrate_attackers - repo._migrate_session_profile_table = fake_migrate_session_profile repo._migrate_column_types = fake_migrate_column_types repo._ensure_admin_user = fake_ensure_admin @@ -228,7 +228,6 @@ async def test_mysql_initialize_calls_migrate_column_types(): assert call_order == [ "migrate_attackers", - "migrate_session_profile", "migrate_column_types", "ensure_admin", ], f"Unexpected call order: {call_order}" diff --git a/tests/db/test_base_repo.py b/tests/db/test_base_repo.py index e9c0f83b..1a0245ca 100644 --- a/tests/db/test_base_repo.py +++ b/tests/db/test_base_repo.py @@ -38,8 +38,6 @@ class DummyRepo(BaseRepository): async def upsert_attacker_behavior(self, u, d): await super().upsert_attacker_behavior(u, d) async def get_attacker_behavior(self, u): await super().get_attacker_behavior(u) async def get_behaviors_for_ips(self, ips): await super().get_behaviors_for_ips(ips) - async def upsert_session_profile(self, sid, data): await super().upsert_session_profile(sid, data) - async def get_session_profile(self, sid): await super().get_session_profile(sid) # BEHAVE-SHELL observations (DEBT-050 / BEHAVE-INTEGRATION.md Phase 1) async def upsert_observation(self, data): await super().upsert_observation(data); return "" async def latest_observation_per_primitive(self, attacker_uuid): await super().latest_observation_per_primitive(attacker_uuid); return {} @@ -129,8 +127,6 @@ async def test_base_repo_coverage(): await dr.upsert_attacker_behavior("a", {}) await dr.get_attacker_behavior("a") await dr.get_behaviors_for_ips({"1.1.1.1"}) - await dr.upsert_session_profile("sid", {}) - await dr.get_session_profile("sid") await dr.upsert_observation({}) await dr.latest_observation_per_primitive("a") await dr.observations_time_series("a", "motor.input_modality") diff --git a/tests/profiler/test_session_profile.py b/tests/profiler/test_session_profile.py deleted file mode 100644 index c7022aa4..00000000 --- a/tests/profiler/test_session_profile.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -Tests for the session_profile table + repo helpers (SIGNAL_CAPTURE_AUDIT gap #2). - -Pre-v1 the ingestion job that populates keystroke-dynamics features is -deferred; this suite exercises the empty-write path (one row per session, -all feature columns NULL) and round-trips a filled row so future work can -land without re-discovering the schema. -""" -import pytest -from decnet.web.db.factory import get_repository - - -@pytest.fixture -async def repo(tmp_path): - r = get_repository(db_path=str(tmp_path / "session_profile.db")) - await r.initialize() - return r - - -@pytest.mark.anyio -async def test_empty_write_path_ships_null_features(repo): - # Session close writes `{}` — schema_version defaults to 1, all feature - # columns stay NULL. - await repo.upsert_session_profile("sid-1", {}) - row = await repo.get_session_profile("sid-1") - assert row is not None - assert row["sid"] == "sid-1" - assert row["schema_version"] == 1 - assert row["kd_iki_mean"] is None - assert row["kd_digraph_simhash"] is None - assert row["total_keystrokes"] is None - - -@pytest.mark.anyio -async def test_upsert_replaces_existing(repo): - await repo.upsert_session_profile("sid-2", {}) - await repo.upsert_session_profile( - "sid-2", - { - "kd_iki_mean": 0.120, - "kd_iki_p95": 0.450, - "total_keystrokes": 512, - "session_duration_s": 61.3, - }, - ) - row = await repo.get_session_profile("sid-2") - assert row["kd_iki_mean"] == pytest.approx(0.120) - assert row["kd_iki_p95"] == pytest.approx(0.450) - assert row["total_keystrokes"] == 512 - assert row["session_duration_s"] == pytest.approx(61.3) - - -@pytest.mark.anyio -async def test_get_missing_returns_none(repo): - assert await repo.get_session_profile("does-not-exist") is None diff --git a/tests/ttp/test_lifter_absence.py b/tests/ttp/test_lifter_absence.py index dc2d7bc7..2216bb88 100644 --- a/tests/ttp/test_lifter_absence.py +++ b/tests/ttp/test_lifter_absence.py @@ -2,7 +2,7 @@ Every per-source lifter is allowed (and expected) to encounter events whose required join is missing — no ``AttackerIntel`` row, -no ``SessionProfile``, no ``AttackerBehavior``, no canary record, +no ``ObservationRow``, no ``AttackerBehavior``, no canary record, no identity row, no ``CredentialReuse`` entry. Absence is the steady state, not the exception. The contract pinned here: