feat(web): drop SessionProfile, wire observations into AttackerDetail (DEBT-050 / DEBT-036 closure)
Destructive half of BEHAVE-INTEGRATION.md Phase 1. SessionProfile + its kd_* columns + the dialect ALTER TABLE migration helpers are deleted outright; pre-v1, the table shipped empty, no migration ceremony required (per the no-new-_migrate_-pre-v1 memory rule). DEBT-036 closes via DEBT-050 supersedure. AttackerDetail's ``observations`` field is wired to the new ``observations`` table and returns an empty list until the BEHAVE-SHELL extractor (DEBT-050 Phase 2) starts emitting. decnet/web/db/models/attackers.py — SessionProfile class deleted (~135 lines), KD_PAUSE_*/KD_START_OF_ACTION_IDLE_S module constants deleted, module docstring updated to point at the observations table. AttackerIdentity.kd_digraph_simhash is KEPT — it's the v2 federation centroid hook, not a SessionProfile field; docstring repointed to the BEHAVE primitive that will populate it. decnet/web/db/sqlmodel_repo/attackers/sessions.py — DELETED. SessionProfilesMixin dropped from the AttackersMixin MRO. decnet/web/db/repository.py — abstract upsert_session_profile + get_session_profile removed. decnet/web/db/sqlite/repository.py + mysql/repository.py — _migrate_session_profile_table helpers and their initialize() calls removed. mysql initialize() now goes attackers → column_types → admin (no session_profile step). decnet/web/db/models/__init__.py — SessionProfile re-export gone. decnet/web/db/models/attacker_intel.py — docstring cross-reference to SessionProfile.schema_version retargeted to AttackerIdentity. decnet/web/router/attackers/api_get_attacker_detail.py — adds ``observations: []`` to the response by calling ``repo.latest_observation_per_primitive(uuid)`` and projecting to a list sorted by primitive path. Empty until the extractor lands; shape matches BEHAVE-INTEGRATION.md §"AttackerDetail consumer". tests/profiler/test_session_profile.py — DELETED (56 lines). tests/db/test_base_repo.py — DummyRepo loses upsert_session_profile and get_session_profile overrides. tests/db/mysql/test_mysql_migration.py — initialize-call-order assertion updated; session_profile step removed from the expected sequence; docstring records why. tests/ttp/test_lifter_absence.py — docstring "no SessionProfile" → "no ObservationRow".
This commit is contained in:
@@ -48,7 +48,6 @@ from .attackers import (
|
|||||||
AttackerFingerprintState,
|
AttackerFingerprintState,
|
||||||
AttackerIdentity,
|
AttackerIdentity,
|
||||||
AttackersResponse,
|
AttackersResponse,
|
||||||
SessionProfile,
|
|
||||||
SmtpTarget,
|
SmtpTarget,
|
||||||
)
|
)
|
||||||
from .attacker_intel import (
|
from .attacker_intel import (
|
||||||
@@ -255,7 +254,6 @@ __all__ = [
|
|||||||
"AttackersResponse",
|
"AttackersResponse",
|
||||||
"ObservationRow",
|
"ObservationRow",
|
||||||
"ObservedAttachment",
|
"ObservedAttachment",
|
||||||
"SessionProfile",
|
|
||||||
"SmtpTarget",
|
"SmtpTarget",
|
||||||
# campaigns
|
# campaigns
|
||||||
"Campaign",
|
"Campaign",
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ class AttackerIntel(SQLModel, table=True):
|
|||||||
``schema_version`` is committed to storage from day one — federation
|
``schema_version`` is committed to storage from day one — federation
|
||||||
gossip in v2/v3 requires cross-operator compatibility, and
|
gossip in v2/v3 requires cross-operator compatibility, and
|
||||||
retrofitting a version column after rows exist is painful. Mirrors
|
retrofitting a version column after rows exist is painful. Mirrors
|
||||||
the rationale on :class:`SessionProfile`.
|
the rationale on :class:`AttackerIdentity`'s ``schema_version``.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__tablename__ = "attacker_intel"
|
__tablename__ = "attacker_intel"
|
||||||
|
|||||||
@@ -1,4 +1,13 @@
|
|||||||
"""Attacker core + per-attacker behavioral and per-session profile rows."""
|
"""Attacker core + per-attacker behavioral rows.
|
||||||
|
|
||||||
|
Per-session keystroke-dynamics fingerprints have moved out of this
|
||||||
|
module: the column-zoo ``SessionProfile`` shipped here pre-v0 was
|
||||||
|
superseded by the BEHAVE-SHELL ``observations`` table
|
||||||
|
(``decnet/web/db/models/observations.py``), which mirrors the BEHAVE
|
||||||
|
``Observation`` envelope and accepts every primitive the extractor
|
||||||
|
emits. See ``development/BEHAVE-INTEGRATION.md`` for the design and
|
||||||
|
``DEBT-036`` (stale) → ``DEBT-050`` for the paydown trail.
|
||||||
|
"""
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
@@ -9,26 +18,6 @@ from sqlmodel import Field, SQLModel
|
|||||||
from ._base import _BIG_TEXT
|
from ._base import _BIG_TEXT
|
||||||
|
|
||||||
|
|
||||||
# ─── Keystroke-dynamics tuning constants ──────────────────────────────────────
|
|
||||||
#
|
|
||||||
# These are the semantic thresholds the session-profile ingester (DEBT-036)
|
|
||||||
# uses to bucket IATs and decide what "started a new action" means. Keeping
|
|
||||||
# them here (not inline in the ingester) so that:
|
|
||||||
# * the schema docstrings below can reference exact boundaries instead of
|
|
||||||
# copy-pasted magic numbers, and
|
|
||||||
# * a future calibration pass against real honeypot session data only has
|
|
||||||
# to touch one place.
|
|
||||||
# All values in seconds.
|
|
||||||
|
|
||||||
KD_PAUSE_BURST_MAX_S: float = 0.2 # IAT < this = muscle-memory digraph
|
|
||||||
KD_PAUSE_THINK_MAX_S: float = 1.5 # IAT < this = semantic / context-switch pause
|
|
||||||
# everything ≥ this lands in the distracted bucket
|
|
||||||
KD_START_OF_ACTION_IDLE_S: float = 2.0 # idle gap that counts as "new action"
|
|
||||||
# raised from 1s — 1s still catches a lot of
|
|
||||||
# mid-command hesitation, 2s is closer to
|
|
||||||
# empirical "meaningfully new action"
|
|
||||||
|
|
||||||
|
|
||||||
class Attacker(SQLModel, table=True):
|
class Attacker(SQLModel, table=True):
|
||||||
"""
|
"""
|
||||||
Per-IP **observation** row. Every distinct source IP we observe gets
|
Per-IP **observation** row. Every distinct source IP we observe gets
|
||||||
@@ -208,10 +197,15 @@ class AttackerIdentity(SQLModel, table=True):
|
|||||||
c2_endpoints: Optional[str] = Field(
|
c2_endpoints: Optional[str] = Field(
|
||||||
default=None, sa_column=Column("c2_endpoints", Text, nullable=True)
|
default=None, sa_column=Column("c2_endpoints", Text, nullable=True)
|
||||||
)
|
)
|
||||||
# V2 keystroke-dynamics hook. Same shape as
|
# V2 keystroke-dynamics hook. Per-identity centroid (or majority
|
||||||
# SessionProfile.kd_digraph_simhash; this is the centroid (or
|
# vote) across the identity's session-level digraph SimHashes.
|
||||||
# majority vote) across the identity's sessions. BINARY(8) so
|
# The per-session SimHashes themselves now ride as BEHAVE
|
||||||
# MySQL can index without a prefix length, same as session_profile.
|
# observations (``cognitive.*`` digraph primitive — see
|
||||||
|
# ``development/BEHAVE-INTEGRATION.md`` and the BEHAVE-SHELL
|
||||||
|
# registry); this column is the rollup the (future) attribution
|
||||||
|
# engine will write into so the federation gossip layer
|
||||||
|
# has one identity-level fingerprint to compare across operators.
|
||||||
|
# BINARY(8) so MySQL can index without a prefix length.
|
||||||
kd_digraph_simhash: Optional[bytes] = Field(
|
kd_digraph_simhash: Optional[bytes] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
|
sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
|
||||||
@@ -281,139 +275,6 @@ class AttackerBehavior(SQLModel, table=True):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SessionProfile(SQLModel, table=True):
|
|
||||||
"""
|
|
||||||
Per-session keystroke-dynamics fingerprint.
|
|
||||||
|
|
||||||
One row per recorded interactive session. Pre-v1 the ingestion job
|
|
||||||
that populates these columns is not yet built (tracked as gap #2 in
|
|
||||||
SIGNAL_CAPTURE_AUDIT.md); the table ships empty so that:
|
|
||||||
* downstream correlation/federation work can target a stable schema, and
|
|
||||||
* `schema_version` is committed to storage from day one — federation
|
|
||||||
gossip in v2 requires cross-operator compatibility, and retrofitting
|
|
||||||
a version column after rows exist is painful.
|
|
||||||
|
|
||||||
All feature columns are nullable so the empty write path (one row per
|
|
||||||
closed session) is valid without the behavioral analyzer online yet.
|
|
||||||
"""
|
|
||||||
__tablename__ = "session_profile"
|
|
||||||
sid: str = Field(primary_key=True) # session UUID
|
|
||||||
log_id: Optional[int] = Field(
|
|
||||||
default=None, foreign_key="logs.id", index=True
|
|
||||||
)
|
|
||||||
schema_version: int = Field(default=1)
|
|
||||||
# ──────────────────────────────────────────────────────────────────────
|
|
||||||
# Keystroke-dynamics feature columns (kd_*).
|
|
||||||
#
|
|
||||||
# Intended use: session clustering and tooling attribution
|
|
||||||
# ("is this the same typist?" / "is this a known C2
|
|
||||||
# framework's paste cadence?").
|
|
||||||
# Explicitly NOT for: attribution to named individuals, access or
|
|
||||||
# admission decisions, any ML-driven identity lookup,
|
|
||||||
# or biometric-login-style user identification. Those
|
|
||||||
# framings push into legal/ethics territory we don't
|
|
||||||
# want this project walking into by accident.
|
|
||||||
# PII discipline: every kd_* column aggregates CHARACTERS and TIMING
|
|
||||||
# only — never raw input-stream content. Attacker
|
|
||||||
# passwords typed over SSH must not land here.
|
|
||||||
# Nulls semantic: a null means "ingester hasn't run on this session
|
|
||||||
# yet", not "zero events". Consumers should treat
|
|
||||||
# null as absent, not as a computed zero.
|
|
||||||
# ──────────────────────────────────────────────────────────────────────
|
|
||||||
# Inter-key interval timing moments (seconds).
|
|
||||||
kd_iki_mean: Optional[float] = None
|
|
||||||
kd_iki_stdev: Optional[float] = None
|
|
||||||
kd_iki_p50: Optional[float] = None
|
|
||||||
kd_iki_p95: Optional[float] = None
|
|
||||||
kd_enter_latency_p50: Optional[float] = None
|
|
||||||
kd_enter_latency_p95: Optional[float] = None
|
|
||||||
# Cadence ratios.
|
|
||||||
kd_burst_ratio: Optional[float] = None
|
|
||||||
kd_think_ratio: Optional[float] = None
|
|
||||||
# Control-character rates (events per keystroke).
|
|
||||||
kd_ctrl_backspace: Optional[float] = None
|
|
||||||
kd_ctrl_wkill: Optional[float] = None
|
|
||||||
kd_ctrl_ukill: Optional[float] = None
|
|
||||||
kd_ctrl_abort: Optional[float] = None
|
|
||||||
kd_ctrl_eof: Optional[float] = None
|
|
||||||
kd_arrow_rate: Optional[float] = None
|
|
||||||
kd_tab_rate: Optional[float] = None
|
|
||||||
# 8-byte SimHash over keystroke digraphs — Hamming-comparable across sessions.
|
|
||||||
# Fixed-width BINARY(8) rather than BLOB: MySQL can't index BLOB/TEXT
|
|
||||||
# columns without a prefix length, and SimHashes are always exactly 8
|
|
||||||
# bytes so a variable-length type gains nothing here.
|
|
||||||
#
|
|
||||||
# PII discipline: the simhash is computed over keystroke CHARACTERS
|
|
||||||
# (digraph bigrams), never over the raw content of the input stream —
|
|
||||||
# attacker passwords typed over SSH must never land in this column.
|
|
||||||
kd_digraph_simhash: Optional[bytes] = Field(
|
|
||||||
default=None,
|
|
||||||
sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
|
|
||||||
)
|
|
||||||
# Top-N most-common digraphs with their mean IAT, as JSON.
|
|
||||||
# Complements kd_digraph_simhash: the simhash answers "same typist?",
|
|
||||||
# this answers "same typist IN THE SAME MENTAL STATE?" (tired vs rested
|
|
||||||
# vs distracted shifts bigram-specific IATs measurably). Shape:
|
|
||||||
# [["th", 47, 0.082], ["in", 31, 0.091], ...] (bigram, count, mean_iat_s)
|
|
||||||
# Bounded by the ingester to N≤32 to cap row width.
|
|
||||||
#
|
|
||||||
# TODO(DEBT-036 upgrade path): JSON-in-TEXT is fine for v1's
|
|
||||||
# "surface the typist's top digraphs on the attacker page" use
|
|
||||||
# case, but every similarity query (e.g. "find sessions where the
|
|
||||||
# 'th' digraph mean IAT is within 20 ms of this one") has to pull
|
|
||||||
# the string, parse JSON, compare — O(sessions) with a constant
|
|
||||||
# overhead per row. If that query shape becomes hot, promote to a
|
|
||||||
# dedicated `session_bigram_stats(sid, bigram, count, mean_iat_s)`
|
|
||||||
# table with a (bigram, mean_iat_s) index, or a JSONB column on
|
|
||||||
# Postgres with a GIN index. Either is straightforward, neither
|
|
||||||
# changes the write-side ingester materially.
|
|
||||||
kd_top_bigrams: Optional[str] = Field(
|
|
||||||
default=None, sa_column=Column("kd_top_bigrams", Text, nullable=True),
|
|
||||||
)
|
|
||||||
# IAT of the first keystroke following an idle gap >
|
|
||||||
# KD_START_OF_ACTION_IDLE_S (or the session-start gap before the
|
|
||||||
# very first keystroke). Separates "initiating a command" from
|
|
||||||
# "executing a remembered one" — real humans have measurable
|
|
||||||
# start-of-action latency, bots don't. Median across all such
|
|
||||||
# initiations in the session, seconds.
|
|
||||||
#
|
|
||||||
# Prompt-agnostic on purpose: PS1 / multi-line prompts / sudo
|
|
||||||
# password prompts make prompt-anchored detection fragile. The
|
|
||||||
# idle-gap approach conflates post-prompt action-start with
|
|
||||||
# mid-session think-and-resume — acceptable for a single median
|
|
||||||
# field; if we later want to split them, feed the concurrent
|
|
||||||
# output-stream prompt-pattern into the ingester and fall back to
|
|
||||||
# time-only detection when it misses.
|
|
||||||
kd_start_of_action_latency: Optional[float] = None
|
|
||||||
# Three-bucket pause-length histogram, counts (not ratios — raw
|
|
||||||
# counts preserve the total-keystrokes denominator in the column
|
|
||||||
# itself). Bucket edges are the KD_PAUSE_* module constants:
|
|
||||||
# burst : IAT < KD_PAUSE_BURST_MAX_S (muscle-memory digraphs)
|
|
||||||
# think : KD_PAUSE_BURST_MAX_S ≤ IAT < KD_PAUSE_THINK_MAX_S
|
|
||||||
# (semantic boundary, context switch)
|
|
||||||
# distracted: IAT ≥ KD_PAUSE_THINK_MAX_S (went to look something
|
|
||||||
# up, got paged, reading another window)
|
|
||||||
# More discriminating than the flat burst_ratio / think_ratio pair:
|
|
||||||
# C2 operators concentrate in the burst bucket with a thin tail;
|
|
||||||
# opportunistic humans have a fat think bucket and a long
|
|
||||||
# distracted tail.
|
|
||||||
kd_pause_hist_burst: Optional[int] = None
|
|
||||||
kd_pause_hist_think: Optional[int] = None
|
|
||||||
kd_pause_hist_distracted: Optional[int] = None
|
|
||||||
# Longest IAT in the session, seconds. The distracted-bucket count
|
|
||||||
# alone can't tell "one 3-second pause" from "three 60-second
|
|
||||||
# pauses" — both contribute 1-3 to the distracted bucket but
|
|
||||||
# represent different behaviours (brief think vs actual
|
|
||||||
# disengagement). max_pause_gap carries that signal in one scalar.
|
|
||||||
kd_max_pause_gap: Optional[float] = None
|
|
||||||
# Derived totals.
|
|
||||||
total_keystrokes: Optional[int] = None
|
|
||||||
session_duration_s: Optional[float] = None
|
|
||||||
created_at: datetime = Field(
|
|
||||||
default_factory=lambda: datetime.now(timezone.utc)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SmtpTarget(SQLModel, table=True):
|
class SmtpTarget(SQLModel, table=True):
|
||||||
"""
|
"""
|
||||||
Per-attacker list of victim domains observed via the SMTP honeypots.
|
Per-attacker list of victim domains observed via the SMTP honeypots.
|
||||||
|
|||||||
@@ -97,35 +97,6 @@ class MySQLRepository(SQLModelRepository):
|
|||||||
f"ALTER TABLE `{table_name}` MODIFY COLUMN `{col_name}` {spec}"
|
f"ALTER TABLE `{table_name}` MODIFY COLUMN `{col_name}` {spec}"
|
||||||
))
|
))
|
||||||
|
|
||||||
async def _migrate_session_profile_table(self) -> None:
|
|
||||||
"""Add DEBT-036 keystroke-dynamics columns (start-of-action latency,
|
|
||||||
three-bucket pause histogram, top-bigrams JSON) to existing tables.
|
|
||||||
|
|
||||||
MySQL's ``ALTER TABLE ADD COLUMN`` fails if the column already
|
|
||||||
exists, so gate on ``information_schema.COLUMNS`` to stay
|
|
||||||
idempotent.
|
|
||||||
"""
|
|
||||||
async with self.engine.begin() as conn:
|
|
||||||
rows = (await conn.execute(text(
|
|
||||||
"SELECT COLUMN_NAME FROM information_schema.COLUMNS "
|
|
||||||
"WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'session_profile'"
|
|
||||||
))).fetchall()
|
|
||||||
if not rows:
|
|
||||||
return
|
|
||||||
existing_cols = {r[0] for r in rows}
|
|
||||||
additions = [
|
|
||||||
("kd_top_bigrams", "TEXT NULL"),
|
|
||||||
("kd_start_of_action_latency", "DOUBLE NULL"),
|
|
||||||
("kd_pause_hist_burst", "INT NULL"),
|
|
||||||
("kd_pause_hist_think", "INT NULL"),
|
|
||||||
("kd_pause_hist_distracted", "INT NULL"),
|
|
||||||
]
|
|
||||||
for col_name, col_spec in additions:
|
|
||||||
if col_name not in existing_cols:
|
|
||||||
await conn.execute(text(
|
|
||||||
f"ALTER TABLE session_profile ADD COLUMN {col_name} {col_spec}"
|
|
||||||
))
|
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def initialize(self) -> None:
|
||||||
"""Create tables and run all MySQL-specific migrations.
|
"""Create tables and run all MySQL-specific migrations.
|
||||||
|
|
||||||
@@ -138,7 +109,6 @@ class MySQLRepository(SQLModelRepository):
|
|||||||
await lock_conn.execute(text("SELECT GET_LOCK('decnet_schema_init', 30)"))
|
await lock_conn.execute(text("SELECT GET_LOCK('decnet_schema_init', 30)"))
|
||||||
try:
|
try:
|
||||||
await self._migrate_attackers_table()
|
await self._migrate_attackers_table()
|
||||||
await self._migrate_session_profile_table()
|
|
||||||
await self._migrate_column_types()
|
await self._migrate_column_types()
|
||||||
async with self.engine.begin() as conn:
|
async with self.engine.begin() as conn:
|
||||||
await conn.run_sync(SQLModel.metadata.create_all)
|
await conn.run_sync(SQLModel.metadata.create_all)
|
||||||
|
|||||||
@@ -303,16 +303,6 @@ class BaseRepository(ABC):
|
|||||||
"""Bulk-fetch behavior rows keyed by attacker IP (JOIN to attackers)."""
|
"""Bulk-fetch behavior rows keyed by attacker IP (JOIN to attackers)."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
async def upsert_session_profile(self, sid: str, data: dict[str, Any]) -> None:
|
|
||||||
"""Insert or update the keystroke-dynamics profile row for a session."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
async def get_session_profile(self, sid: str) -> Optional[dict[str, Any]]:
|
|
||||||
"""Retrieve the keystroke-dynamics profile row for a session."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
# ─── BEHAVE-SHELL observations ─────────────────────────────────────
|
# ─── BEHAVE-SHELL observations ─────────────────────────────────────
|
||||||
# See development/BEHAVE-INTEGRATION.md §"Storage" for the full
|
# See development/BEHAVE-INTEGRATION.md §"Storage" for the full
|
||||||
# schema rationale. Every observation envelope emitted by the
|
# schema rationale. Every observation envelope emitted by the
|
||||||
|
|||||||
@@ -55,31 +55,6 @@ class SQLiteRepository(SQLModelRepository):
|
|||||||
"ALTER TABLE attackers ADD COLUMN country_source VARCHAR(16)"
|
"ALTER TABLE attackers ADD COLUMN country_source VARCHAR(16)"
|
||||||
))
|
))
|
||||||
|
|
||||||
async def _migrate_session_profile_table(self) -> None:
|
|
||||||
"""Add DEBT-036 keystroke-dynamics columns (start-of-action latency,
|
|
||||||
three-bucket pause histogram, top-bigrams JSON) to existing tables.
|
|
||||||
|
|
||||||
SQLite's ``ALTER TABLE ADD COLUMN`` fails if the column already
|
|
||||||
exists, so gate on ``PRAGMA table_info`` to stay idempotent.
|
|
||||||
"""
|
|
||||||
async with self.engine.begin() as conn:
|
|
||||||
rows = (await conn.execute(text("PRAGMA table_info(session_profile)"))).fetchall()
|
|
||||||
if not rows:
|
|
||||||
return # table absent; create_all() handles it.
|
|
||||||
existing_cols = {r[1] for r in rows}
|
|
||||||
additions = [
|
|
||||||
("kd_top_bigrams", "TEXT"),
|
|
||||||
("kd_start_of_action_latency", "REAL"),
|
|
||||||
("kd_pause_hist_burst", "INTEGER"),
|
|
||||||
("kd_pause_hist_think", "INTEGER"),
|
|
||||||
("kd_pause_hist_distracted", "INTEGER"),
|
|
||||||
]
|
|
||||||
for col_name, col_type in additions:
|
|
||||||
if col_name not in existing_cols:
|
|
||||||
await conn.execute(text(
|
|
||||||
f"ALTER TABLE session_profile ADD COLUMN {col_name} {col_type}"
|
|
||||||
))
|
|
||||||
|
|
||||||
def _json_field_equals(self, key: str):
|
def _json_field_equals(self, key: str):
|
||||||
# SQLite stores JSON as text; json_extract is the canonical accessor.
|
# SQLite stores JSON as text; json_extract is the canonical accessor.
|
||||||
return text(f"json_extract(fields, '$.{key}') = :val")
|
return text(f"json_extract(fields, '$.{key}') = :val")
|
||||||
|
|||||||
@@ -97,7 +97,6 @@ class SQLModelRepository(
|
|||||||
"""Create tables if absent and seed the admin user."""
|
"""Create tables if absent and seed the admin user."""
|
||||||
from sqlmodel import SQLModel
|
from sqlmodel import SQLModel
|
||||||
await self._migrate_attackers_table()
|
await self._migrate_attackers_table()
|
||||||
await self._migrate_session_profile_table()
|
|
||||||
async with self.engine.begin() as conn:
|
async with self.engine.begin() as conn:
|
||||||
await conn.run_sync(SQLModel.metadata.create_all)
|
await conn.run_sync(SQLModel.metadata.create_all)
|
||||||
await self._ensure_admin_user()
|
await self._ensure_admin_user()
|
||||||
@@ -137,11 +136,6 @@ class SQLModelRepository(
|
|||||||
"""Legacy-schema cleanup. Override per dialect (DDL introspection is non-portable)."""
|
"""Legacy-schema cleanup. Override per dialect (DDL introspection is non-portable)."""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def _migrate_session_profile_table(self) -> None:
|
|
||||||
"""Add DEBT-036 keystroke-dynamics columns to existing session_profile
|
|
||||||
rows. Override per dialect — DDL introspection is non-portable."""
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def get_deckies(self) -> List[dict]:
|
async def get_deckies(self) -> List[dict]:
|
||||||
_state = await asyncio.to_thread(load_state)
|
_state = await asyncio.to_thread(load_state)
|
||||||
return [_d.model_dump() for _d in _state[0].deckies] if _state else []
|
return [_d.model_dump() for _d in _state[0].deckies] if _state else []
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
"""Attacker repository methods.
|
"""Attacker repository methods.
|
||||||
|
|
||||||
The full domain spans ~500 lines of methods across attacker rows,
|
Per-concern submixins composed onto ``AttackersMixin``. The legacy
|
||||||
behavior signals, session profiles, SMTP victim tracking, and
|
``SessionProfilesMixin`` was dropped when the BEHAVE-SHELL
|
||||||
log-derived activity views. Each concern lives in its own submixin;
|
``observations`` table replaced the ``session_profile`` column-zoo
|
||||||
``AttackersMixin`` composes them.
|
(see DEBT-050 → ``decnet/web/db/sqlmodel_repo/observations.py``).
|
||||||
|
|
||||||
``_deserialize_attacker`` lives on ``AttackersCoreMixin`` and is reached
|
``_deserialize_attacker`` lives on ``AttackersCoreMixin`` and is reached
|
||||||
from ``IdentitiesMixin.list_observations_for_identity`` via ``self.`` —
|
from ``IdentitiesMixin.list_observations_for_identity`` via ``self.`` —
|
||||||
@@ -15,14 +15,12 @@ from __future__ import annotations
|
|||||||
from decnet.web.db.sqlmodel_repo.attackers._core import AttackersCoreMixin
|
from decnet.web.db.sqlmodel_repo.attackers._core import AttackersCoreMixin
|
||||||
from decnet.web.db.sqlmodel_repo.attackers.activity import AttackerActivityMixin
|
from decnet.web.db.sqlmodel_repo.attackers.activity import AttackerActivityMixin
|
||||||
from decnet.web.db.sqlmodel_repo.attackers.behavior import AttackerBehaviorMixin
|
from decnet.web.db.sqlmodel_repo.attackers.behavior import AttackerBehaviorMixin
|
||||||
from decnet.web.db.sqlmodel_repo.attackers.sessions import SessionProfilesMixin
|
|
||||||
from decnet.web.db.sqlmodel_repo.attackers.smtp import SmtpTargetsMixin
|
from decnet.web.db.sqlmodel_repo.attackers.smtp import SmtpTargetsMixin
|
||||||
|
|
||||||
|
|
||||||
class AttackersMixin(
|
class AttackersMixin(
|
||||||
AttackerActivityMixin,
|
AttackerActivityMixin,
|
||||||
AttackerBehaviorMixin,
|
AttackerBehaviorMixin,
|
||||||
SessionProfilesMixin,
|
|
||||||
SmtpTargetsMixin,
|
SmtpTargetsMixin,
|
||||||
AttackersCoreMixin,
|
AttackersCoreMixin,
|
||||||
):
|
):
|
||||||
|
|||||||
@@ -1,51 +0,0 @@
|
|||||||
"""Per-session profile rows (keystroke-dynamics features land here at
|
|
||||||
ingestion-time post-V2)."""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any, Optional
|
|
||||||
|
|
||||||
from sqlalchemy import select
|
|
||||||
|
|
||||||
from decnet.web.db.models import SessionProfile
|
|
||||||
|
|
||||||
|
|
||||||
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
|
||||||
|
|
||||||
class SessionProfilesMixin(_MixinBase):
|
|
||||||
async def upsert_session_profile(
|
|
||||||
self,
|
|
||||||
sid: str,
|
|
||||||
data: dict[str, Any],
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Write (or update) the session_profile row for *sid*.
|
|
||||||
|
|
||||||
Pre-v1, the typical call is the empty-write path at session close:
|
|
||||||
`upsert_session_profile(sid, {"log_id": <id>})` — all keystroke
|
|
||||||
feature columns stay NULL until the V2 ingestion job populates them.
|
|
||||||
"""
|
|
||||||
async with self._session() as session:
|
|
||||||
result = await session.execute(
|
|
||||||
select(SessionProfile).where(SessionProfile.sid == sid)
|
|
||||||
)
|
|
||||||
existing = result.scalar_one_or_none()
|
|
||||||
if existing:
|
|
||||||
for k, v in data.items():
|
|
||||||
setattr(existing, k, v)
|
|
||||||
session.add(existing)
|
|
||||||
else:
|
|
||||||
session.add(SessionProfile(sid=sid, **data))
|
|
||||||
await session.commit()
|
|
||||||
|
|
||||||
async def get_session_profile(
|
|
||||||
self,
|
|
||||||
sid: str,
|
|
||||||
) -> Optional[dict[str, Any]]:
|
|
||||||
async with self._session() as session:
|
|
||||||
result = await session.execute(
|
|
||||||
select(SessionProfile).where(SessionProfile.sid == sid)
|
|
||||||
)
|
|
||||||
row = result.scalar_one_or_none()
|
|
||||||
if not row:
|
|
||||||
return None
|
|
||||||
return row.model_dump(mode="json")
|
|
||||||
@@ -41,4 +41,13 @@ async def get_attacker_detail(
|
|||||||
# a ROTATION DETECTED badge when the count crosses a threshold.
|
# a ROTATION DETECTED badge when the count crosses a threshold.
|
||||||
attacker["ip_leaks"] = await repo.get_attacker_ip_leaks(uuid, limit=10)
|
attacker["ip_leaks"] = await repo.get_attacker_ip_leaks(uuid, limit=10)
|
||||||
attacker["ip_leaks_total"] = await repo.count_attacker_ip_leaks(uuid)
|
attacker["ip_leaks_total"] = await repo.count_attacker_ip_leaks(uuid)
|
||||||
|
# BEHAVE-SHELL observations — latest value per primitive for this
|
||||||
|
# attacker. Empty dict (rendered as empty list) until the
|
||||||
|
# extractor (DEBT-050) lands and starts writing rows. The frontend
|
||||||
|
# panel that consumes this ships in BEHAVE-INTEGRATION.md Phase 5.
|
||||||
|
latest_per_primitive = await repo.latest_observation_per_primitive(uuid)
|
||||||
|
attacker["observations"] = [
|
||||||
|
{"primitive": primitive, **payload}
|
||||||
|
for primitive, payload in sorted(latest_per_primitive.items())
|
||||||
|
]
|
||||||
return attacker
|
return attacker
|
||||||
|
|||||||
@@ -191,9 +191,13 @@ async def test_migrate_column_types_default_clause_per_column():
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mysql_initialize_calls_migrate_column_types():
|
async def test_mysql_initialize_calls_migrate_column_types():
|
||||||
"""MySQLRepository.initialize() must invoke every migration helper in
|
"""MySQLRepository.initialize() must invoke every migration helper
|
||||||
the right order: attackers first, then session_profile (DEBT-036),
|
in the right order: attackers first, then column types, then seed
|
||||||
then column types, then seed the admin user."""
|
the admin user.
|
||||||
|
|
||||||
|
The legacy ``_migrate_session_profile_table`` step (DEBT-036) was
|
||||||
|
dropped when SessionProfile was deleted in favour of the
|
||||||
|
``observations`` table — see DEBT-050 / BEHAVE-INTEGRATION.md."""
|
||||||
repo = _make_repo()
|
repo = _make_repo()
|
||||||
|
|
||||||
call_order: list[str] = []
|
call_order: list[str] = []
|
||||||
@@ -201,9 +205,6 @@ async def test_mysql_initialize_calls_migrate_column_types():
|
|||||||
async def fake_migrate_attackers():
|
async def fake_migrate_attackers():
|
||||||
call_order.append("migrate_attackers")
|
call_order.append("migrate_attackers")
|
||||||
|
|
||||||
async def fake_migrate_session_profile():
|
|
||||||
call_order.append("migrate_session_profile")
|
|
||||||
|
|
||||||
async def fake_migrate_column_types():
|
async def fake_migrate_column_types():
|
||||||
call_order.append("migrate_column_types")
|
call_order.append("migrate_column_types")
|
||||||
|
|
||||||
@@ -211,7 +212,6 @@ async def test_mysql_initialize_calls_migrate_column_types():
|
|||||||
call_order.append("ensure_admin")
|
call_order.append("ensure_admin")
|
||||||
|
|
||||||
repo._migrate_attackers_table = fake_migrate_attackers
|
repo._migrate_attackers_table = fake_migrate_attackers
|
||||||
repo._migrate_session_profile_table = fake_migrate_session_profile
|
|
||||||
repo._migrate_column_types = fake_migrate_column_types
|
repo._migrate_column_types = fake_migrate_column_types
|
||||||
repo._ensure_admin_user = fake_ensure_admin
|
repo._ensure_admin_user = fake_ensure_admin
|
||||||
|
|
||||||
@@ -228,7 +228,6 @@ async def test_mysql_initialize_calls_migrate_column_types():
|
|||||||
|
|
||||||
assert call_order == [
|
assert call_order == [
|
||||||
"migrate_attackers",
|
"migrate_attackers",
|
||||||
"migrate_session_profile",
|
|
||||||
"migrate_column_types",
|
"migrate_column_types",
|
||||||
"ensure_admin",
|
"ensure_admin",
|
||||||
], f"Unexpected call order: {call_order}"
|
], f"Unexpected call order: {call_order}"
|
||||||
|
|||||||
@@ -38,8 +38,6 @@ class DummyRepo(BaseRepository):
|
|||||||
async def upsert_attacker_behavior(self, u, d): await super().upsert_attacker_behavior(u, d)
|
async def upsert_attacker_behavior(self, u, d): await super().upsert_attacker_behavior(u, d)
|
||||||
async def get_attacker_behavior(self, u): await super().get_attacker_behavior(u)
|
async def get_attacker_behavior(self, u): await super().get_attacker_behavior(u)
|
||||||
async def get_behaviors_for_ips(self, ips): await super().get_behaviors_for_ips(ips)
|
async def get_behaviors_for_ips(self, ips): await super().get_behaviors_for_ips(ips)
|
||||||
async def upsert_session_profile(self, sid, data): await super().upsert_session_profile(sid, data)
|
|
||||||
async def get_session_profile(self, sid): await super().get_session_profile(sid)
|
|
||||||
# BEHAVE-SHELL observations (DEBT-050 / BEHAVE-INTEGRATION.md Phase 1)
|
# BEHAVE-SHELL observations (DEBT-050 / BEHAVE-INTEGRATION.md Phase 1)
|
||||||
async def upsert_observation(self, data): await super().upsert_observation(data); return ""
|
async def upsert_observation(self, data): await super().upsert_observation(data); return ""
|
||||||
async def latest_observation_per_primitive(self, attacker_uuid): await super().latest_observation_per_primitive(attacker_uuid); return {}
|
async def latest_observation_per_primitive(self, attacker_uuid): await super().latest_observation_per_primitive(attacker_uuid); return {}
|
||||||
@@ -129,8 +127,6 @@ async def test_base_repo_coverage():
|
|||||||
await dr.upsert_attacker_behavior("a", {})
|
await dr.upsert_attacker_behavior("a", {})
|
||||||
await dr.get_attacker_behavior("a")
|
await dr.get_attacker_behavior("a")
|
||||||
await dr.get_behaviors_for_ips({"1.1.1.1"})
|
await dr.get_behaviors_for_ips({"1.1.1.1"})
|
||||||
await dr.upsert_session_profile("sid", {})
|
|
||||||
await dr.get_session_profile("sid")
|
|
||||||
await dr.upsert_observation({})
|
await dr.upsert_observation({})
|
||||||
await dr.latest_observation_per_primitive("a")
|
await dr.latest_observation_per_primitive("a")
|
||||||
await dr.observations_time_series("a", "motor.input_modality")
|
await dr.observations_time_series("a", "motor.input_modality")
|
||||||
|
|||||||
@@ -1,55 +0,0 @@
|
|||||||
"""
|
|
||||||
Tests for the session_profile table + repo helpers (SIGNAL_CAPTURE_AUDIT gap #2).
|
|
||||||
|
|
||||||
Pre-v1 the ingestion job that populates keystroke-dynamics features is
|
|
||||||
deferred; this suite exercises the empty-write path (one row per session,
|
|
||||||
all feature columns NULL) and round-trips a filled row so future work can
|
|
||||||
land without re-discovering the schema.
|
|
||||||
"""
|
|
||||||
import pytest
|
|
||||||
from decnet.web.db.factory import get_repository
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
async def repo(tmp_path):
|
|
||||||
r = get_repository(db_path=str(tmp_path / "session_profile.db"))
|
|
||||||
await r.initialize()
|
|
||||||
return r
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.anyio
|
|
||||||
async def test_empty_write_path_ships_null_features(repo):
|
|
||||||
# Session close writes `{}` — schema_version defaults to 1, all feature
|
|
||||||
# columns stay NULL.
|
|
||||||
await repo.upsert_session_profile("sid-1", {})
|
|
||||||
row = await repo.get_session_profile("sid-1")
|
|
||||||
assert row is not None
|
|
||||||
assert row["sid"] == "sid-1"
|
|
||||||
assert row["schema_version"] == 1
|
|
||||||
assert row["kd_iki_mean"] is None
|
|
||||||
assert row["kd_digraph_simhash"] is None
|
|
||||||
assert row["total_keystrokes"] is None
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.anyio
|
|
||||||
async def test_upsert_replaces_existing(repo):
|
|
||||||
await repo.upsert_session_profile("sid-2", {})
|
|
||||||
await repo.upsert_session_profile(
|
|
||||||
"sid-2",
|
|
||||||
{
|
|
||||||
"kd_iki_mean": 0.120,
|
|
||||||
"kd_iki_p95": 0.450,
|
|
||||||
"total_keystrokes": 512,
|
|
||||||
"session_duration_s": 61.3,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
row = await repo.get_session_profile("sid-2")
|
|
||||||
assert row["kd_iki_mean"] == pytest.approx(0.120)
|
|
||||||
assert row["kd_iki_p95"] == pytest.approx(0.450)
|
|
||||||
assert row["total_keystrokes"] == 512
|
|
||||||
assert row["session_duration_s"] == pytest.approx(61.3)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.anyio
|
|
||||||
async def test_get_missing_returns_none(repo):
|
|
||||||
assert await repo.get_session_profile("does-not-exist") is None
|
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Every per-source lifter is allowed (and expected) to encounter
|
Every per-source lifter is allowed (and expected) to encounter
|
||||||
events whose required join is missing — no ``AttackerIntel`` row,
|
events whose required join is missing — no ``AttackerIntel`` row,
|
||||||
no ``SessionProfile``, no ``AttackerBehavior``, no canary record,
|
no ``ObservationRow``, no ``AttackerBehavior``, no canary record,
|
||||||
no identity row, no ``CredentialReuse`` entry. Absence is the
|
no identity row, no ``CredentialReuse`` entry. Absence is the
|
||||||
steady state, not the exception. The contract pinned here:
|
steady state, not the exception. The contract pinned here:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user