refactor(models): split models.py into topical submodules

decnet/web/db/models.py was approaching 1000 lines across User/Log/
Attacker/Swarm/Topology/Workers/Updater/Health domains. Split into a
package with one module per domain; __init__.py re-exports every symbol
so all 52 call sites keep importing from decnet.web.db.models
unchanged.
This commit is contained in:
2026-04-22 21:55:41 -04:00
parent 119b4e8724
commit d47a84c90b
12 changed files with 1307 additions and 1014 deletions

View File

@@ -0,0 +1,150 @@
"""Attacker core + per-attacker behavioral and per-session profile rows."""
from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import Column, Text
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
class Attacker(SQLModel, table=True):
    """Core per-attacker row, persisted in the ``attackers`` table.

    Carries identity (``uuid``/``ip``), the first/last-seen window, aggregate
    counters, and several JSON documents serialized into text columns.
    """

    __tablename__ = "attackers"

    # --- identity and activity window -------------------------------------
    uuid: str = Field(primary_key=True)
    ip: str = Field(index=True)
    first_seen: datetime = Field(index=True)
    last_seen: datetime = Field(index=True)

    # --- aggregate counters (all start at zero) ---------------------------
    event_count: int = Field(default=0)
    service_count: int = Field(default=0)
    decky_count: int = Field(default=0)

    # --- JSON blobs -------------------------------------------------------
    # These keep growing for as long as the attacker stays active, so the
    # accumulating fields (fingerprints, commands, and the in-principle
    # unbounded deckies/services lists) use _BIG_TEXT — MEDIUMTEXT (16 MiB)
    # on MySQL.
    services: str = Field(
        default="[]",
        sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    deckies: str = Field(
        default="[]",
        sa_column=Column("deckies", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str], ordered by first contact

    # Human-readable hop chain, e.g. "decky-01 → decky-03 → decky-05".
    traversal_path: Optional[str] = Field(
        default=None,
        sa_column=Column("traversal_path", Text, nullable=True),
    )
    is_traversal: bool = Field(default=False)

    bounty_count: int = Field(default=0)
    credential_count: int = Field(default=0)

    fingerprints: str = Field(
        default="[]",
        sa_column=Column("fingerprints", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[dict] — bounty fingerprints
    commands: str = Field(
        default="[]",
        sa_column=Column("commands", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[dict] — commands per service/decky

    # Defaults to the timezone-aware UTC time at instantiation.
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
        index=True,
    )
class AttackerBehavior(SQLModel, table=True):
    """Timing and behavioral profile of one attacker.

    Linked to ``Attacker`` through ``attacker_uuid`` (foreign key to
    ``attackers.uuid``, which is also this table's primary key).

    The profile lives in its own table so the core Attacker row stays narrow
    and so behavior data can be refreshed on its own schedule (for example
    as the sniffer observes more packets) without touching the event-count
    aggregates.
    """

    __tablename__ = "attacker_behavior"

    attacker_uuid: str = Field(primary_key=True, foreign_key="attackers.uuid")

    # --- OS / TCP stack fingerprint (rolled up from sniffer events) -------
    os_guess: Optional[str] = None
    hop_distance: Optional[int] = None
    tcp_fingerprint: str = Field(
        default="{}",
        sa_column=Column("tcp_fingerprint", Text, nullable=False, default="{}"),
    )  # JSON: window, wscale, mss, options_sig

    # Raw SSH KEX algorithm preference strings seen across HASSH probes, one
    # entry per hassh_fingerprint event. Retaining the raw ordered list
    # allows post-hoc KEX-order fingerprinting beyond the HASSH hash itself.
    kex_order_raw: Optional[str] = Field(
        default=None,
        sa_column=Column("kex_order_raw", Text, nullable=True),
    )  # JSON list[str] — kex_algorithms comma-separated strings

    # SSH client identification strings (RFC 4253 §4.2) observed by the
    # sniffer, deduplicated in observation order. They identify the
    # attacker's SSH client software, e.g. "SSH-2.0-OpenSSH_9.2p1" or
    # "SSH-2.0-libssh2_1.10.0".
    ssh_client_banners: Optional[str] = Field(
        default=None,
        sa_column=Column("ssh_client_banners", Text, nullable=True),
    )  # JSON list[str]

    retransmit_count: int = Field(default=0)

    # --- behavioral (derived by the profiler from log-event timing) -------
    # behavior_class is one of:
    #   beaconing | interactive | scanning | brute_force | slow_scan | mixed | unknown
    behavior_class: Optional[str] = None
    beacon_interval_s: Optional[float] = None
    beacon_jitter_pct: Optional[float] = None
    tool_guesses: Optional[str] = None  # JSON list[str] — all matched tools
    timing_stats: str = Field(
        default="{}",
        sa_column=Column("timing_stats", Text, nullable=False, default="{}"),
    )  # JSON: mean/median/stdev/min/max IAT
    phase_sequence: str = Field(
        default="{}",
        sa_column=Column("phase_sequence", Text, nullable=False, default="{}"),
    )  # JSON: recon_end/exfil_start/latency

    # Defaults to the timezone-aware UTC time at instantiation.
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
        index=True,
    )
class SessionProfile(SQLModel, table=True):
    """Keystroke-dynamics fingerprint for a single session.

    Each recorded interactive session gets one row. Pre-v1, the ingestion
    job that fills these columns does not exist yet (tracked as gap #2 in
    SIGNAL_CAPTURE_AUDIT.md), so the table ships empty. It exists already so
    that:

    * downstream correlation/federation work has a stable schema to target;
    * `schema_version` is written to storage from day one — v2 federation
      gossip needs cross-operator compatibility, and adding a version
      column once rows exist is painful.

    Every feature column is nullable, which keeps the empty write path (one
    row per closed session) valid while the behavioral analyzer is offline.
    """

    __tablename__ = "session_profile"

    sid: str = Field(primary_key=True)  # session UUID
    log_id: Optional[int] = Field(
        default=None,
        foreign_key="logs.id",
        index=True,
    )
    schema_version: int = Field(default=1)

    # --- inter-key interval timing moments (seconds) ----------------------
    kd_iki_mean: Optional[float] = None
    kd_iki_stdev: Optional[float] = None
    kd_iki_p50: Optional[float] = None
    kd_iki_p95: Optional[float] = None
    kd_enter_latency_p50: Optional[float] = None
    kd_enter_latency_p95: Optional[float] = None

    # --- cadence ratios ---------------------------------------------------
    kd_burst_ratio: Optional[float] = None
    kd_think_ratio: Optional[float] = None

    # --- control-character rates (events per keystroke) -------------------
    kd_ctrl_backspace: Optional[float] = None
    kd_ctrl_wkill: Optional[float] = None
    kd_ctrl_ukill: Optional[float] = None
    kd_ctrl_abort: Optional[float] = None
    kd_ctrl_eof: Optional[float] = None
    kd_arrow_rate: Optional[float] = None
    kd_tab_rate: Optional[float] = None

    # 8-byte SimHash over keystroke digraphs; Hamming-comparable between
    # sessions.
    # NOTE(review): this is an indexed bytes column. MySQL requires a prefix
    # length to index BLOB-typed columns — confirm the generated DDL works
    # on every backend in use.
    kd_digraph_simhash: Optional[bytes] = Field(default=None, index=True)

    # --- derived totals ---------------------------------------------------
    total_keystrokes: Optional[int] = None
    session_duration_s: Optional[float] = None

    # Defaults to the timezone-aware UTC time at instantiation.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
class AttackersResponse(BaseModel):
    """Response model: a list of dict records plus total/limit/offset integers."""

    # NOTE(review): the field names suggest a paginated attacker listing
    # (total match count + requested window) — confirm against the caller.
    total: int
    limit: int
    offset: int

    # Free-form records keyed by string; values are not validated further.
    data: List[dict[str, Any]]