merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,319 @@
"""
Database tables (SQLModel) and HTTP request/response shapes (Pydantic).
Split into topical modules for readability, but every symbol is re-exported
from this package so ``from decnet.web.db.models import X`` keeps working
everywhere — no importer needs to know which submodule a class lives in.
"""
from ._base import (
NullableDatetime,
NullableString,
_BIG_TEXT,
_normalize_null,
)
from .common import (
MessageResponse,
)
from .canary import (
CanaryBlob,
CanaryBlobResponse,
CanaryBlobsResponse,
CanaryKind,
CanaryState,
CanaryToken,
CanaryTokenCreateRequest,
CanaryTokenResponse,
CanaryTokensResponse,
CanaryTrigger,
CanaryTriggerResponse,
CanaryTriggersResponse,
)
from .auth import (
AdminConfigResponse,
ChangePasswordRequest,
ConfigResponse,
CreateUserRequest,
DeploymentLimitRequest,
GlobalMutationIntervalRequest,
LoginRequest,
ResetUserPasswordRequest,
Token,
UpdateUserRoleRequest,
User,
UserResponse,
)
from .attackers import (
Attacker,
AttackerBehavior,
AttackerIdentity,
AttackersResponse,
SessionProfile,
SmtpTarget,
)
from .attacker_intel import (
AttackerIntel,
)
from .campaigns import (
Campaign,
CampaignsResponse,
)
from .deploy import (
DeployIniRequest,
DeployResponse,
MutateIntervalRequest,
PurgeResponse,
)
from .fleet import (
LOCAL_HOST_SENTINEL,
FleetDecky,
)
from .health import (
ComponentHealth,
HealthResponse,
)
from .orchestrator import (
OrchestratorEmail,
OrchestratorEmailsResponse,
OrchestratorEvent,
OrchestratorEventsResponse,
)
from .realism import (
RealismConfig,
SyntheticFile,
SyntheticFilesResponse,
)
from .logs import (
Bounty,
BountyResponse,
Credential,
CredentialReuse,
CredentialReuseResponse,
CredentialsResponse,
Log,
LogsResponse,
State,
StatsResponse,
)
from .swarm import (
DeckyShard,
DeckyShardView,
SwarmCheckResponse,
SwarmDeployRequest,
SwarmDeployResponse,
SwarmEnrolledBundle,
SwarmEnrollRequest,
SwarmHost,
SwarmHostHealth,
SwarmHostResult,
SwarmHostView,
SwarmTeardownRequest,
SwarmUpdaterBundle,
)
from .topology import (
LAN,
ArchetypeCatalogResponse,
ArchetypeEntry,
DeckyCreateRequest,
DeckyRow,
DeckyUpdateRequest,
DeployAcceptedResponse,
EdgeCreateRequest,
EdgeRow,
LANCreateRequest,
LANRow,
LANUpdateRequest,
MutationEnqueueRequest,
MutationEnqueueResponse,
MutationRow,
NextIPResponse,
NextSubnetResponse,
NotEditableResponse,
ReapReportResponse,
ServiceCatalogResponse,
Topology,
TopologyDecky,
TopologyDetail,
TopologyEdge,
TopologyGenerateRequest,
TopologyListResponse,
TopologyMutation,
TopologyStatusEvent,
TopologyStatusEventRow,
TopologySummary,
ValidationErrorResponse,
ValidationIssueResponse,
VersionConflictResponse,
)
from .updater import (
HostReleaseInfo,
HostReleasesResponse,
PushUpdateRequest,
PushUpdateResponse,
PushUpdateResult,
RollbackRequest,
RollbackResponse,
)
from .webhooks import (
SimpleEvent,
WebhookCreateRequest,
WebhookCreateResponse,
WebhookResponse,
WebhookSubscription,
WebhookTestResponse,
WebhookUpdateRequest,
)
from .workers import (
StartAllResponse,
StartFailure,
WorkerControlResponse,
WorkersResponse,
WorkerStatus,
)
__all__ = [
# _base
"NullableDatetime",
"NullableString",
"_BIG_TEXT",
"_normalize_null",
# common
"MessageResponse",
# canary
"CanaryBlob",
"CanaryBlobResponse",
"CanaryBlobsResponse",
"CanaryKind",
"CanaryState",
"CanaryToken",
"CanaryTokenCreateRequest",
"CanaryTokenResponse",
"CanaryTokensResponse",
"CanaryTrigger",
"CanaryTriggerResponse",
"CanaryTriggersResponse",
# auth
"AdminConfigResponse",
"ChangePasswordRequest",
"ConfigResponse",
"CreateUserRequest",
"DeploymentLimitRequest",
"GlobalMutationIntervalRequest",
"LoginRequest",
"ResetUserPasswordRequest",
"Token",
"UpdateUserRoleRequest",
"User",
"UserResponse",
# attackers + attacker_intel
"Attacker",
"AttackerBehavior",
"AttackerIdentity",
"AttackerIntel",
"AttackersResponse",
"SessionProfile",
"SmtpTarget",
# campaigns
"Campaign",
"CampaignsResponse",
# deploy
"DeployIniRequest",
"DeployResponse",
"MutateIntervalRequest",
"PurgeResponse",
# fleet
"LOCAL_HOST_SENTINEL",
"FleetDecky",
# health
"ComponentHealth",
"HealthResponse",
# orchestrator
"OrchestratorEmail",
"OrchestratorEmailsResponse",
"OrchestratorEvent",
"OrchestratorEventsResponse",
# realism
"RealismConfig",
"SyntheticFile",
"SyntheticFilesResponse",
# logs
"Bounty",
"BountyResponse",
"Credential",
"CredentialReuse",
"CredentialReuseResponse",
"CredentialsResponse",
"Log",
"LogsResponse",
"State",
"StatsResponse",
# swarm
"DeckyShard",
"DeckyShardView",
"SwarmCheckResponse",
"SwarmDeployRequest",
"SwarmDeployResponse",
"SwarmEnrolledBundle",
"SwarmEnrollRequest",
"SwarmHost",
"SwarmHostHealth",
"SwarmHostResult",
"SwarmHostView",
"SwarmTeardownRequest",
"SwarmUpdaterBundle",
# topology
"LAN",
"ArchetypeCatalogResponse",
"ArchetypeEntry",
"DeckyCreateRequest",
"DeckyRow",
"DeckyUpdateRequest",
"DeployAcceptedResponse",
"EdgeCreateRequest",
"EdgeRow",
"LANCreateRequest",
"LANRow",
"LANUpdateRequest",
"MutationEnqueueRequest",
"MutationEnqueueResponse",
"MutationRow",
"NextIPResponse",
"NextSubnetResponse",
"NotEditableResponse",
"ReapReportResponse",
"ServiceCatalogResponse",
"Topology",
"TopologyDecky",
"TopologyDetail",
"TopologyEdge",
"TopologyGenerateRequest",
"TopologyListResponse",
"TopologyMutation",
"TopologyStatusEvent",
"TopologyStatusEventRow",
"TopologySummary",
"ValidationErrorResponse",
"ValidationIssueResponse",
"VersionConflictResponse",
# updater
"HostReleaseInfo",
"HostReleasesResponse",
"PushUpdateRequest",
"PushUpdateResponse",
"PushUpdateResult",
"RollbackRequest",
"RollbackResponse",
# webhooks
"SimpleEvent",
"WebhookCreateRequest",
"WebhookCreateResponse",
"WebhookResponse",
"WebhookSubscription",
"WebhookTestResponse",
"WebhookUpdateRequest",
# workers
"StartAllResponse",
"StartFailure",
"WorkerControlResponse",
"WorkersResponse",
"WorkerStatus",
]

View File

@@ -0,0 +1,23 @@
"""Shared column/validator helpers used across model domain modules."""
from datetime import datetime
from typing import Annotated, Any, Optional
from pydantic import BeforeValidator
from sqlalchemy import Text
from sqlalchemy.dialects.mysql import MEDIUMTEXT
# Use on columns that accumulate over an attacker's lifetime (commands,
# fingerprints, state blobs). TEXT on MySQL caps at 64 KiB; MEDIUMTEXT
# stretches to 16 MiB. SQLite has no fixed-width text types so Text()
# stays unchanged there.
# The MEDIUMTEXT variant is registered for the "mysql" dialect name only;
# any other dialect keeps the generic Text() type.
_BIG_TEXT = Text().with_variant(MEDIUMTEXT(), "mysql")
def _normalize_null(v: Any) -> Any:
    """Turn textual "no value" markers into ``None``.

    A string equal to ``"null"`` or ``"undefined"`` (compared
    case-insensitively) or the empty string becomes ``None``. Every other
    value, including non-string input, is returned unchanged.
    """
    # Only strings can carry one of the sentinel spellings.
    if not isinstance(v, str):
        return v
    return None if v.lower() in ("null", "undefined", "") else v
# Optional field types that run ``_normalize_null`` as a "before" validator,
# so the sentinel strings it recognises become ``None`` before the value is
# validated as a datetime / str.
NullableDatetime = Annotated[Optional[datetime], BeforeValidator(_normalize_null)]
NullableString = Annotated[Optional[str], BeforeValidator(_normalize_null)]

View File

@@ -0,0 +1,93 @@
"""Threat-intel enrichment row — one per attacker IP, TTL-cached."""
from datetime import datetime, timezone
from typing import Optional
from sqlalchemy import Column
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
class AttackerIntel(SQLModel, table=True):
    """Aggregated threat-intel verdict for a single attacker.

    Populated by the ``decnet enrich`` worker, which queries multiple
    free-tier intel providers (GreyNoise Community, AbuseIPDB,
    abuse.ch Feodo Tracker + ThreatFox) and writes one row per
    attacker. Uniqueness is enforced on ``attacker_uuid``; the IP that
    was queried is kept alongside it in ``attacker_ip``. The row is
    TTL-cached via ``expires_at`` so re-firings inside the cache window
    short-circuit before any HTTP egress.

    Per-provider columns are nullable until each provider has answered;
    the enrichment pass writes whichever providers succeeded and leaves
    the rest unchanged on a partial failure.

    ``schema_version`` is committed to storage from day one — federation
    gossip in v2/v3 requires cross-operator compatibility, and
    retrofitting a version column after rows exist is painful. Mirrors
    the rationale on :class:`SessionProfile`.
    """
    __tablename__ = "attacker_intel"
    uuid: str = Field(primary_key=True)  # uuid.uuid4().hex, generated by writer
    # Canonical key. One intel row per attacker UUID; FK guarantees no orphan
    # rows when an attacker is deleted, and UNIQUE keeps upserts honest.
    attacker_uuid: str = Field(
        foreign_key="attackers.uuid",
        unique=True,
        index=True,
    )
    # DENORMALISED — NOT a key. The IP the worker queried providers with at
    # write time. Useful for SIEM payloads and audit lookups; updated on every
    # upsert if the attacker rotates IPs. Never use this column as a lookup
    # key; ``attacker_uuid`` is the only canonical identifier here.
    attacker_ip: str = Field(index=True)
    schema_version: int = Field(default=1)
    # ── GreyNoise Community ─────────────────────────────────────────────
    # classification ∈ {"benign", "malicious", "suspicious", "unknown"}
    greynoise_classification: Optional[str] = Field(default=None, max_length=32)
    # Raw provider payload; defaults to the string "{}" and is NOT NULL.
    greynoise_raw: str = Field(
        default="{}",
        sa_column=Column("greynoise_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    greynoise_queried_at: Optional[datetime] = Field(default=None)
    # ── AbuseIPDB ────────────────────────────────────────────────────────
    # 0..100 abuse confidence score
    abuseipdb_score: Optional[int] = Field(default=None)
    abuseipdb_raw: str = Field(
        default="{}",
        sa_column=Column("abuseipdb_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    abuseipdb_queried_at: Optional[datetime] = Field(default=None)
    # ── abuse.ch Feodo Tracker ───────────────────────────────────────────
    feodo_listed: Optional[bool] = Field(default=None)
    feodo_raw: str = Field(
        default="{}",
        sa_column=Column("feodo_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    feodo_queried_at: Optional[datetime] = Field(default=None)
    # ── abuse.ch ThreatFox ───────────────────────────────────────────────
    threatfox_listed: Optional[bool] = Field(default=None)
    threatfox_raw: str = Field(
        default="{}",
        sa_column=Column("threatfox_raw", _BIG_TEXT, nullable=False, default="{}"),
    )
    threatfox_queried_at: Optional[datetime] = Field(default=None)
    # ── Aggregate verdict ────────────────────────────────────────────────
    # Synthesised from per-provider columns. ∈ {"malicious", "suspicious",
    # "benign", "unknown"}. Used by the dashboard and webhook consumers
    # that don't want to reason over four provider columns.
    aggregate_verdict: Optional[str] = Field(
        default=None, max_length=32, index=True
    )
    # ── TTL bookkeeping ──────────────────────────────────────────────────
    # cached_at defaults to the current UTC time when the model instance is
    # built; expires_at has no default and must be supplied by the writer.
    cached_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    expires_at: datetime = Field(index=True)

View File

@@ -0,0 +1,414 @@
"""Attacker core + per-attacker behavioral and per-session profile rows."""
from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import BINARY, Column, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
# ─── Keystroke-dynamics tuning constants ──────────────────────────────────────
#
# These are the semantic thresholds the session-profile ingester (DEBT-036)
# uses to bucket IATs and decide what "started a new action" means. Keeping
# them here (not inline in the ingester) so that:
#   * the schema docstrings below can reference exact boundaries instead of
#     copy-pasted magic numbers, and
#   * a future calibration pass against real honeypot session data only has
#     to touch one place.
# All values in seconds.

# IAT < this = muscle-memory digraph.
KD_PAUSE_BURST_MAX_S: float = 0.2
# IAT < this = semantic / context-switch pause; everything ≥ this lands in
# the distracted bucket.
KD_PAUSE_THINK_MAX_S: float = 1.5
# Idle gap that counts as "new action". Raised from 1s — 1s still catches a
# lot of mid-command hesitation, 2s is closer to empirical "meaningfully new
# action".
KD_START_OF_ACTION_IDLE_S: float = 2.0
class Attacker(SQLModel, table=True):
    """
    Per-IP **observation** row. Every distinct source IP we observe gets
    one of these. The semantic role is "observation event," not "actor
    identity" — an actor rotating across N IPs produces N rows here.

    The deduped actor view lives in ``AttackerIdentity`` (one identity
    per actor; many observations per identity); the per-operation view
    lives in ``Campaign``. ``identity_id`` is set by the clusterer
    worker once it resolves which observations are the same hands.
    NULL while the clusterer hasn't run on this row yet.

    See ``development/IDENTITY_RESOLUTION.md`` for the three-level
    hierarchy rationale.
    """
    __tablename__ = "attackers"
    uuid: str = Field(primary_key=True)
    ip: str = Field(index=True)
    identity_id: Optional[str] = Field(
        default=None,
        foreign_key="attacker_identities.uuid",
        index=True,
    )
    first_seen: datetime = Field(index=True)
    last_seen: datetime = Field(index=True)
    event_count: int = Field(default=0)
    service_count: int = Field(default=0)
    decky_count: int = Field(default=0)
    # JSON blobs — these grow over the attacker's lifetime. Use MEDIUMTEXT on
    # MySQL (16 MiB) for the fields that accumulate (fingerprints, commands,
    # and the deckies/services lists that are unbounded in principle).
    services: str = Field(
        default="[]", sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]")
    )  # JSON list[str]
    deckies: str = Field(
        default="[]", sa_column=Column("deckies", _BIG_TEXT, nullable=False, default="[]")
    )  # JSON list[str], first-contact ordered
    # Plain TEXT (not _BIG_TEXT), nullable.
    traversal_path: Optional[str] = Field(
        default=None, sa_column=Column("traversal_path", Text, nullable=True)
    )  # "decky-01 → decky-03 → decky-05"
    is_traversal: bool = Field(default=False)
    bounty_count: int = Field(default=0)
    credential_count: int = Field(default=0)
    fingerprints: str = Field(
        default="[]", sa_column=Column("fingerprints", _BIG_TEXT, nullable=False, default="[]")
    )  # JSON list[dict] — bounty fingerprints
    commands: str = Field(
        default="[]", sa_column=Column("commands", _BIG_TEXT, nullable=False, default="[]")
    )  # JSON list[dict] — commands per service/decky
    # GeoIP enrichment (populated by the profiler from decnet.geoip.enrich_ip).
    # Nullable because private / loopback / IPv6 sources never resolve.
    country_code: Optional[str] = Field(default=None, max_length=2, index=True)
    country_source: Optional[str] = Field(default=None, max_length=16)
    # ASN enrichment (populated by the profiler from decnet.asn.enrich_ip).
    # Nullable for the same reasons as country_code, plus IPs not currently
    # announced in the global BGP table (e.g. CGNAT, dark space).
    asn: Optional[int] = Field(default=None, index=True)
    as_name: Optional[str] = Field(default=None, max_length=128)
    asn_source: Optional[str] = Field(default=None, max_length=16)
    # Reverse-DNS (PTR) name, one-shot resolved by the profiler at first
    # sighting. Nullable — many attackers run infra with no rDNS, and
    # private/loopback addresses never resolve. 256 chars leaves room for
    # the longest name RFC 1035 allows (255 octets).
    ptr_record: Optional[str] = Field(default=None, max_length=256)
    # Defaults to the current UTC time when the model instance is built.
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
class AttackerIdentity(SQLModel, table=True):
    """
    Resolved actor identity — the dedup'd "same hands" row that one or
    more ``Attacker`` observations FK into. Populated by the (future)
    clusterer worker; ``Attacker.identity_id`` is NULL on every
    observation until it runs.

    Why a separate table from ``Attacker``: an actor rotating across N
    IPs produces N observation rows but only ONE identity row. The
    identity is recovered from signals the attacker can't cheaply
    rotate — JA3, HASSH, payload hashes, C2 callbacks, and (V2)
    keystroke-rhythm SimHash. See ``development/IDENTITY_RESOLUTION.md``.

    All clusterer-populated fields are nullable; the table ships empty
    in the schema-only PR (commit 1) and stays empty until the
    clusterer lands. Empty is valid.

    ``schema_version`` is non-negotiable from day one. Federation
    gossip in V2 will share identity vectors across operators;
    bumping feature definitions without a version field silently
    poisons receivers.
    """
    __tablename__ = "attacker_identities"
    uuid: str = Field(primary_key=True)
    schema_version: int = Field(default=1)
    # Set by the campaign clusterer. The ``campaigns`` table now
    # exists; this is a real FK. Nullable until the campaign clusterer
    # has run on this identity row.
    campaign_id: Optional[str] = Field(
        default=None, foreign_key="campaigns.uuid", index=True
    )
    first_seen_at: Optional[datetime] = Field(default=None, index=True)
    last_seen_at: Optional[datetime] = Field(default=None, index=True)
    # Both timestamps default to the current UTC time when the model
    # instance is built.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    # Identity-cohesion score from the clusterer. Range [0, 1]; null
    # until the clusterer writes. Higher = more confident the
    # observations linked to this identity are the same hands.
    confidence: Optional[float] = Field(default=None)
    # Denormalized count of FK'd Attacker rows. Maintained by the
    # clusterer when it links/unlinks. Cheap dashboard read.
    observation_count: int = Field(default=0)
    # Fingerprint summary columns. JSON-serialized list[str] in TEXT
    # because: (a) federation gossip wants this exact shape on the
    # wire, (b) MySQL can't index BLOB/TEXT without prefix lengths,
    # (c) actors can present multiple JA3/HASSH values across tools
    # so a scalar column is wrong.
    ja3_hashes: Optional[str] = Field(
        default=None, sa_column=Column("ja3_hashes", Text, nullable=True)
    )
    hassh_hashes: Optional[str] = Field(
        default=None, sa_column=Column("hassh_hashes", Text, nullable=True)
    )
    # JSON list[str] — SHA-256 fingerprints of leaf certs presented by
    # attacker-run TLS servers, captured by the active prober alongside
    # JARM. Same federation-gossip rationale as ja3_hashes/hassh_hashes:
    # a self-signed cert reused across C2 nodes is an instant cluster-link
    # signal, and TEXT keeps MySQL indexable via prefix length.
    tls_cert_sha256: Optional[str] = Field(
        default=None, sa_column=Column("tls_cert_sha256", Text, nullable=True)
    )
    # Payload SimHash list — 64-bit ints serialized as hex strings.
    # SimHashes are Hamming-comparable, which is the entire reason
    # they're a list (not a set).
    payload_simhashes: Optional[str] = Field(
        default=None, sa_column=Column("payload_simhashes", Text, nullable=True)
    )
    # C2 endpoints attributed to this identity, stored in nullable TEXT like
    # the hash lists above. NOTE(review): presumably a JSON list[str] —
    # confirm the element format against the clusterer.
    c2_endpoints: Optional[str] = Field(
        default=None, sa_column=Column("c2_endpoints", Text, nullable=True)
    )
    # V2 keystroke-dynamics hook. Same shape as
    # SessionProfile.kd_digraph_simhash; this is the centroid (or
    # majority vote) across the identity's sessions. BINARY(8) so
    # MySQL can index without a prefix length, same as session_profile.
    kd_digraph_simhash: Optional[bytes] = Field(
        default=None,
        sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
    )
    # Soft-merge audit trail. When the clusterer collapses two
    # identities, the loser's row stays in place with this set to the
    # winner's UUID — preserves the audit trail without orphaning FKs
    # from any cached subscribers. Resolvers (e.g.
    # GET /identities/{uuid}) follow the chain and surface the winner.
    # Self-referential FK: it points back at this same table.
    merged_into_uuid: Optional[str] = Field(
        default=None, foreign_key="attacker_identities.uuid", index=True
    )
    # Operator-editable free-form notes — annotation surface for human
    # analysts ("known APT-XX cluster," "matches MISP event 1234").
    notes: Optional[str] = Field(
        default=None, sa_column=Column("notes", Text, nullable=True)
    )
class AttackerBehavior(SQLModel, table=True):
    """
    Timing & behavioral profile for an attacker, joined to Attacker by uuid.

    Kept in a separate table so the core Attacker row stays narrow and
    behavior data can be updated independently (e.g. as the sniffer observes
    more packets) without touching the event-count aggregates.
    """
    __tablename__ = "attacker_behavior"
    # Primary key and FK at once, so there is at most one behavior row per
    # ``attackers`` row.
    attacker_uuid: str = Field(primary_key=True, foreign_key="attackers.uuid")
    # OS / TCP stack fingerprint (rolled up from sniffer events)
    os_guess: Optional[str] = None
    hop_distance: Optional[int] = None
    tcp_fingerprint: str = Field(
        default="{}",
        sa_column=Column("tcp_fingerprint", Text, nullable=False, default="{}"),
    )  # JSON: window, wscale, mss, options_sig
    # Raw SSH KEX algorithm preference strings observed across HASSH probes
    # (one entry per hassh_fingerprint event). Keeping the raw ordered list
    # enables post-hoc KEX-order fingerprinting beyond the HASSH hash.
    kex_order_raw: Optional[str] = Field(
        default=None,
        sa_column=Column("kex_order_raw", Text, nullable=True),
    )  # JSON list[str] — kex_algorithms comma-separated strings
    # Sniffer-observed SSH client identification strings (RFC 4253 §4.2),
    # deduped in observation order. Captures the attacker's SSH client
    # software (e.g. "SSH-2.0-OpenSSH_9.2p1", "SSH-2.0-libssh2_1.10.0").
    ssh_client_banners: Optional[str] = Field(
        default=None,
        sa_column=Column("ssh_client_banners", Text, nullable=True),
    )  # JSON list[str]
    retransmit_count: int = Field(default=0)
    # Behavioral (derived by the profiler from log-event timing)
    behavior_class: Optional[str] = None  # beaconing | interactive | scanning | brute_force | slow_scan | mixed | unknown
    beacon_interval_s: Optional[float] = None
    beacon_jitter_pct: Optional[float] = None
    tool_guesses: Optional[str] = None  # JSON list[str] — all matched tools
    timing_stats: str = Field(
        default="{}",
        sa_column=Column("timing_stats", Text, nullable=False, default="{}"),
    )  # JSON: mean/median/stdev/min/max IAT
    phase_sequence: str = Field(
        default="{}",
        sa_column=Column("phase_sequence", Text, nullable=False, default="{}"),
    )  # JSON: recon_end/exfil_start/latency
    # Defaults to the current UTC time when the model instance is built.
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
class SessionProfile(SQLModel, table=True):
    """
    Per-session keystroke-dynamics fingerprint.

    One row per recorded interactive session. Pre-v1 the ingestion job
    that populates these columns is not yet built (tracked as gap #2 in
    SIGNAL_CAPTURE_AUDIT.md); the table ships empty so that:

    * downstream correlation/federation work can target a stable schema, and
    * `schema_version` is committed to storage from day one — federation
      gossip in v2 requires cross-operator compatibility, and retrofitting
      a version column after rows exist is painful.

    All feature columns are nullable so the empty write path (one row per
    closed session) is valid without the behavioral analyzer online yet.
    """
    __tablename__ = "session_profile"
    sid: str = Field(primary_key=True)  # session UUID
    log_id: Optional[int] = Field(
        default=None, foreign_key="logs.id", index=True
    )
    schema_version: int = Field(default=1)
    # ──────────────────────────────────────────────────────────────────────
    # Keystroke-dynamics feature columns (kd_*).
    #
    # Intended use:       session clustering and tooling attribution
    #                     ("is this the same typist?" / "is this a known C2
    #                     framework's paste cadence?").
    # Explicitly NOT for: attribution to named individuals, access or
    #                     admission decisions, any ML-driven identity lookup,
    #                     or biometric-login-style user identification. Those
    #                     framings push into legal/ethics territory we don't
    #                     want this project walking into by accident.
    # PII discipline:     every kd_* column aggregates CHARACTERS and TIMING
    #                     only — never raw input-stream content. Attacker
    #                     passwords typed over SSH must not land here.
    # Nulls semantic:     a null means "ingester hasn't run on this session
    #                     yet", not "zero events". Consumers should treat
    #                     null as absent, not as a computed zero.
    # ──────────────────────────────────────────────────────────────────────
    # Inter-key interval timing moments (seconds).
    kd_iki_mean: Optional[float] = None
    kd_iki_stdev: Optional[float] = None
    kd_iki_p50: Optional[float] = None
    kd_iki_p95: Optional[float] = None
    kd_enter_latency_p50: Optional[float] = None
    kd_enter_latency_p95: Optional[float] = None
    # Cadence ratios.
    # NOTE(review): presumably the share of IATs under the KD_PAUSE_* module
    # thresholds — the exact definition lives in the ingester; confirm there.
    kd_burst_ratio: Optional[float] = None
    kd_think_ratio: Optional[float] = None
    # Control-character rates (events per keystroke).
    kd_ctrl_backspace: Optional[float] = None
    kd_ctrl_wkill: Optional[float] = None
    kd_ctrl_ukill: Optional[float] = None
    kd_ctrl_abort: Optional[float] = None
    kd_ctrl_eof: Optional[float] = None
    kd_arrow_rate: Optional[float] = None
    kd_tab_rate: Optional[float] = None
    # 8-byte SimHash over keystroke digraphs — Hamming-comparable across sessions.
    # Fixed-width BINARY(8) rather than BLOB: MySQL can't index BLOB/TEXT
    # columns without a prefix length, and SimHashes are always exactly 8
    # bytes so a variable-length type gains nothing here.
    #
    # PII discipline: the simhash is computed over keystroke CHARACTERS
    # (digraph bigrams), never over the raw content of the input stream —
    # attacker passwords typed over SSH must never land in this column.
    kd_digraph_simhash: Optional[bytes] = Field(
        default=None,
        sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
    )
    # Top-N most-common digraphs with their mean IAT, as JSON.
    # Complements kd_digraph_simhash: the simhash answers "same typist?",
    # this answers "same typist IN THE SAME MENTAL STATE?" (tired vs rested
    # vs distracted shifts bigram-specific IATs measurably). Shape:
    #   [["th", 47, 0.082], ["in", 31, 0.091], ...]  (bigram, count, mean_iat_s)
    # Bounded by the ingester to N≤32 to cap row width.
    #
    # TODO(DEBT-036 upgrade path): JSON-in-TEXT is fine for v1's
    # "surface the typist's top digraphs on the attacker page" use
    # case, but every similarity query (e.g. "find sessions where the
    # 'th' digraph mean IAT is within 20 ms of this one") has to pull
    # the string, parse JSON, compare — O(sessions) with a constant
    # overhead per row. If that query shape becomes hot, promote to a
    # dedicated `session_bigram_stats(sid, bigram, count, mean_iat_s)`
    # table with a (bigram, mean_iat_s) index, or a JSONB column on
    # Postgres with a GIN index. Either is straightforward, neither
    # changes the write-side ingester materially.
    kd_top_bigrams: Optional[str] = Field(
        default=None, sa_column=Column("kd_top_bigrams", Text, nullable=True),
    )
    # IAT of the first keystroke following an idle gap >
    # KD_START_OF_ACTION_IDLE_S (or the session-start gap before the
    # very first keystroke). Separates "initiating a command" from
    # "executing a remembered one" — real humans have measurable
    # start-of-action latency, bots don't. Median across all such
    # initiations in the session, seconds.
    #
    # Prompt-agnostic on purpose: PS1 / multi-line prompts / sudo
    # password prompts make prompt-anchored detection fragile. The
    # idle-gap approach conflates post-prompt action-start with
    # mid-session think-and-resume — acceptable for a single median
    # field; if we later want to split them, feed the concurrent
    # output-stream prompt-pattern into the ingester and fall back to
    # time-only detection when it misses.
    kd_start_of_action_latency: Optional[float] = None
    # Three-bucket pause-length histogram, counts (not ratios — raw
    # counts preserve the total-keystrokes denominator in the column
    # itself). Bucket edges are the KD_PAUSE_* module constants:
    #   burst     : IAT < KD_PAUSE_BURST_MAX_S (muscle-memory digraphs)
    #   think     : KD_PAUSE_BURST_MAX_S ≤ IAT < KD_PAUSE_THINK_MAX_S
    #               (semantic boundary, context switch)
    #   distracted: IAT ≥ KD_PAUSE_THINK_MAX_S (went to look something
    #               up, got paged, reading another window)
    # More discriminating than the flat burst_ratio / think_ratio pair:
    # C2 operators concentrate in the burst bucket with a thin tail;
    # opportunistic humans have a fat think bucket and a long
    # distracted tail.
    kd_pause_hist_burst: Optional[int] = None
    kd_pause_hist_think: Optional[int] = None
    kd_pause_hist_distracted: Optional[int] = None
    # Longest IAT in the session, seconds. The distracted-bucket count
    # alone can't tell "one 3-second pause" from "three 60-second
    # pauses" — both contribute 1-3 to the distracted bucket but
    # represent different behaviours (brief think vs actual
    # disengagement). max_pause_gap carries that signal in one scalar.
    kd_max_pause_gap: Optional[float] = None
    # Derived totals.
    total_keystrokes: Optional[int] = None
    session_duration_s: Optional[float] = None
    # Defaults to the current UTC time when the model instance is built;
    # unlike the other tables' timestamps this column is not indexed.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
class SmtpTarget(SQLModel, table=True):
    """
    Per-attacker list of victim domains observed via the SMTP honeypots.

    Each row is one (attacker_uuid, domain) pair — an attacker who relays
    mail to 500 addresses at acme.com collapses into a single row with
    count=500. Only the *domain* is stored; local-parts (the bit before
    `@`) are dropped at ingestion, so this table contains no PII beyond
    the target organisation's identity.

    Shape is designed for future V2 federation gossip: the
    `smtp_target_seen(domain)` query returns aggregate counts with zero
    cross-org attacker leakage — each operator can answer "have you seen
    this domain being targeted?" without exposing *which* attackers did.
    """
    __tablename__ = "smtp_targets"
    # Surrogate integer key; left as None so the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    attacker_uuid: str = Field(foreign_key="attackers.uuid", index=True)
    domain: str = Field(index=True)
    # Both timestamps default to the current UTC time when the model
    # instance is built; only last_seen is indexed.
    first_seen: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    last_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    # Aggregate counter — one rcpt_to / message_accepted recipient bumps this.
    count: int = Field(default=1)
    # Enforces the one-row-per-(attacker_uuid, domain) rule from the docstring.
    __table_args__ = (
        UniqueConstraint("attacker_uuid", "domain", name="uq_smtp_targets_attacker_domain"),
    )
class AttackersResponse(BaseModel):
    """Response envelope for a page of attacker records.

    Carries paging counters next to the records, which are untyped dicts
    rather than a dedicated model.
    """
    # NOTE(review): presumably the number of matching rows overall, not just
    # the rows in ``data`` — confirm against the route that builds this.
    total: int
    limit: int
    offset: int
    data: List[dict[str, Any]]

View File

@@ -0,0 +1,73 @@
"""Auth + user-management tables and DTOs."""
from typing import List, Literal
from pydantic import BaseModel, Field as PydanticField
from sqlmodel import Field, SQLModel
class User(SQLModel, table=True):
    """Account row stored in the ``users`` table."""
    __tablename__ = "users"
    uuid: str = Field(primary_key=True)
    # Indexed and unique: one account per username.
    username: str = Field(index=True, unique=True)
    # Hash algorithm is not visible in this module.
    password_hash: str
    # Plain string column. The request models in this module restrict the
    # value to "admin" / "viewer", but the column itself does not.
    role: str = Field(default="viewer")
    must_change_password: bool = Field(default=False)
# --- API Request/Response Models (Pydantic) ---
class Token(BaseModel):
    """Access-token response body."""
    access_token: str
    token_type: str
    # Optional on the wire; defaults to False when omitted.
    must_change_password: bool = False
class LoginRequest(BaseModel):
    """Credentials submitted to log in."""
    # No length bound here, unlike CreateUserRequest.username (1..64).
    username: str
    # Upper bound only; no minimum length is enforced on this field.
    # NOTE(review): 72 looks like bcrypt's input limit, which is measured in
    # bytes while max_length counts characters — confirm how the hasher
    # treats multi-byte passwords.
    password: str = PydanticField(..., max_length=72)
class ChangePasswordRequest(BaseModel):
    """Body for a user changing their own password.

    ``old_password`` carries only an upper bound, because it has to match
    whatever the account currently uses (presumably verified against the
    stored hash by the caller).

    ``new_password`` is held to the same 8..72 length policy that
    ``CreateUserRequest`` and ``ResetUserPasswordRequest`` apply. Without
    the lower bound this model accepted an empty or one-character
    replacement password, sidestepping the policy enforced on the other
    two password-setting requests.
    """
    old_password: str = PydanticField(..., max_length=72)
    new_password: str = PydanticField(..., min_length=8, max_length=72)
# --- Configuration Models ---
class CreateUserRequest(BaseModel):
    """Body for creating a user account."""
    # 1..64 characters.
    username: str = PydanticField(..., min_length=1, max_length=64)
    # 8..72 characters.
    password: str = PydanticField(..., min_length=8, max_length=72)
    # Only these two role names validate; defaults to "viewer" when omitted.
    role: Literal["admin", "viewer"] = "viewer"
class UpdateUserRoleRequest(BaseModel):
    """Body for changing a user's role."""
    # Required; must be exactly "admin" or "viewer".
    role: Literal["admin", "viewer"]
class ResetUserPasswordRequest(BaseModel):
    """Body for setting a new password on a user account.

    Unlike ``ChangePasswordRequest`` there is no ``old_password`` field.
    """
    # 8..72 characters.
    new_password: str = PydanticField(..., min_length=8, max_length=72)
class DeploymentLimitRequest(BaseModel):
    """Body for updating the deployment limit setting."""
    # Inclusive range 1..500.
    deployment_limit: int = PydanticField(..., ge=1, le=500)
class GlobalMutationIntervalRequest(BaseModel):
    """Body for updating the global mutation interval setting."""
    # A positive integer with no leading zero followed by exactly one unit
    # letter from m, d, M, y, Y (e.g. "30m", "7d").
    # NOTE(review): the unit meanings (presumably minutes / days / months /
    # years) are defined by whatever parses this string — confirm there.
    global_mutation_interval: str = PydanticField(..., pattern=r"^[1-9]\d*[mdMyY]$")
class UserResponse(BaseModel):
    """User account as returned by the API.

    Has the same fields as the ``User`` table row except ``password_hash``.
    """
    uuid: str
    username: str
    role: str
    must_change_password: bool
class ConfigResponse(BaseModel):
    """Configuration payload: a role plus the two tunable settings."""
    # NOTE(review): presumably the role of the requesting user — confirm
    # against the route that builds this response.
    role: str
    deployment_limit: int
    global_mutation_interval: str
class AdminConfigResponse(ConfigResponse):
    """``ConfigResponse`` extended with a list of user accounts."""
    users: List[UserResponse]

View File

@@ -0,0 +1,83 @@
"""Campaign — operation-level grouping of resolved attacker identities."""
from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import Column, Text
from sqlmodel import Field, SQLModel
class Campaign(SQLModel, table=True):
    """
    Campaign — one operation, one or more identities.

    Sits one level above ``AttackerIdentity``: an actor (identity) may
    appear in multiple campaigns over time, and a campaign may have
    several distinct identities cooperating (e.g. a night-shift and
    day-shift operator on the same job — fixture F5 multi_operator).

    Populated by the campaign clusterer worker (downstream of identity
    resolution). Empty rows are valid; the table ships empty until the
    clusterer lands. ``schema_version`` is non-negotiable from day one
    for the same federation-gossip reason ``AttackerIdentity`` carries
    one — bumping campaign-level feature definitions without a version
    field silently poisons cross-operator gossip in V2.

    See ``development/CAMPAIGN_CLUSTERING.md`` for the signal taxonomy
    (phase-handoff, shared-infra, temporal overlap, cohort).
    """

    __tablename__ = "campaigns"
    # No default factory: the writer must supply the primary key.
    uuid: str = Field(primary_key=True)
    schema_version: int = Field(default=1)
    # Nullable and indexed; left NULL unless the writer sets them.
    first_seen_at: Optional[datetime] = Field(default=None, index=True)
    last_seen_at: Optional[datetime] = Field(default=None, index=True)
    # Both default to the timezone-aware UTC "now" at object construction.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    # Campaign-cohesion score from the clusterer. Range [0, 1]; null
    # until the clusterer writes. Higher = more confident the linked
    # identities are part of the same operation.
    confidence: Optional[float] = Field(default=None)
    # Denormalized count of FK'd ``AttackerIdentity`` rows.
    identity_count: int = Field(default=0)
    # Aggregated fingerprint summary across member identities. Same
    # JSON-serialized list[str] in TEXT shape as
    # ``AttackerIdentity.{ja3,hassh,payload_simhashes,c2_endpoints}`` —
    # federation gossip wants the same wire shape at every layer.
    ja3_hashes: Optional[str] = Field(
        default=None, sa_column=Column("ja3_hashes", Text, nullable=True)
    )
    hassh_hashes: Optional[str] = Field(
        default=None, sa_column=Column("hassh_hashes", Text, nullable=True)
    )
    tls_cert_sha256: Optional[str] = Field(
        default=None, sa_column=Column("tls_cert_sha256", Text, nullable=True)
    )
    payload_simhashes: Optional[str] = Field(
        default=None, sa_column=Column("payload_simhashes", Text, nullable=True)
    )
    c2_endpoints: Optional[str] = Field(
        default=None, sa_column=Column("c2_endpoints", Text, nullable=True)
    )
    # Soft-merge audit trail — same revocable-merge pattern as
    # ``AttackerIdentity.merged_into_uuid``. When the clusterer
    # collapses two campaigns, the loser's row stays in place with this
    # set to the winner's UUID; resolvers follow the chain.
    # Self-referential FK onto this same table.
    merged_into_uuid: Optional[str] = Field(
        default=None, foreign_key="campaigns.uuid", index=True
    )
    # Operator-editable free-form notes — annotation surface for
    # human analysts ("APT-XX Q2 campaign", "matches CTI report 5678").
    notes: Optional[str] = Field(
        default=None, sa_column=Column("notes", Text, nullable=True)
    )
class CampaignsResponse(BaseModel):
    """Paginated list envelope: total count, the limit/offset, and the rows as plain dicts."""

    total: int
    limit: int
    offset: int
    # Rows are untyped string-keyed dicts; no per-row schema is enforced here.
    data: List[dict[str, Any]]

View File

@@ -0,0 +1,242 @@
"""Canary token tables + CRUD DTOs.
Canary tokens are decoy artifacts (operator-uploaded honeydocs / synthesised
fake configs) planted inside a decky's filesystem. When an attacker exfils
the artifact and uses it, an HTTP slug or DNS subdomain encoded into the
file is hit; the ``decnet canary`` worker observes the callback and
publishes ``canary.{token_id}.triggered`` on the bus. The webhook fanout
+ correlator pick it up the same way they handle any other attacker
event — no canary-specific consumer wiring needed downstream.
Three tables:
* :class:`CanaryBlob` — operator-uploaded source artifact, deduped by
sha256. The original bytes live on disk under
``/var/lib/decnet/canary/blobs/{sha256}``; this row carries metadata
+ refcount-aware deletion.
* :class:`CanaryToken` — one planted artifact in one decky. Either
references a blob (``blob_uuid``) and an instrumenter, or is a wholly
synthesised fake (e.g. ``aws_creds`` / ``git_config`` from a
generator) and ``blob_uuid`` is NULL. ``callback_token`` is the short
random slug embedded into HTTP URLs and DNS labels — unique across
the fleet so the worker can resolve a hit to a row in one query.
* :class:`CanaryTrigger` — append-only log of every callback hit.
``attacker_id`` is back-filled by the correlator after it attributes
``src_ip`` to an existing :class:`Attacker`; NULL until then.
We follow the project convention from :mod:`webhooks` and
:mod:`orchestrator`: stringly-typed UUIDs (``str`` PKs via
``str(uuid4())``), no FK to the composite-PK fleet table, indexes on
the join keys. Pydantic request/response shapes live in this same
file (per :mod:`feedback_models_single_source`).
"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import Any, List, Literal, Optional
from uuid import uuid4
from pydantic import BaseModel, Field as PydanticField
from sqlalchemy import Column, Index, Text
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
# --- Enum-shaped string literals -------------------------------------------
# Closed set of accepted ``kind`` values; used as the annotation on
# ``CanaryTokenCreateRequest.kind`` and ``CanaryTokenResponse.kind``.
CanaryKind = Literal["http", "dns", "aws_passive"]
"""Detection mechanism for a token.
* ``http`` — slug embedded in artifact; attacker fetches our HTTP endpoint.
* ``dns`` — subdomain embedded; attacker's resolver looks up our DNS server.
* ``aws_passive`` — fake AWS credentials with no callback wiring. Trips
zero alerts on its own; useful only as bait + as evidence the attacker
read the file when correlated with other timing signals.
"""
# Closed set of accepted ``state`` values; used as the annotation on
# ``CanaryTokenResponse.state``. The table column itself is a plain ``str``.
CanaryState = Literal["planted", "revoked", "failed"]
"""Lifecycle state of a token row.
* ``planted`` — file is in the decky and the slug/host is live.
* ``revoked`` — operator deleted the token; planter unlinked the file
(best-effort) and the slug/host stops resolving.
* ``failed`` — placement failed (docker exec error, instrumenter
rejected the blob, etc.); surfaced in the UI so the operator can
retry or pick a different kind.
"""
# --- DB tables -------------------------------------------------------------
class CanaryBlob(SQLModel, table=True):
    """Operator-uploaded source artifact, deduped by sha256.

    The same bytes uploaded twice produce the same row (insert-or-get
    semantics in the repository). We never store the bytes inline —
    only the disk path derived from ``sha256``. Deletion is
    refcount-aware: ``DELETE`` is rejected while at least one
    :class:`CanaryToken` references the blob.
    """

    __tablename__ = "canary_blobs"
    # Primary key; a fresh ``str(uuid4())`` when not supplied.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    # Unique + indexed: the column-level guard behind the sha256 dedup.
    sha256: str = Field(index=True, unique=True)
    filename: str  # original filename — UI display only, not used for path resolution
    content_type: str  # sniffed MIME (python-magic); drives instrumenter selection
    size_bytes: int
    uploaded_by: str = Field(index=True)  # User.uuid
    # Timezone-aware UTC "now" at object construction.
    uploaded_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class CanaryToken(SQLModel, table=True):
    """One canary artifact planted inside one decky."""

    __tablename__ = "canary_tokens"
    # Composite index over (decky_name, state), in addition to the
    # single-column indexes declared on the fields below.
    __table_args__ = (
        Index("ix_canary_tokens_decky", "decky_name", "state"),
    )
    # Primary key; a fresh ``str(uuid4())`` when not supplied.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    kind: str = Field(index=True)  # CanaryKind literal at the API layer
    decky_name: str = Field(index=True)  # FleetDecky.name; no FK (composite PK)
    # Nullable FK onto ``canary_blobs.uuid``.
    blob_uuid: Optional[str] = Field(
        default=None, foreign_key="canary_blobs.uuid", index=True,
    )
    # Which instrumenter mutated the blob (``docx``/``xlsx``/``pdf``/``html``/
    # ``image``/``plain``/``passthrough``). NULL when the artifact came
    # from a synthesizer (``git_config``/``env_file``/``ssh_key``/
    # ``aws_creds``/``honeydoc``); ``generator`` carries that name instead.
    instrumenter: Optional[str] = Field(default=None)
    generator: Optional[str] = Field(default=None)
    placement_path: str  # absolute path inside the container
    # Short random slug (e.g. 16 url-safe bytes). Embedded in HTTP URLs
    # *and* DNS labels — same value, different envelope, so both
    # detection paths resolve to the same token row.
    callback_token: str = Field(unique=True, index=True)
    # Stable secret used by re-instrumentation: same blob + same seed
    # = same mutated bytes, so re-seeding produces the same on-disk
    # artifact and the planter is naturally idempotent.
    secret_seed: str
    # Timezone-aware UTC "now" at object construction.
    placed_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    last_triggered_at: Optional[datetime] = Field(default=None, index=True)
    trigger_count: int = Field(default=0)
    created_by: str = Field(index=True)  # User.uuid; "system" for baseline-seeded tokens
    # Plain string column defaulting to "planted"; ``CanaryState`` lists
    # the values the response DTO accepts.
    state: str = Field(default="planted", index=True)
    # Nullable TEXT column.
    last_error: Optional[str] = Field(
        default=None, sa_column=Column("last_error", Text, nullable=True),
    )
class CanaryTrigger(SQLModel, table=True):
    """Append-only log of one callback hit.

    Index layout: a composite ``(token_uuid, occurred_at)`` index and a
    single-column index on ``attacker_id``, both declared by name in
    ``__table_args__``. The ``attacker_id`` field deliberately does not
    also pass ``index=True``: that would emit a second, identical index
    under the auto-generated name ``ix_canary_triggers_attacker_id``.
    """

    __tablename__ = "canary_triggers"
    __table_args__ = (
        Index("ix_canary_triggers_token_ts", "token_uuid", "occurred_at"),
        Index("ix_canary_triggers_attacker", "attacker_id"),
    )
    # Primary key; a fresh ``str(uuid4())`` when not supplied.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    token_uuid: str = Field(foreign_key="canary_tokens.uuid", index=True)
    # Timezone-aware UTC "now" at object construction.
    occurred_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    src_ip: str = Field(index=True)
    user_agent: Optional[str] = None
    request_path: Optional[str] = None  # HTTP path including the slug
    dns_qname: Optional[str] = None  # DNS qname when the hit came over DNS
    # JSON-encoded request headers (HTTP) or empty for DNS. Stored as
    # TEXT for cross-dialect portability — same trick as
    # :attr:`WebhookSubscription.topic_patterns`.
    raw_headers: str = Field(
        default="{}",
        sa_column=Column("raw_headers", _BIG_TEXT, nullable=False, default="{}"),
    )
    # Set by the correlator once it attributes ``src_ip`` to an existing
    # :class:`Attacker`. NULL until correlation runs (which happens on
    # the bus event we publish, so latency is sub-second).
    # Indexed via ``ix_canary_triggers_attacker`` in ``__table_args__``.
    attacker_id: Optional[str] = Field(default=None)

    def headers(self) -> dict[str, Any]:
        """Decode :attr:`raw_headers` JSON; ``{}`` on bad/empty input.

        Returns:
            The decoded mapping, or an empty dict when the stored text is
            empty, is not valid JSON, or decodes to something other than
            a JSON object.
        """
        try:
            raw = json.loads(self.raw_headers or "{}")
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # non-string value in ``raw_headers``.
            return {}
        return raw if isinstance(raw, dict) else {}
# --- API request / response shapes -----------------------------------------
class CanaryBlobResponse(BaseModel):
    """Response shape for one blob: the ``CanaryBlob`` columns plus a token reference count."""

    uuid: str
    sha256: str
    filename: str
    content_type: str
    size_bytes: int
    uploaded_by: str
    uploaded_at: datetime
    # Number of tokens currently referencing this blob. Surfaces in the
    # UI so operators don't try to delete a blob that's still in use,
    # and the API uses it to gate ``DELETE`` (returns 409).
    token_count: int = 0
class CanaryTokenCreateRequest(BaseModel):
    """Generate + plant a new token.

    Exactly one of ``blob_uuid`` (operator-supplied artifact) or
    ``generator`` (synthesised fake) must be set. Validated in the
    router so the 400 carries a clear detail message.
    """

    # Required, non-empty.
    decky_name: str = PydanticField(..., min_length=1)
    kind: CanaryKind
    # Required, non-empty.
    placement_path: str = PydanticField(..., min_length=1)
    # Both optional at the model level; the exactly-one rule is not
    # enforced by this class (see docstring).
    blob_uuid: Optional[str] = None
    generator: Optional[str] = None  # git_config | env_file | ssh_key | aws_creds | honeydoc
    # Optional override for the path-mapping helper — useful when the
    # operator wants a specific Windows-shaped path on a windows-persona
    # decky. Defaults to placement_path verbatim.
    persona_path_hint: Optional[str] = None
class CanaryTokenResponse(BaseModel):
    """Response shape for one token; carries the ``CanaryToken`` columns except ``secret_seed``."""

    uuid: str
    # Validated against the ``CanaryKind`` literal set.
    kind: CanaryKind
    decky_name: str
    # The Optional fields below have no default: they must be present in
    # the input, but may be None.
    blob_uuid: Optional[str]
    instrumenter: Optional[str]
    generator: Optional[str]
    placement_path: str
    callback_token: str
    placed_at: datetime
    last_triggered_at: Optional[datetime]
    trigger_count: int
    created_by: str
    # Validated against the ``CanaryState`` literal set.
    state: CanaryState
    last_error: Optional[str]
class CanaryTriggerResponse(BaseModel):
    """Response shape for one trigger row, with headers as a decoded mapping."""

    uuid: str
    token_uuid: str
    occurred_at: datetime
    src_ip: str
    # The Optional fields below have no default: they must be present in
    # the input, but may be None.
    user_agent: Optional[str]
    request_path: Optional[str]
    dns_qname: Optional[str]
    # A dict here, whereas the table stores JSON text in ``raw_headers``;
    # a fresh empty dict when omitted.
    headers: dict[str, Any] = PydanticField(default_factory=dict)
    attacker_id: Optional[str]
class CanaryTokensResponse(BaseModel):
    """List envelope: token rows plus a total count."""

    tokens: List[CanaryTokenResponse]
    total: int
class CanaryTriggersResponse(BaseModel):
    """List envelope: trigger rows plus a total count."""

    triggers: List[CanaryTriggerResponse]
    total: int
class CanaryBlobsResponse(BaseModel):
    """List envelope: blob rows plus a total count."""

    blobs: List[CanaryBlobResponse]
    total: int

View File

@@ -0,0 +1,15 @@
"""Generic response shapes used across multiple router domains."""
from __future__ import annotations
from pydantic import BaseModel
class MessageResponse(BaseModel):
    """Standard envelope for mutations whose only payload is a status message.

    Pinning the wire shape at the decorator (``response_model=MessageResponse``)
    prevents a handler that accidentally returns a richer dict — e.g. a user
    row with ``password_hash`` — from leaking extra fields to the client.
    """

    # The only field of the envelope; required.
    message: str

View File

@@ -0,0 +1,29 @@
"""Fleet deploy + mutate-interval request DTOs."""
from typing import Optional
from pydantic import BaseModel, ConfigDict, Field as PydanticField
from decnet.models import IniContent
class MutateIntervalRequest(BaseModel):
    """Payload for setting or clearing a mutate interval."""

    # Human-readable duration: <number><unit> where unit is m(inutes), d(ays), M(onths), y/Y(ears).
    # Minimum granularity is 1 minute. Seconds are not accepted.
    # The number must be a positive integer with no leading zero. The
    # field is optional and defaults to None.
    mutate_interval: Optional[str] = PydanticField(None, pattern=r"^[1-9]\d*[mdMyY]$")
class DeployIniRequest(BaseModel):
    """Deploy payload carrying INI text; unknown fields are rejected."""

    # extra="forbid": any key other than ``ini_content`` fails validation.
    model_config = ConfigDict(extra="forbid")
    # This field now enforces strict INI structure during Pydantic initialization.
    # The OpenAPI schema correctly shows it as a required string.
    ini_content: IniContent = PydanticField(..., description="A valid INI formatted string")
class DeployResponse(BaseModel):
    """Deploy result: a status message and a ``mode`` string."""

    message: str
    # Free-form string; the accepted values are not constrained here.
    mode: str
class PurgeResponse(BaseModel):
    """Purge result: a status message and integer counts keyed by name."""

    message: str
    # NOTE(review): keys are presumably table/entity names mapped to the
    # number of rows removed — confirm against the purge handler.
    deleted: dict[str, int]

View File

@@ -0,0 +1,72 @@
"""Fleet decky table — DB mirror of ``decnet-state.json``.
The legacy unihost / MACVLAN / IPVLAN deploy path persists fleet state to a
JSON file (``/var/lib/decnet/decnet-state.json``) via
:func:`decnet.config.save_state`. That file is consumed directly by
``decnet status``/``decnet teardown``, the sniffer, and the collector — all
host-local CLI / worker code that may run on a box without the API daemon.
The FleetDecky table is a *mirror* of that JSON state inside MySQL/SQLite so
DB-only consumers (the orchestrator, the web dashboard, the REST API) can
see fleet decoys without touching the filesystem.
Both writers — CLI ``decnet deploy`` (``engine.deployer.deploy``) and the
web/API deploy path (``web.router.fleet.api_deploy_deckies``) — write to
*both* surfaces. A reconciler (``decnet.fleet.reconciler``) handles drift.
Schema mirrors :class:`decnet.web.db.models.swarm.DeckyShard` field-for-field
so the dashboard can render fleet rows with the same card shape. The PK is
composite ``(host_uuid, name)`` to future-proof for multi-host motherships
(a master that runs its own local fleet AND swarm-shards onto workers). In
unihost mode ``host_uuid`` defaults to the sentinel
:data:`LOCAL_HOST_SENTINEL`; we deliberately do NOT FK to ``swarm_hosts``
because the local mothership is not enrolled as a swarm worker.
"""
from datetime import datetime, timezone
from typing import Optional
from sqlalchemy import Column, Text
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
# Default value of ``FleetDecky.host_uuid``: marks a row as belonging to the
# local mothership rather than to an enrolled swarm host.
LOCAL_HOST_SENTINEL = "local"
class FleetDecky(SQLModel, table=True):
    """A unihost / MACVLAN / IPVLAN decky deployed on the local mothership.

    Disjoint from :class:`DeckyShard` (SWARM-only) and :class:`TopologyDecky`
    (MazeNET-only). Composite PK lets multiple hosts coexist when a future
    mothership runs both a local fleet and acts as a swarm master.
    """

    __tablename__ = "fleet_deckies"
    # First half of the composite primary key; defaults to the local sentinel.
    host_uuid: str = Field(
        default=LOCAL_HOST_SENTINEL, primary_key=True, index=True,
    )
    # Second half of the composite primary key.
    name: str = Field(primary_key=True)
    # JSON list of service names on this decky (snapshot of assignment).
    # The "[]" default is declared on the column only, not on the model field.
    services: str = Field(
        sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]")
    )
    # Full serialised DeckyConfig — lets the dashboard render the same rich
    # card (hostname/distro/archetype/service_config/mutate_interval) without
    # round-tripping to load_state() on every page render.
    decky_config: Optional[str] = Field(
        default=None, sa_column=Column("decky_config", _BIG_TEXT, nullable=True)
    )
    decky_ip: Optional[str] = Field(default=None)
    # pending|running|failed|torn_down|degraded|tearing_down|teardown_failed
    state: str = Field(default="pending", index=True)
    # Nullable TEXT column.
    last_error: Optional[str] = Field(
        default=None, sa_column=Column("last_error", Text, nullable=True),
    )
    compose_hash: Optional[str] = Field(default=None)
    # Last reconciler observation (docker inspect) — lets the dashboard show
    # "stale" rows whose reconciler hasn't ticked.
    last_seen: Optional[datetime] = Field(default=None)
    # Timezone-aware UTC "now" at object construction; nothing in this
    # class refreshes it on update.
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

View File

@@ -0,0 +1,14 @@
"""Health-endpoint DTOs."""
from typing import Literal, Optional
from pydantic import BaseModel
class ComponentHealth(BaseModel):
    """Health of one component: a two-valued status plus optional detail text."""

    # Only "ok" or "failing" validate.
    status: Literal["ok", "failing"]
    # Optional; None when omitted.
    detail: Optional[str] = None
class HealthResponse(BaseModel):
    """Overall health: a three-valued status plus per-component results."""

    # Only "healthy", "degraded" or "unhealthy" validate.
    status: Literal["healthy", "degraded", "unhealthy"]
    # Maps a component name to its ``ComponentHealth``.
    components: dict[str, ComponentHealth]

View File

@@ -0,0 +1,222 @@
"""Log / Bounty / Credential / State tables + their list-response DTOs."""
from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import Column, Index, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
class Log(SQLModel, table=True):
    """One log row (table ``logs``): decky, service, event type and attacker IP, plus the raw line, ``fields`` text and optional ``msg``."""

    __tablename__ = "logs"
    # Primary key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Timezone-aware UTC "now" at object construction.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc), index=True)
    decky: str = Field(index=True)
    service: str = Field(index=True)
    event_type: str = Field(index=True)
    attacker_ip: str = Field(index=True)
    # Long-text columns — use TEXT so MySQL DDL doesn't truncate to VARCHAR(255).
    # TEXT is equivalent to plain text in SQLite.
    raw_line: str = Field(sa_column=Column("raw_line", Text, nullable=False))
    fields: str = Field(sa_column=Column("fields", Text, nullable=False))
    msg: Optional[str] = Field(default=None, sa_column=Column("msg", Text, nullable=True))
    # OTEL trace context — bridges the collector→ingester trace to the SSE
    # read path. Nullable so pre-existing rows and non-traced deployments
    # are unaffected.
    trace_id: Optional[str] = Field(default=None)
    span_id: Optional[str] = Field(default=None)
class Bounty(SQLModel, table=True):
    """One bounty row (table ``bounty``): a typed payload tied to a decky, service and attacker IP."""

    __tablename__ = "bounty"
    # Primary key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Timezone-aware UTC "now" at object construction.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc), index=True)
    decky: str = Field(index=True)
    service: str = Field(index=True)
    attacker_ip: str = Field(index=True)
    bounty_type: str = Field(index=True)
    # Non-null TEXT column; the payload format is not constrained here.
    payload: str = Field(sa_column=Column("payload", Text, nullable=False))
class Credential(SQLModel, table=True):
    """One observed credential attempt against a decky service.

    Forward-compatible across every auth-bearing service in the fleet:
    SSH user+pass, Telnet user+pass, SMTP domain+pass, LDAP dn+pass,
    Redis password-only, etc. The two universal lossless representations
    (``secret_b64`` + ``secret_sha256``) hoist to indexed columns so
    cross-service reuse queries don't scan opaque JSON.

    Per-service identity (the human-meaningful "who's authenticating")
    lives in ``principal`` — username for SSH, domain for SMTP, dn for
    LDAP. Nullable for principal-less mechanisms (Redis AUTH, bearer
    tokens). Fully service-specific keys ride in ``fields`` JSON.

    Dedup contract: same (attacker_ip, decky_name, service, secret_sha256,
    principal_or_empty) tuple → upsert, bumps ``attempt_count`` and
    ``last_seen``. Different secret or different principal → new row.
    The table declares no unique constraint over that tuple, so the
    contract is not enforced at the schema level.

    ``attacker_uuid`` is backfilled by the profiler once an Attacker row
    has been minted for the source IP. It is nullable on first write so
    the credential ingest path stays decoupled from the profiler.
    """

    __tablename__ = "credentials"
    # Composite indexes pairing the secret hash and the principal with
    # ``service``; the single-column indexes are declared on the fields.
    __table_args__ = (
        Index("ix_credentials_secret_service", "secret_sha256", "service"),
        Index("ix_credentials_principal_service", "principal", "service"),
    )
    # Primary key; None until the database assigns it.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Keyed by attacker IP (not attackers.uuid) on the write path to
    # avoid the chicken-and-egg of landing a credential before the
    # profiler has minted the Attacker. The profiler backfills
    # ``attacker_uuid`` once it knows the IP, so cross-IP reuse queries
    # eventually have an indexed FK to traverse.
    attacker_ip: str = Field(index=True)
    attacker_uuid: Optional[str] = Field(
        default=None, foreign_key="attackers.uuid", index=True
    )
    decky_name: str = Field(index=True)
    service: str = Field(index=True)
    principal: Optional[str] = Field(default=None, index=True, max_length=256)
    # Discriminator for what `secret_b64` actually contains. Default
    # ``"plaintext"`` — a recoverable password the attacker sent on the
    # wire (SSH/Telnet/FTP/IMAP/POP3/SMTP/Redis/LDAP/MQTT). Other kinds:
    # ``"postgres_md5_challenge"`` (md5(md5(pw+user)+salt) hex bytes
    # the attacker sent in the Postgres password message — plaintext
    # irrecoverable), ``"vnc_des_response"`` (16-byte DES-encrypted
    # challenge response — same shape).
    #
    # Reuse semantics gracefully degrade: same secret_sha256 only
    # correlates within a single ``secret_kind``. Cross-kind matches
    # are meaningless because different challenges produce different
    # bytes for the same plaintext password.
    secret_kind: str = Field(default="plaintext", index=True, max_length=32)
    # Universal lossless secret representations. For non-plaintext
    # kinds, secret_b64 is base64 of the raw attacker-sent bytes (after
    # hex-decode for protocols that ship the response as a hex string).
    secret_sha256: str = Field(index=True, max_length=64)
    secret_b64: Optional[str] = Field(default=None, max_length=2048)
    # Best-effort printable form — non-printable bytes collapsed to '?'
    # by either auth-helper.c (SSH/Telnet) or the ingester's legacy
    # adapter (FTP/POP3/IMAP/SMTP). May be lossy on non-UTF8.
    secret_printable: Optional[str] = Field(default=None, max_length=512)
    outcome: Optional[str] = Field(default=None, max_length=16)  # success|failure|observed
    # JSON text of service-specific keys. The "{}" default is declared on
    # the column only, not on the model field.
    fields: str = Field(
        sa_column=Column("fields", _BIG_TEXT, nullable=False, default="{}")
    )
    # Both default to the timezone-aware UTC "now" at object construction.
    first_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    last_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    # Starts at 1: the row's creation counts as the first attempt.
    attempt_count: int = Field(default=1)
class CredentialReuse(SQLModel, table=True):
    """One observed credential reuse pattern across deckies and/or services.

    A row here is a *finding* produced by the correlator: the same
    ``(secret_sha256, secret_kind, principal)`` tuple was observed
    against ``target_count`` distinct decky×service pairs. Upserted on
    that natural key — the row accumulates new deckies/services/IPs
    over time as the credential is reused.

    The ``confidence`` column is reserved for a future fuzzy-match pass
    (credential variants, e.g. ``hunter2`` vs ``hunter22``); rows
    written by the exact-secret correlator are always 1.0.
    """

    __tablename__ = "credential_reuse"
    # Uniqueness is enforced on ``principal_key`` (never NULL), not on the
    # nullable ``principal`` column — see the comment on those fields.
    __table_args__ = (
        UniqueConstraint(
            "secret_sha256", "secret_kind", "principal_key",
            name="uq_credential_reuse_secret_principal",
        ),
    )
    # String primary key with no default factory: the writer supplies it.
    id: str = Field(primary_key=True, max_length=36)
    secret_sha256: str = Field(index=True, max_length=64)
    secret_kind: str = Field(index=True, max_length=32)
    # Optional human-readable principal (e.g. "root"). Nullable — for
    # cross-principal reuse rows we leave this null, but we still need
    # a unique constraint, so ``principal_key`` is the non-null
    # canonicalised form ("" when principal is null) used in the
    # uniqueness tuple. SQLite's NULLs-distinct-in-UNIQUE behaviour
    # would otherwise let duplicate null-principal rows through.
    principal: Optional[str] = Field(default=None, max_length=256)
    principal_key: str = Field(default="", max_length=256)
    # The four columns below hold JSON-encoded string lists as text; the
    # "[]" default is set on both the model field and the column.
    attacker_uuids: str = Field(
        default="[]",
        sa_column=Column("attacker_uuids", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    attacker_ips: str = Field(
        default="[]",
        sa_column=Column("attacker_ips", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    deckies: str = Field(
        default="[]",
        sa_column=Column("deckies", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    services: str = Field(
        default="[]",
        sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]"),
    )  # JSON list[str]
    # COUNT(DISTINCT decky||':'||service). The discriminative scalar
    # for ranking and filtering — a credential seen on 12 targets is
    # far more interesting than one seen on 2.
    target_count: int = Field(default=0, index=True)
    attempt_count: int = Field(default=0)
    confidence: float = Field(default=1.0)
    # All three default to the timezone-aware UTC "now" at object construction.
    first_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    last_seen: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
class CredentialReuseResponse(BaseModel):
    """Paginated list envelope: total count, the limit/offset, and the rows as plain dicts."""

    total: int
    limit: int
    offset: int
    # Rows are untyped string-keyed dicts; no per-row schema is enforced here.
    data: List[dict[str, Any]]
class State(SQLModel, table=True):
    """Key/value row (table ``state``): a string key mapped to a large text value."""

    __tablename__ = "state"
    key: str = Field(primary_key=True)
    # JSON-serialized DecnetConfig or other state blobs — can be large as
    # deckies/services accumulate. MEDIUMTEXT on MySQL (16 MiB ceiling).
    value: str = Field(sa_column=Column("value", _BIG_TEXT, nullable=False))
class LogsResponse(BaseModel):
    """Paginated list envelope: total count, the limit/offset, and the rows as plain dicts."""

    total: int
    limit: int
    offset: int
    # Rows are untyped string-keyed dicts; no per-row schema is enforced here.
    data: List[dict[str, Any]]
class BountyResponse(BaseModel):
    """Paginated list envelope: total count, the limit/offset, and the rows as plain dicts."""

    total: int
    limit: int
    offset: int
    # Rows are untyped string-keyed dicts; no per-row schema is enforced here.
    data: List[dict[str, Any]]
class CredentialsResponse(BaseModel):
    """Paginated list envelope: total count, the limit/offset, and the rows as plain dicts."""

    total: int
    limit: int
    offset: int
    # Rows are untyped string-keyed dicts; no per-row schema is enforced here.
    data: List[dict[str, Any]]
class StatsResponse(BaseModel):
    """Four integer counters: logs, unique attackers, active deckies, deployed deckies."""

    total_logs: int
    unique_attackers: int
    # NOTE(review): the distinction between "active" and "deployed" is not
    # defined in this module — confirm against the stats query.
    active_deckies: int
    deployed_deckies: int

View File

@@ -0,0 +1,111 @@
"""Orchestrator-emitted activity events.
Purpose-built sibling to ``logs.Log`` so attacker-originated events stay
cleanly separable from synthetic life-injection events at query time.
The orchestrator worker is the sole writer.
"""
from datetime import datetime, timezone
from typing import Any, List, Optional
from uuid import uuid4
from pydantic import BaseModel
from sqlalchemy import Column, Index, Text
from sqlmodel import Field, SQLModel
class OrchestratorEvent(SQLModel, table=True):
    """One orchestrator-driven action against a decky.

    ``kind`` discriminates the two MVP flavours:

    * ``"traffic"`` — a protocol-driven interaction (SSH command exec for
      MVP). ``src_decky_uuid`` is the *logical* originator and may differ
      from the actual TCP source for the duration of the MVP, where the
      orchestrator process drives the connection from the host. ``v1``
      will execute the connection from inside the source container.
    * ``"file"`` — a filesystem touch via ``docker exec`` against the
      destination decky. ``src_decky_uuid`` is null.

    ``payload`` is the per-action JSON envelope: command run, exit code,
    stdout/stderr digest, file path, byte counts, etc. Schema is
    deliberately loose — the worker can extend it without a migration.
    """

    __tablename__ = "orchestrator_events"
    # Composite index over (dst_decky_uuid, ts), in addition to the
    # single-column indexes declared on the fields below.
    __table_args__ = (
        Index("ix_orchestrator_events_dst_ts", "dst_decky_uuid", "ts"),
    )
    # Primary key; a fresh ``str(uuid4())`` when not supplied.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    # Timezone-aware UTC "now" at object construction.
    ts: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    kind: str = Field(index=True, max_length=16)  # traffic|file
    protocol: str = Field(index=True, max_length=16)  # ssh for MVP
    action: str = Field(max_length=64)  # exec:uptime|file:create|...
    # No FK to topology_deckies: dst/src may be a TopologyDecky.uuid
    # (MazeNET source), a "host_uuid:name" composite (fleet / SWARM shard
    # sources), or — for retired deckies — a row that's already gone. The
    # column is an opaque identifier matching whatever
    # ``BaseRepository.list_running_deckies`` emits in its ``uuid`` field.
    # Index is kept; the FK was misleading and broke fleet-source events.
    src_decky_uuid: Optional[str] = Field(default=None, index=True)
    dst_decky_uuid: str = Field(index=True)
    # Defaults to False, so a writer must set it explicitly on success.
    success: bool = Field(default=False, index=True)
    # JSON text. The "{}" default is declared on the column only, not on
    # the model field.
    payload: str = Field(
        sa_column=Column("payload", Text, nullable=False, default="{}")
    )
class OrchestratorEventsResponse(BaseModel):
    """Paginated list envelope: total count, the limit/offset, and the rows as plain dicts."""

    total: int
    limit: int
    offset: int
    # Rows are untyped string-keyed dicts; no per-row schema is enforced here.
    data: List[dict[str, Any]]
class OrchestratorEmail(SQLModel, table=True):
    """One fake email generated by the ``decnet emailgen`` worker.

    Sibling table to :class:`OrchestratorEvent` — kept disjoint because
    email rows carry domain-specific fields (subject, message_id,
    in_reply_to, language) that have no analogue in the SSH/file events
    and would otherwise bloat ``OrchestratorEvent.payload``.

    The mail decky's UUID lives in ``mail_decky_uuid`` (the host serving
    the IMAP/POP3 mailbox). ``thread_id`` is a worker-side UUID used to
    chain replies; ``in_reply_to`` is the parent email's RFC 2822
    Message-ID header value (or ``None`` for thread roots).

    ``payload`` follows the same loose-JSON convention as
    :class:`OrchestratorEvent`: ``bytes``, ``generation_ms``, ``model``,
    ``mannerisms_used``, etc. The worker can extend it without a
    migration.

    Index layout: ``thread_id`` is indexed once, by the named
    ``ix_orchestrator_emails_thread`` entry in ``__table_args__``. The
    field deliberately does not also pass ``index=True``: that would emit
    a second, identical index under the auto-generated name
    ``ix_orchestrator_emails_thread_id``.
    """

    __tablename__ = "orchestrator_emails"
    __table_args__ = (
        Index("ix_orchestrator_emails_mail_ts", "mail_decky_uuid", "ts"),
        Index("ix_orchestrator_emails_thread", "thread_id"),
    )
    # Primary key; a fresh ``str(uuid4())`` when not supplied.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    # Timezone-aware UTC "now" at object construction.
    ts: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    mail_decky_uuid: str = Field(index=True)
    # Indexed via ``ix_orchestrator_emails_thread`` in ``__table_args__``.
    thread_id: str
    message_id: str = Field(max_length=255)
    in_reply_to: Optional[str] = Field(default=None, max_length=255)
    sender_email: str = Field(max_length=255, index=True)
    recipient_email: str = Field(max_length=255, index=True)
    subject: str = Field(max_length=512)
    language: str = Field(max_length=8, default="en")
    eml_path: str = Field(max_length=1024)
    # Defaults to False, so a writer must set it explicitly on success.
    success: bool = Field(default=False, index=True)
    # JSON text. The "{}" default is declared on the column only, not on
    # the model field.
    payload: str = Field(
        sa_column=Column("payload", Text, nullable=False, default="{}")
    )
class OrchestratorEmailsResponse(BaseModel):
    """Paginated list envelope: total count, the limit/offset, and the rows as plain dicts."""

    total: int
    limit: int
    offset: int
    # Rows are untyped string-keyed dicts; no per-row schema is enforced here.
    data: List[dict[str, Any]]

View File

@@ -0,0 +1,107 @@
"""Realism — synthetic-file state across orchestrator ticks.
The orchestrator's pre-realism file generator forgot every file the
moment it was planted: each tick wrote a brand-new ``notes-{ts}.txt``
with a literal unix-epoch suffix. No edits, no rotation, no diurnal
shape — three of the realism failures the migration is fixing.
:class:`SyntheticFile` is the per-(decky, path) memory that lets the
realism engine read back yesterday's ``TODO.md``, mutate it, write
back the new body, and let the dashboard inspect the lineage.
Pre-v1: schema lives directly in the SQLModel; no ``_migrate_*``
helper (per the project's "no new migrations pre-v1" rule —
``feedback_no_new_migrations_prev1.md``). Alembic lands at v1.
"""
from datetime import datetime, timezone
from typing import Any, List
from uuid import uuid4
from pydantic import BaseModel
from sqlalchemy import Column, Index, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
# 65536 = 64 * 1024. Nothing in this module applies the cap; per the note
# below, clipping happens in the repository layer.
SYNTHETIC_FILE_BODY_LIMIT = 65536
"""Cap on persisted ``synthetic_files.last_body`` bytes.
Enforced by the repo on both insert and update — callers may pass the
full body; the repo clips. Large blobs (DOCX/PDF, canary artifacts) are
wasted disk on the master side; the decky filesystem holds the canonical
bytes."""
class SyntheticFile(SQLModel, table=True):
    """One realism-planted file on one decky.

    The unique key is ``(decky_uuid, path)`` — there's at most one
    realism record per location, even if the planter has rotated the
    file (rotation updates ``edit_count`` and ``last_modified``, not
    a new row).

    ``last_body`` is capped — large blobs (DOCX/PDF, future canary
    artifacts) are truncated at write time. The edit-in-place flow
    (stage 3b) only needs the body when the content class supports
    body-level mutation (``note``, ``todo``, ``draft``, ``script``),
    so storing the canonical bytes for binary blobs would be wasted.

    ``content_hash`` is sha256 of the *body bytes only* — never of
    metadata or wrapper headers — so a hash compare is a cheap
    "did the body change?" check across edits.
    """

    __tablename__ = "synthetic_files"
    __table_args__ = (
        # At most one row per (decky, path).
        UniqueConstraint(
            "decky_uuid", "path", name="uq_synthetic_files_decky_path",
        ),
        # Composite index over (decky_uuid, last_modified).
        Index("ix_synthetic_files_decky_modified", "decky_uuid", "last_modified"),
    )
    # Surrogate primary key: a random UUID4 rendered as a string.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    decky_uuid: str = Field(index=True, max_length=64)
    # Capped at 512 so the (decky_uuid, path) unique index fits MySQL's
    # 3072-byte utf8mb4 limit: (64+512)*4 = 2304 bytes. Real realism +
    # canary paths are well under (longest is
    # ``/home/<persona>/Documents/Q3-Operations-Review.docx``, ~70 chars).
    path: str = Field(max_length=512)
    persona: str = Field(max_length=128)  # EmailPersona.name
    content_class: str = Field(max_length=32, index=True)  # ContentClass enum value
    # Both timestamps default to the current time in UTC (timezone-aware).
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True,
    )
    last_modified: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
    edit_count: int = Field(default=0)
    content_hash: str = Field(max_length=64)  # sha256 hex
    # TEXT column, NOT NULL, defaulting to the empty string.
    last_body: str = Field(
        sa_column=Column("last_body", Text, nullable=False, default="")
    )
class SyntheticFilesResponse(BaseModel):
    """List envelope for synthetic-file rows, with paging counters."""

    total: int
    limit: int
    offset: int
    # Rows are untyped dicts rather than a nested model, so the per-row
    # shape is not validated here.
    data: List[dict[str, Any]]
class RealismConfig(SQLModel, table=True):
    """Operator-tunable realism knobs.

    Single-row-per-key schema: each row carries one piece of operator
    config (today: ``key="weights"`` → JSON encoding the planner's
    user/system/canary weights and canary probability). The planner
    reads in-memory module globals; the orchestrator worker refreshes
    those globals from this table on a periodic tick.

    UUID PK + unique key per ``feedback_uuid_over_natural_keys.md``.
    """

    __tablename__ = "realism_config"
    # Surrogate primary key: a random UUID4 rendered as a string.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    # Natural key; uniqueness is enforced by the column itself.
    key: str = Field(max_length=64, unique=True, index=True)
    # TEXT column, NOT NULL, defaulting to an empty JSON object ("{}").
    value: str = Field(
        sa_column=Column("value", Text, nullable=False, default="{}"),
    )
    # Defaults to the current time in UTC when the row object is created.
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )

View File

@@ -0,0 +1,200 @@
"""Swarm host + decky shard tables and their HTTP DTOs."""
from datetime import datetime, timezone
from typing import Annotated, Any, Optional
from pydantic import BaseModel, Field as PydanticField
from sqlalchemy import Column, Text
from sqlmodel import Field, SQLModel
from decnet.models import DecnetConfig
from ._base import _BIG_TEXT
class SwarmHost(SQLModel, table=True):
    """A worker host enrolled into a DECNET swarm.

    Rows exist only on the master. Populated by `decnet swarm enroll` and
    read by the swarm controller when sharding deckies onto workers.
    """

    __tablename__ = "swarm_hosts"
    # No default factory: the caller must supply the UUID.
    uuid: str = Field(primary_key=True)
    name: str = Field(index=True, unique=True)
    address: str  # IP or hostname reachable by the master
    agent_port: int = Field(default=8765)
    status: str = Field(default="enrolled", index=True)
    # Timestamp of the last successful agent /health probe. Stored as a
    # datetime (not an ISO-8601 string); defaults to None.
    last_heartbeat: Optional[datetime] = Field(default=None)
    client_cert_fingerprint: str  # SHA-256 hex of worker's issued client cert
    # SHA-256 hex of the updater-identity cert, if the host was enrolled
    # with ``--updater`` / ``issue_updater_bundle``. ``None`` for hosts
    # that only have an agent identity.
    updater_cert_fingerprint: Optional[str] = Field(default=None)
    # Directory on the master where the per-worker cert bundle lives
    cert_bundle_path: str
    enrolled_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # Free-form operator notes, stored in a nullable TEXT column.
    notes: Optional[str] = Field(default=None, sa_column=Column("notes", Text, nullable=True))
    # Per-host driver preference. True => deckies on this host run over IPvlan
    # (L2) instead of macvlan — required when the host is a VirtualBox guest
    # bridged over Wi-Fi, because Wi-Fi APs only allow one MAC per station
    # and macvlan's per-container MACs rotate the VM's DHCP lease.
    use_ipvlan: bool = Field(default=False)
class DeckyShard(SQLModel, table=True):
    """Mapping of a single decky to the worker host running it (swarm mode)."""

    __tablename__ = "decky_shards"
    # The decky name is the primary key, so it is unique across the table.
    decky_name: str = Field(primary_key=True)
    host_uuid: str = Field(foreign_key="swarm_hosts.uuid", index=True)
    # JSON list of service names running on this decky (snapshot of assignment).
    services: str = Field(sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]"))
    # Full serialised DeckyConfig from the most recent dispatch or heartbeat.
    # Lets the dashboard render the same rich card (hostname/distro/archetype/
    # service_config/mutate_interval) that the local-fleet view uses, without
    # needing a live round-trip to the worker for every page render.
    decky_config: Optional[str] = Field(
        default=None, sa_column=Column("decky_config", _BIG_TEXT, nullable=True)
    )
    decky_ip: Optional[str] = Field(default=None)
    # Lifecycle vocabulary:
    # pending|running|failed|torn_down|degraded|tearing_down|teardown_failed
    state: str = Field(default="pending", index=True)
    last_error: Optional[str] = Field(default=None, sa_column=Column("last_error", Text, nullable=True))
    compose_hash: Optional[str] = Field(default=None)
    # Timestamp of the last heartbeat that echoed this shard; lets the UI
    # show "stale" deckies whose agent has gone silent.
    last_seen: Optional[datetime] = Field(default=None)
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
# --- Swarm API DTOs ---
# Request/response contracts for the master-side swarm controller
# (decnet/web/swarm_api.py). The underlying SQLModel tables — SwarmHost and
# DeckyShard — live above; these are the HTTP-facing shapes.
class SwarmEnrollRequest(BaseModel):
    """Request body for enrolling a worker host into the swarm."""

    # x509 CommonName is capped at 64 bytes (RFC 5280 UB-common-name) — the
    # cert issuer would reject anything longer with a ValueError.
    # Pattern: ASCII hostname-safe characters only. The name is embedded
    # both in the CN and as a SAN DNS entry; x509.DNSName only accepts
    # A-label ASCII, so non-ASCII would blow up at issuance.
    name: str = PydanticField(
        ..., min_length=1, max_length=64,
        pattern=r"^[A-Za-z0-9][A-Za-z0-9._\-]*$",
    )
    # Same character set as ``name`` plus ":"; the first character must
    # still be alphanumeric.
    address: str = PydanticField(
        ..., min_length=1, max_length=253,
        pattern=r"^[A-Za-z0-9][A-Za-z0-9._:\-]*$",
        description="IP or DNS the master uses to reach the worker",
    )
    agent_port: int = PydanticField(default=8765, ge=1, le=65535)
    # Each SAN entry is validated with the same length bounds and pattern
    # as ``address``.
    sans: list[
        Annotated[
            str,
            PydanticField(
                min_length=1, max_length=253,
                pattern=r"^[A-Za-z0-9][A-Za-z0-9._:\-]*$",
            ),
        ]
    ] = PydanticField(
        default_factory=list,
        description="Extra SANs (IPs / hostnames) to embed in the worker cert",
    )
    notes: Optional[str] = None
    issue_updater_bundle: bool = PydanticField(
        default=False,
        description="If true, also issue an updater cert (CN=updater@<name>) for the remote self-updater",
    )
class SwarmUpdaterBundle(BaseModel):
    """Subset of SwarmEnrolledBundle for the updater identity."""

    fingerprint: str
    # PEM-encoded certificate and private key (per the ``_pem`` suffix).
    updater_cert_pem: str
    updater_key_pem: str
class SwarmEnrolledBundle(BaseModel):
    """Cert bundle returned to the operator — must be delivered to the worker."""

    host_uuid: str
    name: str
    address: str
    agent_port: int
    fingerprint: str
    # PEM-encoded CA certificate, worker certificate and worker private key.
    ca_cert_pem: str
    worker_cert_pem: str
    worker_key_pem: str
    # Optional nested updater bundle; defaults to None.
    updater: Optional[SwarmUpdaterBundle] = None
class SwarmHostView(BaseModel):
    """HTTP-facing view of a SwarmHost row.

    Mirrors the table's fields except ``cert_bundle_path``, which is not
    part of this shape.
    """

    uuid: str
    name: str
    address: str
    agent_port: int
    status: str
    last_heartbeat: Optional[datetime] = None
    client_cert_fingerprint: str
    updater_cert_fingerprint: Optional[str] = None
    enrolled_at: datetime
    notes: Optional[str] = None
    use_ipvlan: bool = False
class DeckyShardView(BaseModel):
    """One decky → host mapping, enriched with the host's identity for display."""

    decky_name: str
    decky_ip: Optional[str] = None  # resolved from the stored DecnetConfig at read time
    host_uuid: str
    # Host identity fields carried alongside the shard for display.
    host_name: str
    host_address: str
    host_status: str
    # Already-decoded list here (the DeckyShard table stores a JSON string).
    services: list[str]
    state: str
    last_error: Optional[str] = None
    compose_hash: Optional[str] = None
    updated_at: datetime
    # Enriched fields lifted from the stored DeckyConfig snapshot so the
    # dashboard can render the same card shape as the local-fleet view.
    hostname: Optional[str] = None
    distro: Optional[str] = None
    archetype: Optional[str] = None
    service_config: dict[str, dict[str, Any]] = {}
    mutate_interval: Optional[int] = None
    # NOTE(review): float defaulting to 0.0 — presumably a unix timestamp;
    # confirm against the producer.
    last_mutated: float = 0.0
    last_seen: Optional[datetime] = None
class SwarmDeployRequest(BaseModel):
    """Request body for a swarm deploy: a full DecnetConfig plus two flags."""

    config: DecnetConfig
    # Both flags default to off.
    dry_run: bool = False
    no_cache: bool = False
class SwarmTeardownRequest(BaseModel):
    """Request body for a swarm teardown; both selectors are optional."""

    host_uuid: Optional[str] = PydanticField(
        default=None,
        description="If set, tear down only this worker; otherwise tear down all hosts",
    )
    # NOTE(review): presumably narrows the teardown to a single decky —
    # confirm against the router.
    decky_id: Optional[str] = None
class SwarmHostResult(BaseModel):
    """Per-host outcome entry, collected in ``SwarmDeployResponse.results``."""

    host_uuid: str
    host_name: str
    ok: bool
    # Untyped detail payload; may be omitted.
    detail: Any | None = None
class SwarmDeployResponse(BaseModel):
    """Response wrapper holding one SwarmHostResult per host."""

    results: list[SwarmHostResult]
class SwarmHostHealth(BaseModel):
    """Per-host health entry, collected in ``SwarmCheckResponse.results``."""

    host_uuid: str
    name: str
    address: str
    reachable: bool
    # Untyped detail payload; may be omitted.
    detail: Any | None = None
class SwarmCheckResponse(BaseModel):
    """Response wrapper holding one SwarmHostHealth per host."""

    results: list[SwarmHostHealth]

View File

@@ -0,0 +1,442 @@
"""MazeNET topology tables + the REST DTOs that wrap them."""
from datetime import datetime, timezone
from typing import Annotated, Any, Literal, Optional
from uuid import uuid4
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field as PydanticField
from sqlalchemy import Column, Index, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
# --- MazeNET tables ---
# Nested deception topologies: an arbitrary-depth DAG of LANs connected by
# multi-homed "bridge" deckies. Purpose-built; disjoint from DeckyShard which
# remains SWARM-only.
class Topology(SQLModel, table=True):
    """Root row of a MazeNET topology.

    LANs, deckies, edges, status events and mutations reference this row
    through their ``topology_id`` foreign key.
    """

    __tablename__ = "topologies"
    # Surrogate primary key: a random UUID4 rendered as a string.
    id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    name: str = Field(index=True, unique=True)
    mode: str = Field(default="unihost")  # unihost|agent
    # When ``mode == "agent"``, pins this topology to a specific enrolled
    # worker. ``None`` for unihost topologies (master-local deploy).
    target_host_uuid: Optional[str] = Field(
        default=None, foreign_key="swarm_hosts.uuid", index=True
    )
    # Full TopologyConfig snapshot (including seed) used at generation time.
    config_snapshot: str = Field(
        sa_column=Column("config_snapshot", _BIG_TEXT, nullable=False, default="{}")
    )
    status: str = Field(
        default="pending", index=True
    )  # pending|deploying|active|degraded|failed|tearing_down|torn_down
    status_changed_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    # Optimistic-concurrency token. Bumped by repo methods that mutate
    # the topology or any child row when an expected_version is supplied.
    # Callers pass their last-seen version; mismatch raises VersionConflict.
    version: int = Field(default=1, nullable=False)
    # Set by the heartbeat handler when an agent's reported
    # ``applied_version_hash`` diverges from what we expect it to be
    # running. Drained by the mutator watch loop, which re-pushes via
    # AgentClient and clears the flag. NULL for unihost topologies.
    # NOTE(review): the column is declared nullable=False with
    # default=False, so "NULL" above cannot literally be stored — confirm
    # whether unihost rows are meant to carry False instead.
    needs_resync: bool = Field(default=False, nullable=False)
    # JSON-serialised list of EmailPersona dicts consumed by the
    # ``decnet emailgen`` worker. Empty list = no fake mailbox owners
    # configured for this topology, the worker skips it.
    email_personas: str = Field(
        sa_column=Column(
            "email_personas", _BIG_TEXT, nullable=False, default="[]"
        )
    )
    # ISO 639-1 language code applied to any persona that doesn't override
    # ``language`` itself. English by default; ANTI's deployments default
    # to "es" by editing this column.
    language_default: str = Field(default="en", max_length=8)
class LAN(SQLModel, table=True):
    """One LAN inside a MazeNET topology; the name is unique per topology."""

    __tablename__ = "lans"
    __table_args__ = (UniqueConstraint("topology_id", "name", name="uq_lan_topology_name"),)
    # Surrogate primary key: a random UUID4 rendered as a string.
    id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    topology_id: str = Field(foreign_key="topologies.id", index=True)
    name: str
    # Populated after the Docker network is created; nullable before deploy.
    docker_network_id: Optional[str] = Field(default=None)
    subnet: str
    is_dmz: bool = Field(default=False)
    # Canvas layout coordinates (set by the web editor). Nullable so
    # generator-emitted LANs don't need auto-layout at generation time.
    x: Optional[float] = Field(default=None)
    y: Optional[float] = Field(default=None)
class TopologyDecky(SQLModel, table=True):
    """A decky belonging to a MazeNET topology.

    Disjoint from DeckyShard (which is SWARM-only). UUID PK; decky name is
    unique only within a topology, so two topologies can both have a
    ``decky-01`` without colliding.
    """

    __tablename__ = "topology_deckies"
    __table_args__ = (
        # Enforces the per-topology name uniqueness described above.
        UniqueConstraint("topology_id", "name", name="uq_topology_decky_name"),
    )
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    topology_id: str = Field(foreign_key="topologies.id", index=True)
    name: str
    # JSON list[str] of service names on this decky (snapshot of assignment).
    services: str = Field(
        sa_column=Column("services", _BIG_TEXT, nullable=False, default="[]")
    )
    # Full serialised DeckyConfig snapshot — lets the dashboard render the
    # same card shape as DeckyShard without a live round-trip.
    decky_config: Optional[str] = Field(
        default=None, sa_column=Column("decky_config", _BIG_TEXT, nullable=True)
    )
    ip: Optional[str] = Field(default=None)
    # Same vocabulary as DeckyShard.state to keep dashboard rendering uniform.
    state: str = Field(
        default="pending", index=True
    )  # pending|running|failed|torn_down|degraded|tearing_down|teardown_failed
    last_error: Optional[str] = Field(
        default=None, sa_column=Column("last_error", Text, nullable=True)
    )
    compose_hash: Optional[str] = Field(default=None)
    last_seen: Optional[datetime] = Field(default=None)
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    # Canvas layout coordinates (set by the web editor). Nullable so
    # generator-emitted deckies don't need auto-layout at generation time.
    x: Optional[float] = Field(default=None)
    y: Optional[float] = Field(default=None)
class TopologyEdge(SQLModel, table=True):
    """Membership edge: a decky attached to a LAN.

    A decky appearing in ≥2 edges is multi-homed (a bridge decky).
    """

    __tablename__ = "topology_edges"
    id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    # Three indexed foreign keys: owning topology, the decky, and the LAN.
    topology_id: str = Field(foreign_key="topologies.id", index=True)
    decky_uuid: str = Field(foreign_key="topology_deckies.uuid", index=True)
    lan_id: str = Field(foreign_key="lans.id", index=True)
    # Both flags default to False.
    is_bridge: bool = Field(default=False)
    forwards_l3: bool = Field(default=False)
class TopologyStatusEvent(SQLModel, table=True):
    """Append-only audit log of topology status transitions."""

    __tablename__ = "topology_status_events"
    id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    topology_id: str = Field(foreign_key="topologies.id", index=True)
    # The transition recorded by this row.
    from_status: str
    to_status: str
    # Indexed timestamp; defaults to the current time in UTC.
    at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    # Optional free-text explanation, stored in a nullable TEXT column.
    reason: Optional[str] = Field(
        default=None, sa_column=Column("reason", Text, nullable=True)
    )
class TopologyMutation(SQLModel, table=True):
    """Operator-requested live mutation for an active MazeNET topology.

    Each row is one intent (add LAN, attach decky, etc.). The mutator's
    reconciler claims ``pending`` rows atomically (see
    ``SQLModelRepository.claim_next_mutation``), applies them against
    Docker, and writes ``applied`` or ``failed`` back. The ``(state,
    topology_id)`` composite index keeps the watch-loop guard query
    cheap even with years of mutation history.
    """

    __tablename__ = "topology_mutations"
    __table_args__ = (
        # Composite index; column order is (state, topology_id).
        Index(
            "ix_topology_mutations_state_topology",
            "state",
            "topology_id",
        ),
    )
    id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    topology_id: str = Field(foreign_key="topologies.id", index=True)
    # add_lan|remove_lan|add_decky|attach_decky|detach_decky|
    # remove_decky|update_decky|update_lan
    op: str = Field(index=True)
    # JSON-serialised op payload (keys depend on ``op``).
    payload: str = Field(
        sa_column=Column("payload", _BIG_TEXT, nullable=False, default="{}")
    )
    # pending|applying|applied|failed
    state: str = Field(default="pending", index=True)
    requested_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), index=True
    )
    # Defaults to None.
    applied_at: Optional[datetime] = Field(default=None)
    # Optional free-text explanation, stored in a nullable TEXT column.
    reason: Optional[str] = Field(
        default=None, sa_column=Column("reason", Text, nullable=True)
    )
# --- MazeNET Topology REST DTOs (phase 3) ---
# Request/response shapes for /api/v1/topologies. All write paths are
# admin-only; reads accept admin or viewer. Child CRUD is pending-only;
# mutations of active|degraded topologies go through the queue.
class TopologyGenerateRequest(BaseModel):
    """Body for POST /topologies — mirrors the `topology generate` CLI."""

    name: str = PydanticField(..., min_length=1, max_length=64)
    # Restricted by regex to exactly "unihost" or "agent".
    mode: str = PydanticField(default="unihost", pattern=r"^(unihost|agent)$")
    target_host_uuid: Optional[str] = None
    # ``depth`` and ``branching_factor`` are required and range-checked.
    depth: int = PydanticField(..., ge=1, le=16)
    branching_factor: int = PydanticField(..., ge=1, le=8)
    # NOTE(review): each bound is range-checked on its own; this model does
    # not check that min <= max — confirm it is enforced downstream.
    deckies_per_lan_min: int = PydanticField(default=1, ge=0, le=32)
    deckies_per_lan_max: int = PydanticField(default=3, ge=1, le=32)
    # Probabilities, constrained to the closed interval [0.0, 1.0].
    bridge_forward_probability: float = PydanticField(default=1.0, ge=0.0, le=1.0)
    cross_edge_probability: float = PydanticField(default=0.0, ge=0.0, le=1.0)
    services_explicit: Optional[list[str]] = None
    randomize_services: bool = True
    # Optional non-negative seed.
    seed: Optional[int] = PydanticField(default=None, ge=0)
class TopologySummary(BaseModel):
    """List-row shape for GET /topologies."""

    # Keys not declared below are dropped on validation instead of raising.
    model_config = ConfigDict(extra="ignore")
    id: str
    name: str
    mode: str
    target_host_uuid: Optional[str] = None
    status: str
    version: int
    needs_resync: bool = False
    created_at: datetime
    status_changed_at: Optional[datetime] = None
class TopologyListResponse(BaseModel):
    """List envelope of TopologySummary rows; paging fields are optional."""

    total: int
    limit: Optional[int] = None
    offset: Optional[int] = None
    data: list[TopologySummary]
class LANRow(BaseModel):
    """HTTP-facing shape of a LAN row."""

    # Keys not declared below are dropped on validation instead of raising.
    model_config = ConfigDict(extra="ignore")
    id: str
    topology_id: str
    name: str
    subnet: str
    is_dmz: bool = False
    docker_network_id: Optional[str] = None
    # Canvas layout coordinates; None when unset.
    x: Optional[float] = None
    y: Optional[float] = None
class DeckyRow(BaseModel):
    """HTTP-facing shape of a topology decky row.

    ``services`` and ``decky_config`` are decoded structures here, whereas
    the TopologyDecky table stores both as text.
    """

    # Keys not declared below are dropped on validation instead of raising.
    model_config = ConfigDict(extra="ignore")
    uuid: str
    topology_id: str
    name: str
    services: list[str] = PydanticField(default_factory=list)
    decky_config: Optional[dict[str, Any]] = None
    ip: Optional[str] = None
    state: str
    last_error: Optional[str] = None
    # Canvas layout coordinates; None when unset.
    x: Optional[float] = None
    y: Optional[float] = None
class EdgeRow(BaseModel):
    """HTTP-facing shape of a topology edge (a decky attached to a LAN)."""

    # Keys not declared below are dropped on validation instead of raising.
    model_config = ConfigDict(extra="ignore")
    id: str
    topology_id: str
    decky_uuid: str
    lan_id: str
    is_bridge: bool = False
    forwards_l3: bool = False
class TopologyDetail(BaseModel):
    """Hydrated topology — mirrors persistence.hydrate() output.

    ``topology`` uses :class:`TopologySummary` which already exposes
    ``target_host_uuid`` — agent-targeted topologies surface their
    pinned host through that field.
    """

    topology: TopologySummary
    # Child collections, one list per child row type.
    lans: list[LANRow]
    deckies: list[DeckyRow]
    edges: list[EdgeRow]
class TopologyStatusEventRow(BaseModel):
    """HTTP-facing shape of one topology status-transition event."""

    # Keys not declared below are dropped on validation instead of raising.
    model_config = ConfigDict(extra="ignore")
    id: str
    topology_id: str
    from_status: str
    to_status: str
    at: datetime
    reason: Optional[str] = None
class LANCreateRequest(BaseModel):
    """Request body for creating a LAN; only ``name`` is required."""

    name: str = PydanticField(..., min_length=1, max_length=64)
    subnet: Optional[str] = None
    is_dmz: bool = False
    x: Optional[float] = None
    y: Optional[float] = None
    # Caller's last-seen topology version, for the optimistic-concurrency
    # check; optional.
    expected_version: Optional[int] = None
class LANUpdateRequest(BaseModel):
    """Request body for updating a LAN; every field is optional.

    Unlike LANCreateRequest, ``name`` carries no length constraint here.
    """

    name: Optional[str] = None
    subnet: Optional[str] = None
    is_dmz: Optional[bool] = None
    x: Optional[float] = None
    y: Optional[float] = None
    # Caller's last-seen topology version, for the optimistic-concurrency
    # check; optional.
    expected_version: Optional[int] = None
class DeckyCreateRequest(BaseModel):
    """Request body for creating a decky; only ``name`` is required."""

    name: str = PydanticField(..., min_length=1, max_length=64)
    services: list[str] = PydanticField(default_factory=list)
    decky_config: Optional[dict[str, Any]] = None
    x: Optional[float] = None
    y: Optional[float] = None
    # Caller's last-seen topology version, for the optimistic-concurrency
    # check; optional.
    expected_version: Optional[int] = None
class DeckyUpdateRequest(BaseModel):
    """Request body for updating a decky; every field is optional.

    Unlike DeckyCreateRequest, ``name`` carries no length constraint here.
    """

    name: Optional[str] = None
    services: Optional[list[str]] = None
    decky_config: Optional[dict[str, Any]] = None
    x: Optional[float] = None
    y: Optional[float] = None
    # Caller's last-seen topology version, for the optimistic-concurrency
    # check; optional.
    expected_version: Optional[int] = None
class EdgeCreateRequest(BaseModel):
    """Request body for attaching a decky to a LAN."""

    decky_uuid: str
    lan_id: str
    is_bridge: bool = False
    forwards_l3: bool = False
    # Caller's last-seen topology version, for the optimistic-concurrency
    # check; optional.
    expected_version: Optional[int] = None
# Closed set of operation names accepted by MutationEnqueueRequest.op.
_MUTATION_OPS = Literal[
    "add_lan",
    "remove_lan",
    "add_decky",
    "attach_decky",
    "detach_decky",
    "remove_decky",
    "update_decky",
    "update_lan",
]
class MutationEnqueueRequest(BaseModel):
    """Request body for enqueueing one topology mutation."""

    # Must be one of the literal op names in _MUTATION_OPS.
    op: _MUTATION_OPS
    # Op-specific payload; defaults to an empty dict.
    payload: dict[str, Any] = PydanticField(default_factory=dict)
    # Caller's last-seen topology version, for the optimistic-concurrency
    # check; optional.
    expected_version: Optional[int] = None
def _decode_json_payload(v: Any) -> Any:
    """Normalise a mutation payload that may arrive JSON-encoded.

    Non-string values are handed back untouched. An empty string becomes
    an empty dict. Any other string is parsed as JSON and the decoded
    value is returned; malformed JSON propagates the decoder's error.
    """
    if not isinstance(v, str):
        return v
    if not v:
        return {}
    # Imported locally under a private alias, keeping the module
    # namespace free of ``json``.
    import json as _json
    return _json.loads(v)
# dict payload type that first runs _decode_json_payload on the raw input,
# so a JSON-encoded string is accepted wherever a dict is expected.
_MutationPayload = Annotated[dict[str, Any], BeforeValidator(_decode_json_payload)]
class MutationRow(BaseModel):
    """HTTP-facing shape of one topology mutation row."""

    # Keys not declared below are dropped on validation instead of raising.
    model_config = ConfigDict(extra="ignore")
    id: str
    topology_id: str
    # Plain ``str`` here, not the _MUTATION_OPS literal used on enqueue.
    op: str
    # Accepts a dict or a JSON-encoded string (decoded before validation).
    payload: _MutationPayload = PydanticField(default_factory=dict)
    state: str
    requested_at: datetime
    applied_at: Optional[datetime] = None
    reason: Optional[str] = None
class MutationEnqueueResponse(BaseModel):
    """Response for an enqueued mutation: its id and state."""

    mutation_id: str
    # Defaults to "pending".
    state: str = "pending"
class ValidationIssueResponse(BaseModel):
    """One validation issue, collected in ``ValidationErrorResponse.issues``."""

    severity: str
    code: str
    message: str
    # Free-form locator for the offending object; defaults to an empty dict.
    target: dict[str, Any] = PydanticField(default_factory=dict)
class ValidationErrorResponse(BaseModel):
    """Error body carrying a fixed default detail message and the issue list."""

    detail: str = "Topology validation failed"
    issues: list[ValidationIssueResponse]
class VersionConflictResponse(BaseModel):
    """Error body for a topology version mismatch."""

    detail: str = "Topology version conflict"
    # The two version numbers involved in the mismatch.
    current: int
    expected: int
class NotEditableResponse(BaseModel):
    """Error body for an edit attempted on a topology that is not editable."""

    detail: str = "Topology not editable"
    status: str
    reason: Optional[str] = None
class ServiceCatalogResponse(BaseModel):
    """Response wrapper holding a list of service names."""

    services: list[str]
class ArchetypeEntry(BaseModel):
    """One archetype, collected in ``ArchetypeCatalogResponse.archetypes``."""

    slug: str
    display_name: str
    description: str
    services: list[str]
    preferred_distros: list[str]
    nmap_os: str
class ArchetypeCatalogResponse(BaseModel):
    """Response wrapper holding the list of ArchetypeEntry items."""

    archetypes: list[ArchetypeEntry]
class NextIPResponse(BaseModel):
    """Response pairing a subnet with an IP address (both strings)."""

    subnet: str
    ip: str
class NextSubnetResponse(BaseModel):
    """Response carrying a single subnet string."""

    subnet: str
class DeployAcceptedResponse(BaseModel):
    """Response returned when a topology deploy request is accepted."""

    topology_id: str
    status: str
    dry_run: bool = False
class ReapReportResponse(BaseModel):
    """Report of a reap run; every field is a required list of strings."""

    live_prefixes: list[str]
    orphan_prefixes: list[str]
    containers_removed: list[str]
    networks_removed: list[str]
    errors: list[str]

View File

@@ -0,0 +1,73 @@
"""Remote updates DTOs (master → worker /updater fan-out)."""
from typing import Any, Literal, Optional
from pydantic import BaseModel, Field as PydanticField
# --- Remote Updates (master → worker /updater) DTOs ---
# Powers the dashboard's Remote Updates page. The master dashboard calls
# these (auth-gated) endpoints; internally they fan out to each worker's
# updater daemon over mTLS via UpdaterClient.
class HostReleaseInfo(BaseModel):
    """Per-host release status, collected in ``HostReleasesResponse.hosts``."""

    host_uuid: str
    host_name: str
    address: str
    reachable: bool
    # These fields mirror the updater's /health payload when reachable; they
    # are all Optional so an unreachable host still serializes cleanly.
    agent_status: Optional[str] = None
    current_sha: Optional[str] = None
    previous_sha: Optional[str] = None
    # Untyped release entries; defaults to an empty list.
    releases: list[dict[str, Any]] = PydanticField(default_factory=list)
    detail: Optional[str] = None  # populated when unreachable
class HostReleasesResponse(BaseModel):
    """Response wrapper holding one HostReleaseInfo per host."""

    hosts: list[HostReleaseInfo]
class PushUpdateRequest(BaseModel):
    """Request body for pushing an update to worker hosts.

    NOTE(review): ``host_uuids`` is described as mutually exclusive with
    ``all``, but this model does not enforce that — confirm it is checked
    by the caller.
    """

    host_uuids: Optional[list[str]] = PydanticField(
        default=None,
        description="Target specific hosts; mutually exclusive with 'all'.",
    )
    # The field name shadows the ``all`` builtin inside the class body only;
    # it is part of the wire contract.
    all: bool = PydanticField(default=False, description="Target every non-decommissioned host with an updater bundle.")
    include_self: bool = PydanticField(
        default=False,
        description="After a successful /update, also push /update-self to upgrade the updater itself.",
    )
    exclude: list[str] = PydanticField(
        default_factory=list,
        description="Additional tarball exclude globs (on top of the built-in defaults).",
    )
class PushUpdateResult(BaseModel):
    """Per-host outcome of a push, collected in ``PushUpdateResponse.results``."""

    host_uuid: str
    host_name: str
    # updated = /update 200. rolled-back = /update 409 (auto-recovered).
    # failed = transport error or non-200/409 response. self-updated = /update-self succeeded.
    # NOTE(review): "self-failed" is presumably the /update-self failure
    # counterpart of "self-updated" — confirm against the router.
    status: Literal["updated", "rolled-back", "failed", "self-updated", "self-failed"]
    http_status: Optional[int] = None
    sha: Optional[str] = None
    detail: Optional[str] = None
    stderr: Optional[str] = None
class PushUpdateResponse(BaseModel):
    """Response for a push: a sha, the tarball size, and per-host results."""

    sha: str
    tarball_bytes: int
    results: list[PushUpdateResult]
class RollbackRequest(BaseModel):
    """Request body for rolling back a single host; ``host_uuid`` is required."""

    host_uuid: str = PydanticField(..., description="Host to roll back to its previous release slot.")
class RollbackResponse(BaseModel):
    """Outcome of a rollback request for one host."""

    host_uuid: str
    host_name: str
    # Only two outcomes are representable.
    status: Literal["rolled-back", "failed"]
    http_status: Optional[int] = None
    detail: Optional[str] = None

View File

@@ -0,0 +1,162 @@
"""Webhook subscription table + CRUD DTOs.
Webhooks push DECNET bus events out to external SIEM / SOAR stacks
(Wazuh, Shuffle, TheHive, n8n, ...). Each subscription carries a set
of NATS-style topic patterns; the `decnet webhook` worker subscribes
to the union of patterns across all enabled subscriptions and POSTs
matching events to each matching URL with HMAC-SHA256 signing.
Simple mode (UI) exposes a friendly enum (`AttackerDetail`,
`DeckyStatus`, `SystemStatus`) that expands to patterns at save time.
Advanced mode lets an admin set raw patterns directly. Storage is
always the expanded list — the enum is sugar at the router layer.
"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import Any, List, Literal, Optional
from uuid import uuid4
from pydantic import BaseModel, Field as PydanticField, HttpUrl
from sqlmodel import Field, SQLModel
# Friendly event names accepted in the ``simple_events`` request fields.
SimpleEvent = Literal["AttackerDetail", "DeckyStatus", "SystemStatus"]
class WebhookSubscription(SQLModel, table=True):
    """One outbound webhook subscription row.

    Holds the destination URL, the HMAC signing secret, the JSON-encoded
    topic patterns, and the delivery-health bookkeeping used by the
    circuit breaker.
    """

    __tablename__ = "webhook_subscriptions"
    # Surrogate primary key: a random UUID4 rendered as a string.
    uuid: str = Field(default_factory=lambda: str(uuid4()), primary_key=True)
    name: str = Field(index=True, unique=True)
    url: str
    secret: str  # HMAC-SHA256 key; plaintext pre-v1 (see DEBT-037 §7)
    # JSON-encoded list[str] of NATS-style bus topic patterns.
    # Storing as TEXT keeps the schema portable across SQLite and MySQL
    # without pulling in dialect-specific JSON columns.
    topic_patterns: str = Field(default="[]")
    enabled: bool = Field(default=True, index=True)
    consecutive_failures: int = Field(default=0)
    last_success_at: Optional[datetime] = None
    last_failure_at: Optional[datetime] = None
    last_error: Optional[str] = None
    # Set when the circuit breaker auto-disables the subscription after
    # too many consecutive failures. NULL means "not tripped" — the
    # subscription is either active (enabled=True) or admin-paused
    # (enabled=False, auto_disabled_at=NULL). A non-NULL stamp with
    # enabled=False means the worker tripped it; the operator clears
    # the flag by re-enabling via PATCH.
    auto_disabled_at: Optional[datetime] = None
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

    def patterns(self) -> list[str]:
        """Decode `topic_patterns` to a list of pattern strings.

        Returns:
            The string elements of the stored JSON array. Returns [] when
            the column is empty, is not valid JSON, or decodes to anything
            other than a JSON array. Non-string elements are dropped.
        """
        try:
            raw = json.loads(self.topic_patterns or "[]")
        except (ValueError, TypeError):
            return []
        if not isinstance(raw, list):
            # Valid JSON that is not an array ("null", "5", '"x"', "{...}")
            # must not be iterated: None/int would raise TypeError, a str
            # would yield single characters and a dict would yield its keys.
            return []
        return [p for p in raw if isinstance(p, str)]
# --- API Request / Response Models (Pydantic) ---
class WebhookCreateRequest(BaseModel):
    """Request body for creating a webhook subscription."""

    name: str = PydanticField(..., min_length=1, max_length=64)
    # Validated as an HTTP(S) URL by Pydantic.
    url: HttpUrl
    # If secret is omitted, the router generates a secure random one and
    # returns it exactly once on the create response. After that, callers
    # can only rotate via PATCH.
    secret: Optional[str] = PydanticField(None, min_length=16, max_length=256)
    # At least one of simple_events / topic_patterns must be non-empty
    # (validated in the router, not Pydantic, so the 400 carries a clear
    # detail message).
    simple_events: List[SimpleEvent] = PydanticField(default_factory=list)
    topic_patterns: List[str] = PydanticField(default_factory=list)
    enabled: bool = True
class WebhookUpdateRequest(BaseModel):
    """Request body for partially updating a webhook subscription."""

    # Partial update — every field optional; the router diffs against the
    # current row and only writes what changed.
    name: Optional[str] = PydanticField(None, min_length=1, max_length=64)
    url: Optional[HttpUrl] = None
    # Same length bounds as on create (16..256) when supplied.
    secret: Optional[str] = PydanticField(None, min_length=16, max_length=256)
    simple_events: Optional[List[SimpleEvent]] = None
    topic_patterns: Optional[List[str]] = None
    enabled: Optional[bool] = None
class WebhookResponse(BaseModel):
    """Public shape — deliberately omits `secret`.

    The `warnings` field carries non-blocking advisories about the
    subscription's configuration — e.g. an `http://` URL is fine but
    surfaces a warning so the operator knows the event body is
    plaintext on the wire. Empty list when nothing is worth flagging.
    """

    uuid: str
    name: str
    url: str
    # Decoded list here (the table stores a JSON-encoded string).
    topic_patterns: List[str]
    enabled: bool
    consecutive_failures: int
    last_success_at: Optional[datetime] = None
    last_failure_at: Optional[datetime] = None
    last_error: Optional[str] = None
    auto_disabled_at: Optional[datetime] = None
    created_at: datetime
    updated_at: datetime
    warnings: List[str] = PydanticField(default_factory=list)
class WebhookCreateResponse(WebhookResponse):
    """Create-path response — carries the secret exactly once, for copy-out."""

    # The only field added on top of WebhookResponse.
    secret: str
class WebhookTestResponse(BaseModel):
    """Outcome of a webhook test delivery."""

    delivered: bool
    # Both are optional and default to None.
    status_code: Optional[int] = None
    error: Optional[str] = None
def _compute_warnings(url: str) -> List[str]:
    """Return non-blocking configuration advisories for a webhook URL.

    HMAC signing detects tampering on any transport, yet over plaintext
    HTTP an on-path observer can still *read* the event body. The admin
    is told and left to decide, in line with DECNET's operator-trust
    posture (see THREAT_MODEL WH-03).

    Returns:
        A one-element list with the ``insecure_url`` advisory when *url*
        starts with ``http://`` (case-insensitive); otherwise an empty
        list. A falsy *url* is treated as the empty string.
    """
    is_plain_http = (url or "").lower().startswith("http://")
    if not is_plain_http:
        return []
    return [
        "insecure_url: URL uses http://. Event bodies (including "
        "payload fields) traverse the wire in plaintext; HMAC still "
        "detects tampering but anyone on-path can read the event. "
        "Use https:// in production."
    ]
def _row_to_response_dict(row: dict[str, Any]) -> dict[str, Any]:
    """Normalize a DB row into the WebhookResponse dict shape.

    Used by the CRUD router to decode `topic_patterns` JSON, drop the
    `secret` column, and compute any configuration warnings.

    Args:
        row: Mapping of column name to value. It is copied, not mutated.

    Returns:
        A new dict in which ``topic_patterns`` is always a list of
        strings, ``secret`` is absent, and ``warnings`` is populated from
        the row's ``url``. ``topic_patterns`` is [] when the stored value
        is missing, empty, not valid JSON, or valid JSON that is not an
        array; non-string elements are dropped.
    """
    out = dict(row)
    raw = out.pop("topic_patterns", "[]")
    try:
        decoded = json.loads(raw or "[]")
    except (ValueError, TypeError):
        decoded = []
    # Valid JSON that is not an array ("null", "5", "{...}") cannot satisfy
    # the List[str] contract of the response model, so it is treated like
    # undecodable input.
    if isinstance(decoded, list):
        out["topic_patterns"] = [p for p in decoded if isinstance(p, str)]
    else:
        out["topic_patterns"] = []
    out.pop("secret", None)
    out["warnings"] = _compute_warnings(out.get("url", ""))
    return out

View File

@@ -0,0 +1,50 @@
"""Workers panel DTOs (bus-backed health + control)."""
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field as PydanticField
# --- Workers panel (Config → Workers) ---
# Bus-backed health + control: workers heartbeat on ``system.<name>.health``
# and listen on ``system.<name>.control``. The API aggregates last-seen
# heartbeats via the worker registry; these are the HTTP-facing shapes.
class WorkerStatus(BaseModel):
    """Health snapshot of one worker, collected in ``WorkersResponse.workers``."""

    name: str
    # ``ok`` — heartbeat within 90s (3× 30s heartbeat interval)
    # ``stale`` — worker was seen before but hasn't pulsed in 90s+
    # ``unknown`` — we've never received a heartbeat from this name
    status: Literal["ok", "stale", "unknown"]
    # Both default to None.
    last_heartbeat_ts: Optional[float] = None
    seconds_since: Optional[float] = None
    # Whatever the worker's ``extra()`` callback put in the heartbeat;
    # opaque to the panel, displayed only if the UI knows the key.
    extra: Dict[str, Any] = PydanticField(default_factory=dict)
    # True iff a ``decnet-<name>.service`` unit file is present on the
    # host. False flips the UI START button to disabled with a
    # "Unit not installed" tooltip. Default True for backwards compat
    # on clients that pre-date the field.
    installed: bool = True
class WorkersResponse(BaseModel):
    """Response for the workers panel: per-worker statuses plus metadata."""

    workers: List[WorkerStatus]
    # NOTE(review): float — presumably a unix timestamp; confirm against
    # the router.
    generated_at: float
    bus_connected: bool
class WorkerControlResponse(BaseModel):
    """Acknowledgement of a control action sent to a worker."""

    accepted: bool
    worker: str
    action: str
class StartFailure(BaseModel):
    """One failed start, collected in ``StartAllResponse.failed``."""

    name: str
    reason: str
class StartAllResponse(BaseModel):
    """Result of a start-all request, split into three outcome buckets."""

    started: List[str]
    already_running: List[str]
    failed: List[StartFailure]