feat(attackers): scanned vs. interacted service bucketing on detail page

Adds a new card on AttackerDetail: SCANNED · N services | INTERACTED
WITH · M services. Distinguishes port-scanners (N high, M=0) from
actual engagement (M>0) at a glance — the analyst's first question
when triaging a new attacker row.

Classifier lives in decnet/correlation/event_kinds.py, a single
source of truth for the event-type vocabulary:

- INTERACTION_EVENT_TYPES — command-family (command/exec/query/...),
  SMTP engagement (mail_from/rcpt_to/rcpt_denied/message_accepted),
  file/payload activity (file_captured/upload/download_attempt/retr),
  pub/sub (publish/subscribe), recorded TTY sessions.
- NOISE_EVENT_TYPES — DECNET-internal (startup/shutdown/config_error/
  parse_error/protocol_error/unknown_*).
- Everything else defaults to scan. Conservative by design: new
  template verbs show up as "scanned" until explicitly promoted.

Bucket logic: a service is "interacted" if ≥1 of its events
classifies as interaction; otherwise "scanned" if ≥1 scan event;
noise-only services drop. Disjoint by construction.

Deliberate no-schema path: compute on-the-fly in the detail endpoint
via SELECT DISTINCT service, event_type FROM logs. Small result set
(tens of pairs per attacker), cost is trivial vs. the existing
behavior/commands queries. Trade-off: two more small queries per
detail view (attacker UUID → IP lookup, then the DISTINCT) in
exchange for zero ALTER TABLE migration pain and an immediate
classifier-change feedback loop.

Profiler's _COMMAND_EVENT_TYPES stays as-is (strict subset of
interactions that carry executable text), with a comment pointing at
the new canonical module.

Closes DEVELOPMENT.md "Attacker Intelligence §Service-Level Behavioral
Profiling — Services actively interacted with".
This commit is contained in:
2026-04-24 17:12:20 -04:00
parent ce6b4a4174
commit 351a8939c3
8 changed files with 322 additions and 1 deletions

View File

@@ -0,0 +1,113 @@
"""Classify RFC 5424 event_type strings as interaction vs. scan vs. noise.
Used by:
- The attacker detail endpoint to split services into "scanned" and
"interacted with" buckets, distinguishing port scanners from
attackers who actually engaged.
- The profiler worker, indirectly: it does not import this module, but
its own ``_COMMAND_EVENT_TYPES`` filter is intended to stay a subset of
``INTERACTION_EVENT_TYPES``.
Classification is conservative: an unknown event_type defaults to
``scan`` rather than ``interaction``. That way a new service template
emitting a fresh verb shows up as "scanned" on the dashboard — visible
but not over-credited. Adding it to ``INTERACTION_EVENT_TYPES`` is
always a deliberate promotion.
"""
from __future__ import annotations
from typing import Literal
# Event types showing the attacker went past reconnaissance: ran a
# command, tried to send mail, moved a file, or used a pub/sub topic.
# One such event from an attacker is enough for the service to count
# as "interacted with".
INTERACTION_EVENT_TYPES: frozenset[str] = frozenset((
    # Shell / command family. Started life as the profiler's
    # command-extraction frozenset; this module is now the canonical
    # home of that vocabulary.
    "command", "exec", "query", "input", "shell_input",
    "execute", "run", "sql_query", "redis_command", "ldap_search",
    # SMTP engagement: by MAIL FROM / RCPT TO the client is trying to
    # send mail rather than banner-grab; message_accepted marks the
    # DATA commit.
    "mail_from", "rcpt_to", "rcpt_denied", "message_accepted",
    # File / payload activity ("retr" is FTP retrieve).
    "file_captured", "upload", "download_attempt", "retr",
    # Pub/sub used operationally, as opposed to a bare connection.
    "publish", "subscribe",
    # Recorded TTY session: sessrec only writes when there was PTY
    # input, so this is always an interaction.
    "session_recorded",
))
# DECNET-internal or protocol-framework events that are not
# attacker-caused signal. Excluded from both the scanned and the
# interacted bucket.
NOISE_EVENT_TYPES: frozenset[str] = frozenset((
    "startup", "shutdown",
    "config_error", "parse_error", "protocol_error",
    "unknown_packet", "unknown_opcode", "unknown_command",
))
# The three labels classify_event() can return for an event_type.
EventKind = Literal["interaction", "scan", "noise"]
def classify_event(event_type: str) -> EventKind:
    """Map one event_type string to its kind label.

    Returns ``"interaction"`` for members of ``INTERACTION_EVENT_TYPES``,
    ``"noise"`` for members of ``NOISE_EVENT_TYPES``, and ``"scan"`` for
    everything else (unknown verbs and the empty string included).
    """
    if event_type in INTERACTION_EVENT_TYPES:
        label: EventKind = "interaction"
    elif event_type in NOISE_EVENT_TYPES:
        label = "noise"
    else:
        # Conservative default: unlisted verbs are never over-credited.
        label = "scan"
    return label
def bucket_services(
    pairs: list[tuple[str, str]],
) -> dict[str, list[str]]:
    """Group distinct service names into scanned vs. interacted buckets.

    Args:
        pairs: ``(service, event_type)`` tuples — the shape returned by
            a ``SELECT DISTINCT service, event_type`` query. Any
            iterable of 2-tuples works; it is consumed once and the
            order of the pairs does not affect the result.

    Returns:
        ``{"interacted": [...], "scanned": [...]}`` with both lists
        sorted. A service is ``interacted`` if any of its events
        classifies as interaction; otherwise ``scanned`` if any event
        classifies as scan. Services with only noise events, and pairs
        whose service name is empty or ``None``, are dropped. The two
        buckets are disjoint.
    """
    interacted: set[str] = set()
    touched: set[str] = set()
    for service, event_type in pairs:
        if not service:
            # Defensive: a row with no service name cannot be shown on
            # the card, and a None here would make sorted() below raise
            # TypeError once it is mixed with real (str) service names.
            continue
        kind = classify_event(event_type)
        if kind == "interaction":
            interacted.add(service)
        elif kind == "scan":
            touched.add(service)
        # "noise" contributes to neither bucket.
    return {
        "interacted": sorted(interacted),
        # Interaction outranks scan: remove promoted services so the
        # buckets stay disjoint.
        "scanned": sorted(touched - interacted),
    }

View File

@@ -40,7 +40,13 @@ logger = get_logger("attacker_worker")
_BATCH_SIZE = 500
_STATE_KEY = "attacker_worker_cursor"
# Event types that indicate active command/query execution (not just connection/scan)
# Event types that indicate active command/query execution — the
# shell-family subset of INTERACTION_EVENT_TYPES in
# decnet/correlation/event_kinds.py. Kept here because this set is a
# stricter filter (commands that carry text to extract, vs. interactions
# like RCPT TO or file upload that don't).
# TODO: add a test under tests/profiler/ asserting this is a subset of
# the canonical interaction set so the two can't drift — none exists yet.
_COMMAND_EVENT_TYPES = frozenset({
"command", "exec", "query", "input", "shell_input",
"execute", "run", "sql_query", "redis_command",

View File

@@ -247,6 +247,16 @@ class BaseRepository(ABC):
"""Return `session_recorded` log rows for this attacker, newest first."""
pass
async def get_attacker_service_activity(
    self, attacker_uuid: str
) -> list[tuple[str, str]]:
    """Return the distinct ``(service, event_type)`` pairs observed for
    one attacker, for bucketing into scanned vs. interacted services.

    Not marked ``@abstractmethod``: the default raises so that
    non-SQLModel backends must opt in by overriding it;
    SQLModelRepository overrides it with a cheap DISTINCT query.

    Raises:
        NotImplementedError: always, in this base implementation. The
            message names the concrete repository class so a missing
            override is easy to spot in logs.
    """
    raise NotImplementedError(
        f"{type(self).__name__} does not implement "
        "get_attacker_service_activity()"
    )
@abstractmethod
async def get_session_log(self, sid: str) -> Optional[dict[str, Any]]:
"""Look up the `session_recorded` Log row for a given session UUID."""

View File

@@ -881,6 +881,32 @@ class SQLModelRepository(BaseRepository):
page = commands[offset: offset + limit]
return {"total": total, "data": page}
async def get_attacker_service_activity(
    self, attacker_uuid: str
) -> list[tuple[str, str]]:
    """Return distinct ``(service, event_type)`` pairs for an attacker.

    Two statements run in one session: the attacker's IP is looked up
    by UUID, then ``SELECT DISTINCT service, event_type`` is run over
    the logs carrying that IP. An empty list is returned when the UUID
    resolves to no IP. The result is bounded by the cardinality of
    services × event_types, so it stays small even for attackers with
    long event streams.

    The caller feeds the pairs to `event_kinds.bucket_services` to
    split them into scanned vs. interacted.
    """
    ip_stmt = select(Attacker.ip).where(Attacker.uuid == attacker_uuid)
    async with self._session() as session:
        attacker_ip = (await session.execute(ip_stmt)).scalar_one_or_none()
        if not attacker_ip:
            return []
        activity_stmt = (
            select(Log.service, Log.event_type)
            .where(Log.attacker_ip == attacker_ip)
            .distinct()
        )
        result = await session.execute(activity_stmt)
        # Unpack each row into a plain tuple for the caller.
        return [
            (service, event_type) for service, event_type in result.all()
        ]
async def get_attacker_artifacts(self, uuid: str) -> list[dict[str, Any]]:
"""Return `file_captured` logs for the attacker identified by UUID.

View File

@@ -2,6 +2,7 @@ from typing import Any
from fastapi import APIRouter, Depends, HTTPException
from decnet.correlation.event_kinds import bucket_services
from decnet.telemetry import traced as _traced
from decnet.web.dependencies import require_viewer, repo
@@ -27,4 +28,10 @@ async def get_attacker_detail(
if not attacker:
raise HTTPException(status_code=404, detail="Attacker not found")
attacker["behavior"] = await repo.get_attacker_behavior(uuid)
# Scanned vs. interacted-with — computed per-request from the log
# stream, not persisted. Cheap (DISTINCT bounded by service ×
# event_type cardinality), and changes to the classifier take effect
# immediately without a profiler re-tick.
pairs = await repo.get_attacker_service_activity(uuid)
attacker["service_activity"] = bucket_services(pairs)
return attacker

View File

@@ -63,6 +63,10 @@ interface AttackerData {
country_source: string | null;
updated_at: string;
behavior: AttackerBehavior | null;
service_activity?: {
interacted: string[];
scanned: string[];
};
}
// ─── Fingerprint rendering ───────────────────────────────────────────────────
@@ -944,6 +948,40 @@ const AttackerDetail: React.FC = () => {
</div>
</div>
{/* Scanned vs. Interacted — activity-depth signal */}
{attacker.service_activity &&
(attacker.service_activity.scanned.length > 0 ||
attacker.service_activity.interacted.length > 0) && (
<div className="stats-grid" style={{ gridTemplateColumns: 'repeat(2, 1fr)' }}>
<div
className="stat-card"
title={
attacker.service_activity.scanned.length > 0
? `Services: ${attacker.service_activity.scanned.join(', ')}`
: 'No services were scanned without engagement.'
}
>
<div className="stat-value matrix-text">
{attacker.service_activity.scanned.length}
</div>
<div className="stat-label">SCANNED · SERVICES</div>
</div>
<div
className="stat-card"
title={
attacker.service_activity.interacted.length > 0
? `Services: ${attacker.service_activity.interacted.join(', ')}`
: 'No services were interacted with — scan-only attacker.'
}
>
<div className="stat-value violet-accent">
{attacker.service_activity.interacted.length}
</div>
<div className="stat-label">INTERACTED WITH · SERVICES</div>
</div>
</div>
)}
{/* Timestamps */}
<Section title="TIMELINE" open={openSections.timeline} onToggle={() => toggle('timeline')}>
<div style={{ padding: '16px', display: 'flex', flexWrap: 'wrap', gap: '32px', fontSize: '0.85rem' }}>

View File

@@ -0,0 +1,91 @@
"""Classifier unit tests for decnet.correlation.event_kinds."""
from __future__ import annotations
from decnet.correlation.event_kinds import (
INTERACTION_EVENT_TYPES,
NOISE_EVENT_TYPES,
bucket_services,
classify_event,
)
def test_shell_family_classifies_as_interaction():
    """Every shell/command-family verb must classify as interaction."""
    # Full family, so dropping any one verb from the set fails here.
    shell_family = (
        "command", "exec", "query", "input", "shell_input",
        "execute", "run", "sql_query", "redis_command", "ldap_search",
    )
    for evt in shell_family:
        assert classify_event(evt) == "interaction", evt
def test_smtp_engagement_classifies_as_interaction():
    """All SMTP engagement verbs, rcpt_denied included, are interactions."""
    for evt in ("mail_from", "rcpt_to", "rcpt_denied", "message_accepted"):
        assert classify_event(evt) == "interaction", evt
def test_file_and_pubsub_classify_as_interaction():
    """File/payload and pub/sub verbs must classify as interaction."""
    file_verbs = ("file_captured", "upload", "download_attempt", "retr")
    pubsub_verbs = ("publish", "subscribe")
    for evt in file_verbs + pubsub_verbs:
        assert classify_event(evt) == "interaction", evt
def test_noise_events_classify_as_noise():
    """Every noise verb must classify as noise."""
    # Spelled out rather than iterating NOISE_EVENT_TYPES, so removing
    # a member from the set is caught.
    noise_verbs = (
        "startup", "shutdown", "config_error", "parse_error",
        "unknown_packet", "unknown_opcode", "unknown_command",
        "protocol_error",
    )
    for evt in noise_verbs:
        assert classify_event(evt) == "noise", evt
def test_scan_touch_events_classify_as_scan():
    """Common template verbs that are neither interaction nor noise
    must fall into the scan bucket."""
    touch_verbs = [
        "connection",
        "disconnect",
        "tls_client_hello",
        "auth_attempt",
        "banner",
        "get_request",
        "head_request",
    ]
    for verb in touch_verbs:
        assert classify_event(verb) == "scan", verb
def test_unknown_event_defaults_to_scan():
    """Unlisted verbs — and the empty string — default to scan.

    Conservative default: a verb from a new template shows up as
    "scanned" instead of being over-credited as interaction.
    """
    for unlisted in ("some_future_verb", ""):
        assert classify_event(unlisted) == "scan"
def test_interaction_and_noise_sets_are_disjoint():
    """No event_type may be listed as both interaction and noise."""
    overlap = INTERACTION_EVENT_TYPES & NOISE_EVENT_TYPES
    assert not overlap
def test_bucket_services_single_interaction_wins():
    """A service with both scan- and interaction-level events counts as
    interacted (not scanned), whichever order the rows arrive in."""
    expected = {"interacted": ["ssh"], "scanned": []}
    scan_then_interaction = [
        ("ssh", "connection"),   # scan
        ("ssh", "shell_input"),  # interaction → wins
    ]
    assert bucket_services(scan_then_interaction) == expected
    # A DISTINCT query without ORDER BY guarantees no row order, so a
    # later scan event must not demote an already-interacted service.
    interaction_then_scan = list(reversed(scan_then_interaction))
    assert bucket_services(interaction_then_scan) == expected
def test_bucket_services_noise_only_service_dropped():
    """A service seen only through noise events lands in neither bucket."""
    lifecycle_only = [("bus", "startup"), ("bus", "shutdown")]
    buckets = bucket_services(lifecycle_only)
    assert buckets == {"interacted": [], "scanned": []}
def test_bucket_services_mixed_realistic():
    """One attacker's pairs across four services: http and mongo are
    only touched, ssh is promoted by a shell_input, ftp by a retr."""
    observed = [
        ("http", "connection"),
        ("http", "get_request"),
        ("ssh", "connection"),
        ("ssh", "auth_attempt"),
        ("ssh", "shell_input"),   # promotes ssh to interacted
        ("ftp", "retr"),          # interaction
        ("mongo", "connection"),  # scan only
    ]
    buckets = bucket_services(observed)
    assert buckets["interacted"] == ["ftp", "ssh"]
    assert buckets["scanned"] == ["http", "mongo"]
def test_bucket_services_empty_input():
    """No pairs in, two empty buckets out."""
    buckets = bucket_services([])
    assert buckets == {"interacted": [], "scanned": []}
def test_bucket_services_returns_sorted_lists():
    """Each bucket comes back alphabetically sorted, whatever the
    input order."""
    unsorted_pairs = [
        ("zzz", "command"),
        ("aaa", "command"),
        ("mmm", "connection"),
    ]
    buckets = bucket_services(unsorted_pairs)
    assert buckets["interacted"] == ["aaa", "zzz"]  # alphabetical
    assert buckets["scanned"] == ["mmm"]

View File

@@ -183,6 +183,7 @@ class TestGetAttackerDetail:
with patch("decnet.web.router.attackers.api_get_attacker_detail.repo") as mock_repo:
mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
mock_repo.get_attacker_service_activity = AsyncMock(return_value=[])
result = await get_attacker_detail(uuid="att-uuid-1", user={"uuid": "test-user", "role": "viewer"})
@@ -211,6 +212,7 @@ class TestGetAttackerDetail:
with patch("decnet.web.router.attackers.api_get_attacker_detail.repo") as mock_repo:
mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
mock_repo.get_attacker_service_activity = AsyncMock(return_value=[])
result = await get_attacker_detail(uuid="att-uuid-1", user={"uuid": "test-user", "role": "viewer"})
@@ -219,6 +221,34 @@ class TestGetAttackerDetail:
assert isinstance(result["fingerprints"], list)
assert isinstance(result["commands"], list)
@pytest.mark.asyncio
async def test_service_activity_splits_scanned_vs_interacted(self):
    """The detail response carries ``service_activity`` with services
    bucketed by the event types the repo reports for the attacker."""
    from decnet.web.router.attackers.api_get_attacker_detail import get_attacker_detail

    sample = _sample_attacker()
    activity = [
        ("ssh", "connection"),
        ("ssh", "shell_input"),   # promotes ssh to interacted
        ("http", "get_request"),  # scan only
        ("ftp", "retr"),          # interacted
        ("bus", "startup"),       # noise — dropped
    ]
    expected = {
        "interacted": ["ftp", "ssh"],
        "scanned": ["http"],
    }

    with patch("decnet.web.router.attackers.api_get_attacker_detail.repo") as mock_repo:
        mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
        mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
        mock_repo.get_attacker_service_activity = AsyncMock(return_value=activity)

        result = await get_attacker_detail(
            uuid="att-uuid-1",
            user={"uuid": "test-user", "role": "viewer"},
        )

    assert result["service_activity"] == expected
# ─── GET /attackers/{uuid}/commands ──────────────────────────────────────────