feat(attackers): scanned vs. interacted service bucketing on detail page

Adds a new card on AttackerDetail: SCANNED · N services | INTERACTED WITH · M services. Distinguishes port-scanners (N high, M=0) from actual engagement (M>0) at a glance — the analyst's first question when triaging a new attacker row. Classifier lives in decnet/correlation/event_kinds.py, a single source of truth for the event-type vocabulary: - INTERACTION_EVENT_TYPES — command-family (command/exec/query/...), SMTP engagement (mail_from/rcpt_to/rcpt_denied/message_accepted), file/payload activity (file_captured/upload/download_attempt/retr), pub/sub (publish/subscribe), recorded TTY sessions. - NOISE_EVENT_TYPES — DECNET-internal and protocol-framework noise (startup/shutdown/config_error/parse_error/protocol_error/unknown_*). - Everything else defaults to scan. Conservative by design: new template verbs show up as "scanned" until explicitly promoted. Bucket logic: a service is "interacted" if ≥1 of its events classifies as interaction; otherwise "scanned" if ≥1 scan event; noise-only services drop. Disjoint by construction. Deliberate no-schema path: compute on-the-fly in the detail endpoint via SELECT DISTINCT service, event_type FROM logs. Small result set (tens of pairs per attacker), cost is trivial vs. the existing behavior/commands queries. Trade-off: one more repository call per detail view (an IP lookup plus the DISTINCT query) in exchange for zero ALTER TABLE migration pain and immediate classifier-change feedback loop. Profiler's _COMMAND_EVENT_TYPES stays as-is (strict subset of interactions that carry executable text), with a comment pointing at the new canonical module. Closes DEVELOPMENT.md "Attacker Intelligence §Service-Level Behavioral Profiling — Services actively interacted with".
2026-04-24 17:12:20 -04:00
parent ce6b4a4174
commit 351a8939c3
8 changed files with 322 additions and 1 deletions
--- a/decnet/correlation/event_kinds.py
+++ b/decnet/correlation/event_kinds.py
@@ -0,0 +1,113 @@
+"""Classify RFC 5424 event_type strings as interaction vs. scan vs. noise.
+
+Used by:
+- The attacker detail endpoint to split services into "scanned" and
+  "interacted with" buckets, distinguishing port scanners from
+  attackers who actually engaged.
+- The profiler worker to filter command-family events when extracting
+  executed-command history.
+
+Classification is conservative: an unknown event_type defaults to
+``scan`` rather than ``interaction``. That way a new service template
+emitting a fresh verb shows up as "scanned" on the dashboard — visible
+but not over-credited. Adding it to ``INTERACTION_EVENT_TYPES`` is
+always a deliberate promotion.
+"""
+from __future__ import annotations
+
+from typing import Literal
+
+# Events that mean the attacker did something past reconnaissance —
+# executed a command, sent mail, uploaded a file, subscribed to a topic.
+# A service with ≥1 of these from a given attacker is "interacted with".
+INTERACTION_EVENT_TYPES: frozenset[str] = frozenset({
+    # Shell / command-family — lifted from the profiler's original
+    # command-extraction frozenset; this module is now the source of
+    # truth for that vocabulary too.
+    "command",
+    "exec",
+    "query",
+    "input",
+    "shell_input",
+    "execute",
+    "run",
+    "sql_query",
+    "redis_command",
+    "ldap_search",
+    # SMTP meaningful engagement — once MAIL FROM / RCPT TO lands (even a
+    # denied RCPT, presumably — confirm) the attacker is trying to send
+    # mail, not just banner-grab. message_accepted is the DATA-commit moment.
+    "mail_from",
+    "rcpt_to",
+    "rcpt_denied",
+    "message_accepted",
+    # File / payload activity
+    "file_captured",
+    "upload",
+    "download_attempt",
+    "retr",  # FTP retrieve
+    # Pub/sub operational use (vs. mere connection)
+    "publish",
+    "subscribe",
+    # A recorded TTY session is always an interaction — sessrec only
+    # writes when there was PTY input.
+    "session_recorded",
+})
+
+
+# Events that are DECNET-internal or protocol-framework noise rather than
+# attacker-caused signal. A service seen only with these lands in no bucket.
+NOISE_EVENT_TYPES: frozenset[str] = frozenset({
+    "startup",
+    "shutdown",
+    "config_error",
+    "parse_error",
+    "unknown_packet",
+    "unknown_opcode",
+    "unknown_command",
+    "protocol_error",
+})
+
+
+EventKind = Literal["interaction", "scan", "noise"]  # labels returned by classify_event()
+
+
+def classify_event(event_type: str) -> EventKind:
+    """Return the kind label for a single event_type string."""
+    if event_type in INTERACTION_EVENT_TYPES:
+        return "interaction"
+    if event_type in NOISE_EVENT_TYPES:
+        return "noise"
+    return "scan"
+
+
+def bucket_services(
+    pairs: list[tuple[str, str]],
+) -> dict[str, list[str]]:
+    """Group distinct service names into scanned vs. interacted buckets.
+
+    *pairs* is an iterable of ``(service, event_type)`` tuples — the
+    shape the repo returns from a ``SELECT DISTINCT service, event_type``
+    query. A service is placed in ``interacted`` if any of its events
+    classifies as interaction; otherwise in ``scanned`` if any event
+    classifies as scan; noise-only services are dropped.
+
+    Return shape: ``{"interacted": [...sorted...], "scanned": [...sorted...]}``.
+    Buckets are disjoint by construction.
+    """
+    best: dict[str, EventKind] = {}
+    for service, event_type in pairs:
+        kind = classify_event(event_type)
+        current = best.get(service)
+        # Rank: interaction > scan > noise > unset.
+        if current == "interaction":
+            continue
+        if kind == "interaction":
+            best[service] = "interaction"
+        elif kind == "scan" and current != "interaction":
+            best[service] = "scan"
+        elif kind == "noise" and current is None:
+            best[service] = "noise"
+    interacted = sorted(s for s, k in best.items() if k == "interaction")
+    scanned = sorted(s for s, k in best.items() if k == "scan")
+    return {"interacted": interacted, "scanned": scanned}
--- a/decnet/profiler/worker.py
+++ b/decnet/profiler/worker.py
@@ -40,7 +40,13 @@ logger = get_logger("attacker_worker")
 _BATCH_SIZE = 500
 _STATE_KEY = "attacker_worker_cursor"

-# Event types that indicate active command/query execution (not just connection/scan)
+# Event types that indicate active command/query execution — the
+# shell-family subset of INTERACTION_EVENT_TYPES in
+# decnet/correlation/event_kinds.py. Kept here because this set is a
+# stricter filter (commands that carry text to extract, vs. interactions
+# like RCPT TO or file upload that don't). It must stay a subset of the
+# canonical interaction set. TODO: add a test under tests/profiler/
+# asserting that — none ships with this change — so they can't drift.
 _COMMAND_EVENT_TYPES = frozenset({
    "command", "exec", "query", "input", "shell_input",
    "execute", "run", "sql_query", "redis_command",
--- a/decnet/web/db/repository.py
+++ b/decnet/web/db/repository.py
@@ -247,6 +247,16 @@ class BaseRepository(ABC):
        """Return `session_recorded` log rows for this attacker, newest first."""
        pass

+    async def get_attacker_service_activity(
+        self, attacker_uuid: str
+    ) -> list[tuple[str, str]]:
+        """Return the distinct ``(service, event_type)`` pairs for one attacker.
+
+        Not abstract: this base version only raises NotImplementedError,
+        so a backend that does not override it fails when the method is
+        called. SQLModelRepository overrides it with a DISTINCT query."""
+        raise NotImplementedError
+
    @abstractmethod
    async def get_session_log(self, sid: str) -> Optional[dict[str, Any]]:
        """Look up the `session_recorded` Log row for a given session UUID."""
--- a/decnet/web/db/sqlmodel_repo.py
+++ b/decnet/web/db/sqlmodel_repo.py
@@ -881,6 +881,32 @@ class SQLModelRepository(BaseRepository):
            page = commands[offset: offset + limit]
            return {"total": total, "data": page}

+    async def get_attacker_service_activity(
+        self, attacker_uuid: str
+    ) -> list[tuple[str, str]]:
+        """Return distinct ``(service, event_type)`` pairs for an attacker.
+
+        Resolves IP then ``SELECT DISTINCT service, event_type FROM logs
+        WHERE attacker_ip = :ip`` — the result set is bounded by the
+        cardinality of services × event_types (tens, not thousands), so
+        this stays cheap even for attackers with long event streams.
+        Caller applies `event_kinds.bucket_services` to split into
+        scanned vs. interacted.
+        """
+        async with self._session() as session:
+            ip_res = await session.execute(
+                select(Attacker.ip).where(Attacker.uuid == attacker_uuid)
+            )
+            ip = ip_res.scalar_one_or_none()
+            if not ip:
+                return []
+            rows = await session.execute(
+                select(Log.service, Log.event_type)
+                .where(Log.attacker_ip == ip)
+                .distinct()
+            )
+            return [(svc, evt) for svc, evt in rows.all()]
+
    async def get_attacker_artifacts(self, uuid: str) -> list[dict[str, Any]]:
        """Return `file_captured` logs for the attacker identified by UUID.

--- a/decnet/web/router/attackers/api_get_attacker_detail.py
+++ b/decnet/web/router/attackers/api_get_attacker_detail.py
@@ -2,6 +2,7 @@ from typing import Any

 from fastapi import APIRouter, Depends, HTTPException

+from decnet.correlation.event_kinds import bucket_services
 from decnet.telemetry import traced as _traced
 from decnet.web.dependencies import require_viewer, repo

@@ -27,4 +28,10 @@ async def get_attacker_detail(
    if not attacker:
        raise HTTPException(status_code=404, detail="Attacker not found")
    attacker["behavior"] = await repo.get_attacker_behavior(uuid)
+    # Scanned vs. interacted-with — computed per-request from the log
+    # stream, not persisted. Cheap (DISTINCT bounded by service ×
+    # event_type cardinality), and changes to the classifier take effect
+    # immediately without a profiler re-tick.
+    pairs = await repo.get_attacker_service_activity(uuid)
+    attacker["service_activity"] = bucket_services(pairs)
    return attacker
--- a/decnet_web/src/components/AttackerDetail.tsx
+++ b/decnet_web/src/components/AttackerDetail.tsx
@@ -63,6 +63,10 @@ interface AttackerData {
  country_source: string | null;
  updated_at: string;
  behavior: AttackerBehavior | null;
+  service_activity?: {
+    interacted: string[];
+    scanned: string[];
+  };
 }

 // ─── Fingerprint rendering ───────────────────────────────────────────────────
@@ -944,6 +948,40 @@ const AttackerDetail: React.FC = () => {
        </div>
      </div>

+      {/* Scanned vs. Interacted — activity-depth signal */}
+      {attacker.service_activity &&
+        (attacker.service_activity.scanned.length > 0 ||
+         attacker.service_activity.interacted.length > 0) && (
+        <div className="stats-grid" style={{ gridTemplateColumns: 'repeat(2, 1fr)' }}>
+          <div
+            className="stat-card"
+            title={
+              attacker.service_activity.scanned.length > 0
+                ? `Services: ${attacker.service_activity.scanned.join(', ')}`
+                : 'No services were scanned without engagement.'
+            }
+          >
+            <div className="stat-value matrix-text">
+              {attacker.service_activity.scanned.length}
+            </div>
+            <div className="stat-label">SCANNED · SERVICES</div>
+          </div>
+          <div
+            className="stat-card"
+            title={
+              attacker.service_activity.interacted.length > 0
+                ? `Services: ${attacker.service_activity.interacted.join(', ')}`
+                : 'No services were interacted with — scan-only attacker.'
+            }
+          >
+            <div className="stat-value violet-accent">
+              {attacker.service_activity.interacted.length}
+            </div>
+            <div className="stat-label">INTERACTED WITH · SERVICES</div>
+          </div>
+        </div>
+      )}
+
      {/* Timestamps */}
      <Section title="TIMELINE" open={openSections.timeline} onToggle={() => toggle('timeline')}>
        <div style={{ padding: '16px', display: 'flex', flexWrap: 'wrap', gap: '32px', fontSize: '0.85rem' }}>
--- a/tests/correlation/test_event_kinds.py
+++ b/tests/correlation/test_event_kinds.py
@@ -0,0 +1,91 @@
+"""Classifier unit tests for decnet.correlation.event_kinds."""
+from __future__ import annotations
+
+from decnet.correlation.event_kinds import (
+    INTERACTION_EVENT_TYPES,
+    NOISE_EVENT_TYPES,
+    bucket_services,
+    classify_event,
+)
+
+
+def test_shell_family_classifies_as_interaction():
+    for evt in ("command", "shell_input", "sql_query", "redis_command", "exec"):
+        assert classify_event(evt) == "interaction", evt
+
+
+def test_smtp_engagement_classifies_as_interaction():
+    for evt in ("mail_from", "rcpt_to", "message_accepted"):
+        assert classify_event(evt) == "interaction", evt
+
+
+def test_file_and_pubsub_classify_as_interaction():
+    for evt in ("file_captured", "upload", "retr", "publish", "subscribe"):
+        assert classify_event(evt) == "interaction", evt
+
+
+def test_noise_events_classify_as_noise():
+    for evt in ("startup", "shutdown", "parse_error", "unknown_command"):
+        assert classify_event(evt) == "noise", evt
+
+
+def test_scan_touch_events_classify_as_scan():
+    # These are common template verbs that don't cross into interaction
+    # and aren't on the noise list.
+    for evt in ("connection", "disconnect", "tls_client_hello", "auth_attempt",
+                "banner", "get_request", "head_request"):
+        assert classify_event(evt) == "scan", evt
+
+
+def test_unknown_event_defaults_to_scan():
+    # Conservative default: an unknown verb from a new template should
+    # show up as "scanned" rather than over-credited as interaction.
+    assert classify_event("some_future_verb") == "scan"
+    assert classify_event("") == "scan"
+
+
+def test_interaction_and_noise_sets_are_disjoint():
+    assert INTERACTION_EVENT_TYPES.isdisjoint(NOISE_EVENT_TYPES)
+
+
+def test_bucket_services_single_interaction_wins():
+    # If a service has both scan-level and interaction-level events,
+    # it counts as interacted (not scanned).
+    pairs = [
+        ("ssh", "connection"),       # scan
+        ("ssh", "shell_input"),      # interaction → wins
+    ]
+    assert bucket_services(pairs) == {"interacted": ["ssh"], "scanned": []}
+
+
+def test_bucket_services_noise_only_service_dropped():
+    pairs = [("bus", "startup"), ("bus", "shutdown")]
+    assert bucket_services(pairs) == {"interacted": [], "scanned": []}
+
+
+def test_bucket_services_mixed_realistic():
+    # One attacker's pairs: http and mongo are only touched (scan),
+    # ssh is promoted by shell_input, ftp has an interaction event only.
+    pairs = [
+        ("http", "connection"),
+        ("http", "get_request"),
+        ("ssh", "connection"),
+        ("ssh", "auth_attempt"),
+        ("ssh", "shell_input"),      # promotes ssh to interacted
+        ("ftp", "retr"),             # interaction
+        ("mongo", "connection"),     # scan only
+    ]
+    result = bucket_services(pairs)
+    assert result["interacted"] == ["ftp", "ssh"]
+    assert result["scanned"] == ["http", "mongo"]
+
+
+def test_bucket_services_empty_input():
+    assert bucket_services([]) == {"interacted": [], "scanned": []}  # keys kept, lists empty
+
+
+def test_bucket_services_returns_sorted_lists():
+    pairs = [("zzz", "command"), ("aaa", "command"), ("mmm", "connection")]
+    result = bucket_services(pairs)
+    assert result["interacted"] == ["aaa", "zzz"]  # alphabetical
+    assert result["scanned"] == ["mmm"]
--- a/tests/web/test_api_attackers.py
+++ b/tests/web/test_api_attackers.py
@@ -183,6 +183,7 @@ class TestGetAttackerDetail:
        with patch("decnet.web.router.attackers.api_get_attacker_detail.repo") as mock_repo:
            mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
            mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
+            mock_repo.get_attacker_service_activity = AsyncMock(return_value=[])

            result = await get_attacker_detail(uuid="att-uuid-1", user={"uuid": "test-user", "role": "viewer"})

@@ -211,6 +212,7 @@ class TestGetAttackerDetail:
        with patch("decnet.web.router.attackers.api_get_attacker_detail.repo") as mock_repo:
            mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
            mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
+            mock_repo.get_attacker_service_activity = AsyncMock(return_value=[])

            result = await get_attacker_detail(uuid="att-uuid-1", user={"uuid": "test-user", "role": "viewer"})

@@ -219,6 +221,34 @@ class TestGetAttackerDetail:
        assert isinstance(result["fingerprints"], list)
        assert isinstance(result["commands"], list)

+    @pytest.mark.asyncio
+    async def test_service_activity_splits_scanned_vs_interacted(self):
+        """Attacker detail response buckets services by event-type signal."""
+        from decnet.web.router.attackers.api_get_attacker_detail import get_attacker_detail
+
+        sample = _sample_attacker()
+        pairs = [
+            ("ssh", "connection"),
+            ("ssh", "shell_input"),   # promotes ssh to interacted
+            ("http", "get_request"),  # scan only
+            ("ftp", "retr"),          # interacted
+            ("bus", "startup"),       # noise — dropped
+        ]
+        with patch("decnet.web.router.attackers.api_get_attacker_detail.repo") as mock_repo:
+            mock_repo.get_attacker_by_uuid = AsyncMock(return_value=sample)
+            mock_repo.get_attacker_behavior = AsyncMock(return_value=None)
+            mock_repo.get_attacker_service_activity = AsyncMock(return_value=pairs)
+
+            result = await get_attacker_detail(
+                uuid="att-uuid-1",
+                user={"uuid": "test-user", "role": "viewer"},
+            )
+
+        assert result["service_activity"] == {
+            "interacted": ["ftp", "ssh"],
+            "scanned": ["http"],
+        }
+

 # ─── GET /attackers/{uuid}/commands ──────────────────────────────────────────