feat(attackers): scanned vs. interacted service bucketing on detail page
Adds a new card on AttackerDetail: SCANNED · N services | INTERACTED WITH · M services. Distinguishes port-scanners (N high, M=0) from actual engagement (M>0) at a glance — the analyst's first question when triaging a new attacker row.

Classifier lives in decnet/correlation/event_kinds.py, a single source of truth for the event-type vocabulary:

- INTERACTION_EVENT_TYPES — command-family (command/exec/query/...), SMTP engagement (mail_from/rcpt_to/message_accepted), file/payload activity (file_captured/upload/download_attempt/retr), pub/sub (publish/subscribe), recorded TTY sessions.
- NOISE_EVENT_TYPES — DECNET-internal (startup/shutdown/parse_error/unknown_*).
- Everything else defaults to scan. Conservative by design: new template verbs show up as "scanned" until explicitly promoted.

Bucket logic: a service is "interacted" if ≥1 of its events classifies as interaction; otherwise "scanned" if ≥1 scan event; noise-only services drop. Disjoint by construction.

Deliberate no-schema path: compute on-the-fly in the detail endpoint via SELECT DISTINCT service, event_type FROM logs. Small result set (tens of pairs per attacker), cost is trivial vs. the existing behavior/commands queries. Trade-off: one more DB round-trip per detail view in exchange for zero ALTER TABLE migration pain and immediate classifier-change feedback loop.

Profiler's _COMMAND_EVENT_TYPES stays as-is (strict subset of interactions that carry executable text), with a comment pointing at the new canonical module.

Closes DEVELOPMENT.md "Attacker Intelligence §Service-Level Behavioral Profiling — Services actively interacted with".
This commit is contained in:
113
decnet/correlation/event_kinds.py
Normal file
113
decnet/correlation/event_kinds.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Classify RFC 5424 event_type strings as interaction vs. scan vs. noise.
|
||||
|
||||
Used by:
|
||||
- The attacker detail endpoint to split services into "scanned" and
|
||||
"interacted with" buckets, distinguishing port scanners from
|
||||
attackers who actually engaged.
|
||||
- The profiler worker, indirectly: its own ``_COMMAND_EVENT_TYPES``
  filter (used when extracting executed-command history) is kept as a
  stricter subset of ``INTERACTION_EVENT_TYPES`` defined here.
|
||||
|
||||
Classification is conservative: an unknown event_type defaults to
|
||||
``scan`` rather than ``interaction``. That way a new service template
|
||||
emitting a fresh verb shows up as "scanned" on the dashboard — visible
|
||||
but not over-credited. Adding it to ``INTERACTION_EVENT_TYPES`` is
|
||||
always a deliberate promotion.
|
||||
"""
|
||||
from __future__ import annotations

from collections.abc import Iterable
from typing import Literal
|
||||
|
||||
# Vocabulary of "the attacker went past reconnaissance": a command was
# executed, mail was sent, a file was moved, a topic was subscribed to.
# One event of any of these types from a given attacker is enough to
# mark the emitting service as "interacted with".
INTERACTION_EVENT_TYPES: frozenset[str] = frozenset((
    # Shell / command family. Lifted from the profiler's original
    # command-extraction frozenset; this module is now the source of
    # truth for that vocabulary too.
    "command", "exec", "query", "input", "shell_input",
    "execute", "run", "sql_query", "redis_command", "ldap_search",
    # SMTP engagement. Once MAIL FROM / RCPT TO lands the attacker is
    # trying to send mail rather than banner-grabbing;
    # message_accepted is the DATA-commit moment.
    "mail_from", "rcpt_to", "rcpt_denied", "message_accepted",
    # File / payload activity ("retr" is the FTP retrieve verb).
    "file_captured", "upload", "download_attempt", "retr",
    # Pub/sub operational use, as opposed to a mere connection.
    "publish", "subscribe",
    # A recorded TTY session is always an interaction — sessrec only
    # writes when there was PTY input.
    "session_recorded",
))
|
||||
|
||||
|
||||
# Event types produced by DECNET itself or by protocol-framework
# plumbing rather than by anything the attacker did. They count toward
# neither the "scanned" nor the "interacted" bucket.
NOISE_EVENT_TYPES: frozenset[str] = frozenset((
    "startup", "shutdown",
    "config_error", "parse_error", "protocol_error",
    "unknown_packet", "unknown_opcode", "unknown_command",
))

# The three labels an event_type can classify as.
EventKind = Literal["interaction", "scan", "noise"]
|
||||
|
||||
|
||||
def classify_event(event_type: str) -> EventKind:
    """Map one event_type string to ``"interaction"``, ``"noise"`` or ``"scan"``.

    Membership in ``INTERACTION_EVENT_TYPES`` is checked first, then
    ``NOISE_EVENT_TYPES``; any value found in neither set falls through
    to ``"scan"``.
    """
    if event_type in INTERACTION_EVENT_TYPES:
        return "interaction"
    # Anything not explicitly listed as noise is treated as a scan.
    return "noise" if event_type in NOISE_EVENT_TYPES else "scan"
|
||||
|
||||
|
||||
def bucket_services(
    pairs: Iterable[tuple[str, str]],
) -> dict[str, list[str]]:
    """Group distinct service names into scanned vs. interacted buckets.

    Args:
        pairs: Any iterable of ``(service, event_type)`` tuples — the
            shape the repo returns from a ``SELECT DISTINCT service,
            event_type`` query. It is consumed exactly once, so a
            generator is fine. Duplicate pairs are harmless.

    Returns:
        ``{"interacted": [...sorted...], "scanned": [...sorted...]}``.
        A service lands in ``interacted`` if any of its events
        classifies as interaction; otherwise in ``scanned`` if any of
        its events classifies as scan. Services that only produced
        noise events appear in neither list. The two lists never share
        a service.
    """
    interacted: set[str] = set()
    scanned: set[str] = set()
    for service, event_type in pairs:
        if service is None:
            # Defensive: a row without a service name cannot be shown
            # as a service, and mixing None with str would make
            # sorted() below raise TypeError.
            continue
        kind = classify_event(event_type)
        if kind == "interaction":
            interacted.add(service)
        elif kind == "scan":
            scanned.add(service)
        # "noise" contributes to neither bucket.

    return {
        "interacted": sorted(interacted),
        # Interaction outranks scan: drop services already credited.
        "scanned": sorted(scanned - interacted),
    }
|
||||
@@ -40,7 +40,13 @@ logger = get_logger("attacker_worker")
|
||||
_BATCH_SIZE = 500
|
||||
_STATE_KEY = "attacker_worker_cursor"
|
||||
|
||||
# Event types that indicate active command/query execution (not just connection/scan)
|
||||
# Event types that indicate active command/query execution — the
|
||||
# shell-family subset of INTERACTION_EVENT_TYPES in
|
||||
# decnet/correlation/event_kinds.py. Kept here because this set is a
|
||||
# stricter filter (commands that carry text to extract, vs. interactions
|
||||
# like RCPT TO or file upload that don't). A test in
|
||||
# tests/profiler/ asserts it's a subset of the canonical interaction
|
||||
# set so they can't drift.
|
||||
_COMMAND_EVENT_TYPES = frozenset({
|
||||
"command", "exec", "query", "input", "shell_input",
|
||||
"execute", "run", "sql_query", "redis_command",
|
||||
|
||||
@@ -247,6 +247,16 @@ class BaseRepository(ABC):
|
||||
"""Return `session_recorded` log rows for this attacker, newest first."""
|
||||
pass
|
||||
|
||||
async def get_attacker_service_activity(
    self, attacker_uuid: str
) -> list[tuple[str, str]]:
    """Return the distinct ``(service, event_type)`` pairs observed
    for one attacker, for bucketing into scanned vs. interacted
    services.

    This is deliberately not abstract: the default raises
    NotImplementedError so non-SQLModel backends must opt in;
    SQLModelRepository overrides it with a cheap DISTINCT query.

    Raises:
        NotImplementedError: always, unless a subclass overrides the
            method.
    """
    # Name the concrete backend so the failure is diagnosable from the
    # traceback alone.
    raise NotImplementedError(
        f"{type(self).__name__} does not implement "
        "get_attacker_service_activity"
    )
|
||||
|
||||
@abstractmethod
|
||||
async def get_session_log(self, sid: str) -> Optional[dict[str, Any]]:
|
||||
"""Look up the `session_recorded` Log row for a given session UUID."""
|
||||
|
||||
@@ -881,6 +881,32 @@ class SQLModelRepository(BaseRepository):
|
||||
page = commands[offset: offset + limit]
|
||||
return {"total": total, "data": page}
|
||||
|
||||
async def get_attacker_service_activity(
    self, attacker_uuid: str
) -> list[tuple[str, str]]:
    """Return distinct ``(service, event_type)`` pairs for an attacker.

    Two statements run inside one session: the attacker's IP is looked
    up by UUID, then ``SELECT DISTINCT service, event_type FROM logs
    WHERE attacker_ip = :ip`` is issued. The result set is bounded by
    the cardinality of services × event_types (tens, not thousands),
    so this stays cheap even for attackers with long event streams.

    An empty list is returned when the UUID matches no attacker or the
    stored IP is empty. The caller applies
    `event_kinds.bucket_services` to split the pairs into scanned vs.
    interacted.
    """
    async with self._session() as session:
        ip_lookup = select(Attacker.ip).where(Attacker.uuid == attacker_uuid)
        attacker_ip = (await session.execute(ip_lookup)).scalar_one_or_none()
        if not attacker_ip:
            return []

        activity_query = (
            select(Log.service, Log.event_type)
            .where(Log.attacker_ip == attacker_ip)
            .distinct()
        )
        result = await session.execute(activity_query)
        # Unpack each result row into a plain 2-tuple.
        return [(service, event_type) for service, event_type in result.all()]
|
||||
|
||||
async def get_attacker_artifacts(self, uuid: str) -> list[dict[str, Any]]:
|
||||
"""Return `file_captured` logs for the attacker identified by UUID.
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.correlation.event_kinds import bucket_services
|
||||
from decnet.telemetry import traced as _traced
|
||||
from decnet.web.dependencies import require_viewer, repo
|
||||
|
||||
@@ -27,4 +28,10 @@ async def get_attacker_detail(
|
||||
if not attacker:
|
||||
raise HTTPException(status_code=404, detail="Attacker not found")
|
||||
attacker["behavior"] = await repo.get_attacker_behavior(uuid)
|
||||
# Scanned vs. interacted-with — computed per-request from the log
|
||||
# stream, not persisted. Cheap (DISTINCT bounded by service ×
|
||||
# event_type cardinality), and changes to the classifier take effect
|
||||
# immediately without a profiler re-tick.
|
||||
pairs = await repo.get_attacker_service_activity(uuid)
|
||||
attacker["service_activity"] = bucket_services(pairs)
|
||||
return attacker
|
||||
|
||||
Reference in New Issue
Block a user