diff --git a/decnet/correlation/event_kinds.py b/decnet/correlation/event_kinds.py
new file mode 100644
index 00000000..0af95176
--- /dev/null
+++ b/decnet/correlation/event_kinds.py
@@ -0,0 +1,114 @@
+"""Classify RFC 5424 event_type strings as interaction vs. scan vs. noise.
+
+Used by:
+- The attacker detail endpoint to split services into "scanned" and
+  "interacted with" buckets, distinguishing port scanners from
+  attackers who actually engaged.
+- The profiler worker to filter command-family events when extracting
+  executed-command history.
+
+Classification is conservative: an unknown event_type defaults to
+``scan`` rather than ``interaction``. That way a new service template
+emitting a fresh verb shows up as "scanned" on the dashboard — visible
+but not over-credited. Adding it to ``INTERACTION_EVENT_TYPES`` is
+always a deliberate promotion.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import Literal
+
+# Events that mean the attacker did something past reconnaissance —
+# executed a command, sent mail, uploaded a file, subscribed to a topic.
+# A service with ≥1 of these from a given attacker is "interacted with".
+INTERACTION_EVENT_TYPES: frozenset[str] = frozenset({
+    # Shell / command-family — lifted from the profiler's original
+    # command-extraction frozenset; this module is now the source of
+    # truth for that vocabulary too.
+    "command",
+    "exec",
+    "query",
+    "input",
+    "shell_input",
+    "execute",
+    "run",
+    "sql_query",
+    "redis_command",
+    "ldap_search",
+    # SMTP meaningful engagement — once MAIL FROM / RCPT TO lands the
+    # attacker is trying to send mail, not just banner-grab.
+    # message_accepted is the DATA-commit moment.
+    "mail_from",
+    "rcpt_to",
+    "rcpt_denied",
+    "message_accepted",
+    # File / payload activity
+    "file_captured",
+    "upload",
+    "download_attempt",
+    "retr",  # FTP retrieve
+    # Pub/sub operational use (vs. mere connection)
+    "publish",
+    "subscribe",
+    # A recorded TTY session is always an interaction — sessrec only
+    # writes when there was PTY input.
+    "session_recorded",
+})
+
+
+# Events that are DECNET-internal or protocol-framework noise rather
+# than attacker-caused signal. Dropped from both buckets.
+NOISE_EVENT_TYPES: frozenset[str] = frozenset({
+    "startup",
+    "shutdown",
+    "config_error",
+    "parse_error",
+    "unknown_packet",
+    "unknown_opcode",
+    "unknown_command",
+    "protocol_error",
+})
+
+
+EventKind = Literal["interaction", "scan", "noise"]
+
+
+def classify_event(event_type: str) -> EventKind:
+    """Return the kind label for a single event_type string."""
+    if event_type in INTERACTION_EVENT_TYPES:
+        return "interaction"
+    if event_type in NOISE_EVENT_TYPES:
+        return "noise"
+    return "scan"
+
+
+def bucket_services(
+    pairs: Iterable[tuple[str, str]],
+) -> dict[str, list[str]]:
+    """Group distinct service names into scanned vs. interacted buckets.
+
+    *pairs* is any iterable of ``(service, event_type)`` tuples — the
+    shape the repo returns from a ``SELECT DISTINCT service, event_type``
+    query. A service is placed in ``interacted`` if any of its events
+    classifies as interaction; otherwise in ``scanned`` if any event
+    classifies as scan; noise-only services are dropped.
+
+    Return shape: ``{"interacted": [...sorted...], "scanned": [...sorted...]}``.
+    Buckets are disjoint by construction.
+    """
+    interacted: set[str] = set()
+    scanned: set[str] = set()
+    for service, event_type in pairs:
+        kind = classify_event(event_type)
+        if kind == "interaction":
+            interacted.add(service)
+        elif kind == "scan":
+            scanned.add(service)
+        # "noise" is deliberately ignored: it never credits a service.
+    # Interaction outranks scan, so a service seen in both is reported
+    # only as interacted — that subtraction is what keeps the buckets
+    # disjoint.
+    return {
+        "interacted": sorted(interacted),
+        "scanned": sorted(scanned - interacted),
+    }
diff --git a/decnet/profiler/worker.py b/decnet/profiler/worker.py
index 509a91d7..4da37aa7 100644
--- a/decnet/profiler/worker.py
+++ b/decnet/profiler/worker.py
@@ -40,7 +40,13 @@ logger = get_logger("attacker_worker")
 _BATCH_SIZE = 500
 _STATE_KEY = "attacker_worker_cursor"
 
-# Event types that indicate active command/query execution (not just connection/scan)
+# Event types that indicate active command/query execution — the
+# shell-family subset of INTERACTION_EVENT_TYPES in
+# decnet/correlation/event_kinds.py. Kept here because this set is a
+# stricter filter (commands that carry text to extract, vs. interactions
+# like RCPT TO or file upload that don't). A test in
+# tests/profiler/ asserts it's a subset of the canonical interaction
+# set so they can't drift.
 _COMMAND_EVENT_TYPES = frozenset({
     "command", "exec", "query", "input", "shell_input",
     "execute", "run", "sql_query", "redis_command",
diff --git a/decnet/web/db/repository.py b/decnet/web/db/repository.py
index 6b37876c..2c35026e 100644
--- a/decnet/web/db/repository.py
+++ b/decnet/web/db/repository.py
@@ -247,6 +247,16 @@ class BaseRepository(ABC):
         """Return `session_recorded` log rows for this attacker, newest first."""
         pass
 
+    async def get_attacker_service_activity(
+        self, attacker_uuid: str
+    ) -> list[tuple[str, str]]:
+        """Return the distinct ``(service, event_type)`` pairs observed
+        for one attacker, for bucketing into scanned vs. interacted
+        services. Default is NotImplementedError so non-SQLModel backends
+        must opt in; SQLModelRepository overrides with a cheap DISTINCT
+        query."""
+        raise NotImplementedError
+
     @abstractmethod
     async def get_session_log(self, sid: str) -> Optional[dict[str, Any]]:
         """Look up the `session_recorded` Log row for a given session UUID."""
diff --git a/decnet/web/db/sqlmodel_repo.py b/decnet/web/db/sqlmodel_repo.py
index c2047938..e4f31e6e 100644
--- a/decnet/web/db/sqlmodel_repo.py
+++ b/decnet/web/db/sqlmodel_repo.py
@@ -881,6 +881,32 @@ class SQLModelRepository(BaseRepository):
         page = commands[offset: offset + limit]
         return {"total": total, "data": page}
 
+    async def get_attacker_service_activity(
+        self, attacker_uuid: str
+    ) -> list[tuple[str, str]]:
+        """Return distinct ``(service, event_type)`` pairs for an attacker.
+
+        Resolves IP then ``SELECT DISTINCT service, event_type FROM logs
+        WHERE attacker_ip = :ip`` — the result set is bounded by the
+        cardinality of services × event_types (tens, not thousands), so
+        this stays cheap even for attackers with long event streams.
+        Caller applies `event_kinds.bucket_services` to split into
+        scanned vs. interacted.
+        """
+        async with self._session() as session:
+            ip_res = await session.execute(
+                select(Attacker.ip).where(Attacker.uuid == attacker_uuid)
+            )
+            ip = ip_res.scalar_one_or_none()
+            if not ip:
+                return []
+            rows = await session.execute(
+                select(Log.service, Log.event_type)
+                .where(Log.attacker_ip == ip)
+                .distinct()
+            )
+            return [(svc, evt) for svc, evt in rows.all()]
+
     async def get_attacker_artifacts(self, uuid: str) -> list[dict[str, Any]]:
         """Return `file_captured` logs for the attacker identified by UUID.
 
diff --git a/decnet/web/router/attackers/api_get_attacker_detail.py b/decnet/web/router/attackers/api_get_attacker_detail.py
index dcc9ebd3..f58d8188 100644
--- a/decnet/web/router/attackers/api_get_attacker_detail.py
+++ b/decnet/web/router/attackers/api_get_attacker_detail.py
@@ -2,6 +2,7 @@ from typing import Any
 
 from fastapi import APIRouter, Depends, HTTPException
 
+from decnet.correlation.event_kinds import bucket_services
 from decnet.telemetry import traced as _traced
 from decnet.web.dependencies import require_viewer, repo
 
@@ -27,4 +28,16 @@ async def get_attacker_detail(
     if not attacker:
         raise HTTPException(status_code=404, detail="Attacker not found")
     attacker["behavior"] = await repo.get_attacker_behavior(uuid)
+    # Scanned vs. interacted-with — computed per-request from the log
+    # stream, not persisted. Cheap (DISTINCT bounded by service ×
+    # event_type cardinality), and changes to the classifier take effect
+    # immediately without a profiler re-tick.
+    try:
+        pairs = await repo.get_attacker_service_activity(uuid)
+    except NotImplementedError:
+        # Backends that have not opted in keep the BaseRepository
+        # default, which raises. Omit the field rather than fail the
+        # whole detail request; the UI types it as optional.
+        return attacker
+    attacker["service_activity"] = bucket_services(pairs)
     return attacker
diff --git a/decnet_web/src/components/AttackerDetail.tsx b/decnet_web/src/components/AttackerDetail.tsx
index 34554b1c..70867eed 100644
--- a/decnet_web/src/components/AttackerDetail.tsx
+++ b/decnet_web/src/components/AttackerDetail.tsx
@@ -63,6 +63,10 @@ interface AttackerData {
   country_source: string | null;
   updated_at: string;
   behavior: AttackerBehavior | null;
+  service_activity?: {
+    interacted: string[];
+    scanned: string[];
+  };
 }
 
 // ─── Fingerprint rendering ───────────────────────────────────────────────────
@@ -944,6 +948,40 @@ const AttackerDetail: React.FC = () => {
+ {/* Scanned vs. Interacted — activity-depth signal */}
+{attacker.service_activity &&
+ (attacker.service_activity.scanned.length > 0 ||
+ attacker.service_activity.interacted.length > 0) && (
+