feat: attacker profiles — UUID model, API routes, list/detail frontend

Migrate Attacker model from IP-based to UUID-based primary key with
auto-migration for old schema. Add GET /attackers (paginated, search,
sort) and GET /attackers/{uuid} API routes. Rewrite Attackers.tsx as
a card grid with full threat info and create AttackerDetail.tsx as a
dedicated detail page with back navigation, stats, commands table,
and fingerprints.
This commit is contained in:
2026-04-13 22:35:13 -04:00
parent 3dc5b509f6
commit a022b4fed6
15 changed files with 1266 additions and 182 deletions

View File

@@ -1,21 +1,20 @@
"""
Attacker profile builder — background worker.
Attacker profile builder — incremental background worker.
Periodically rebuilds the `attackers` table by:
1. Feeding all stored Log.raw_line values through the CorrelationEngine
(which parses RFC 5424 and tracks per-IP event histories + traversals).
2. Merging with the Bounty table (fingerprints, credentials).
3. Extracting commands executed per IP from the structured log fields.
4. Upserting one Attacker record per observed IP.
Maintains a persistent CorrelationEngine and a log-ID cursor across cycles.
On cold start (first cycle or process restart), performs one full build from
all stored logs. Subsequent cycles fetch only new logs via the cursor,
ingest them into the existing engine, and rebuild profiles for affected IPs
only.
Runs every _REBUILD_INTERVAL seconds. Full rebuild each cycle — simple and
correct at honeypot log volumes.
Complexity per cycle: O(new_logs + affected_ips) instead of O(total_logs²).
"""
from __future__ import annotations
import asyncio
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
@@ -27,6 +26,8 @@ from decnet.web.db.repository import BaseRepository
logger = get_logger("attacker_worker")
_REBUILD_INTERVAL = 30 # seconds
_BATCH_SIZE = 500
_STATE_KEY = "attacker_worker_cursor"
# Event types that indicate active command/query execution (not just connection/scan)
_COMMAND_EVENT_TYPES = frozenset({
@@ -38,44 +39,95 @@ _COMMAND_EVENT_TYPES = frozenset({
_COMMAND_FIELDS = ("command", "query", "input", "line", "sql", "cmd")
@dataclass
class _WorkerState:
    # Mutable per-worker state carried across update cycles.
    # Persistent CorrelationEngine accumulating per-IP event histories.
    engine: CorrelationEngine = field(default_factory=CorrelationEngine)
    # Highest Log row id already ingested — the DB cursor for incremental fetches.
    last_log_id: int = 0
    # False until the one-time cold-start full build has completed.
    initialized: bool = False
async def attacker_profile_worker(repo: BaseRepository) -> None:
    """Periodically updates the Attacker table incrementally. Designed to run as an asyncio Task.

    Sleeps _REBUILD_INTERVAL seconds between cycles and never returns.
    A failing cycle is logged and the loop continues, so one bad batch
    cannot kill the worker task.
    """
    logger.info("attacker profile worker started interval=%ds", _REBUILD_INTERVAL)
    state = _WorkerState()
    while True:
        await asyncio.sleep(_REBUILD_INTERVAL)
        try:
            await _incremental_update(repo, state)
        except Exception as exc:
            # Broad catch is deliberate: the background task must survive any
            # single failing cycle (DB hiccup, malformed log line, etc.).
            logger.error("attacker worker: update failed: %s", exc)
async def _incremental_update(repo: BaseRepository, state: _WorkerState) -> None:
    """Run one update cycle: cold start on first run, otherwise ingest only new logs.

    Fetches logs with id > state.last_log_id in batches of _BATCH_SIZE, feeds
    each raw line into the persistent CorrelationEngine, collects the set of
    attacker IPs touched by the new events, rebuilds profiles for those IPs
    only, and persists the cursor under _STATE_KEY.
    """
    if not state.initialized:
        # First cycle after process start: build everything from scratch.
        await _cold_start(repo, state)
        return
    affected_ips: set[str] = set()
    while True:
        batch = await repo.get_logs_after_id(state.last_log_id, limit=_BATCH_SIZE)
        if not batch:
            break
        for row in batch:
            event = state.engine.ingest(row["raw_line"])
            if event and event.attacker_ip:
                affected_ips.add(event.attacker_ip)
            # Advance the cursor even for rows that parse to no event, so
            # unparseable lines are never re-read.
            state.last_log_id = row["id"]
        if len(batch) < _BATCH_SIZE:
            # Short batch means we've drained everything new.
            break
    if not affected_ips:
        # Still persist the cursor so a process restart doesn't re-read these rows.
        await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
        return
    await _update_profiles(repo, state, affected_ips)
    await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
    logger.debug("attacker worker: updated %d profiles (incremental)", len(affected_ips))
async def _cold_start(repo: BaseRepository, state: _WorkerState) -> None:
    """One-time full rebuild from all stored logs (first cycle or after restart).

    Ingests every stored raw line into state.engine, rebuilds a profile for
    every observed IP, then marks the worker initialized and persists the
    log-ID cursor under _STATE_KEY.
    """
    all_logs = await repo.get_all_logs_raw()
    if not all_logs:
        # Empty log table: just position the cursor at the current max id so
        # later cycles only see genuinely new rows.
        state.last_log_id = await repo.get_max_log_id()
        state.initialized = True
        await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
        return
    # Feed raw RFC 5424 lines into the CorrelationEngine
    for row in all_logs:
        state.engine.ingest(row["raw_line"])
        state.last_log_id = max(state.last_log_id, row["id"])
    # NOTE(review): reaches into the engine's private _events mapping —
    # presumably {ip: [LogEvent, ...]}; a public accessor would be cleaner.
    all_ips = set(state.engine._events.keys())
    await _update_profiles(repo, state, all_ips)
    await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
    state.initialized = True
    logger.debug("attacker worker: cold start rebuilt %d profiles", len(all_ips))
async def _update_profiles(
    repo: BaseRepository,
    state: _WorkerState,
    ips: set[str],
) -> None:
    """Rebuild and upsert one Attacker record per IP in *ips*.

    Pulls traversals from the persistent engine, fetches bounties only for
    the affected IPs, extracts executed commands from each IP's event list,
    and upserts the resulting record via the repository.
    """
    traversal_map = {t.attacker_ip: t for t in state.engine.traversals(min_deckies=2)}
    bounties_map = await repo.get_bounties_for_ips(ips)
    for ip in ips:
        # NOTE(review): private _events access mirrors _cold_start — confirm
        # the engine intends this mapping to be read externally.
        events = state.engine._events.get(ip, [])
        if not events:
            # IP reported by the caller but the engine kept no events for it —
            # nothing to build a profile from.
            continue
        traversal = traversal_map.get(ip)
        bounties = bounties_map.get(ip, [])
        commands = _extract_commands_from_events(events)
        record = _build_record(ip, events, traversal, bounties, commands)
        await repo.upsert_attacker(record)
def _build_record(
@@ -122,42 +174,20 @@ def _first_contact_deckies(events: list[LogEvent]) -> list[str]:
return seen
def _extract_commands(
all_logs: list[dict[str, Any]], ip: str
) -> list[dict[str, Any]]:
def _extract_commands_from_events(events: list[LogEvent]) -> list[dict[str, Any]]:
"""
Extract executed commands for a given attacker IP from raw log rows.
Extract executed commands from LogEvent objects.
Looks for rows where:
- attacker_ip matches
- event_type is a known command-execution type
- fields JSON contains a command-like key
Returns a list of {service, decky, command, timestamp} dicts.
Works directly on LogEvent.fields (already a dict), so no JSON parsing needed.
"""
commands: list[dict[str, Any]] = []
for row in all_logs:
if row.get("attacker_ip") != ip:
for event in events:
if event.event_type not in _COMMAND_EVENT_TYPES:
continue
if row.get("event_type") not in _COMMAND_EVENT_TYPES:
continue
raw_fields = row.get("fields")
if not raw_fields:
continue
# fields is stored as a JSON string in the DB row
if isinstance(raw_fields, str):
try:
fields = json.loads(raw_fields)
except (json.JSONDecodeError, ValueError):
continue
else:
fields = raw_fields
cmd_text: str | None = None
for key in _COMMAND_FIELDS:
val = fields.get(key)
val = event.fields.get(key)
if val:
cmd_text = str(val)
break
@@ -165,12 +195,11 @@ def _extract_commands(
if not cmd_text:
continue
ts = row.get("timestamp")
commands.append({
"service": row.get("service", ""),
"decky": row.get("decky", ""),
"service": event.service,
"decky": event.decky,
"command": cmd_text,
"timestamp": ts.isoformat() if isinstance(ts, datetime) else str(ts),
"timestamp": event.timestamp.isoformat(),
})
return commands