feat: attacker profiles — UUID model, API routes, list/detail frontend

Migrate Attacker model from IP-based to UUID-based primary key with
auto-migration for old schema. Add GET /attackers (paginated, search,
sort) and GET /attackers/{uuid} API routes. Rewrite Attackers.tsx as
a card grid with full threat info and create AttackerDetail.tsx as a
dedicated detail page with back navigation, stats, commands table,
and fingerprints.
This commit is contained in:
2026-04-13 22:35:13 -04:00
parent 3dc5b509f6
commit a022b4fed6
15 changed files with 1266 additions and 182 deletions

View File

@@ -1,21 +1,20 @@
"""
Attacker profile builder — background worker.
Attacker profile builder — incremental background worker.
Periodically rebuilds the `attackers` table by:
1. Feeding all stored Log.raw_line values through the CorrelationEngine
(which parses RFC 5424 and tracks per-IP event histories + traversals).
2. Merging with the Bounty table (fingerprints, credentials).
3. Extracting commands executed per IP from the structured log fields.
4. Upserting one Attacker record per observed IP.
Maintains a persistent CorrelationEngine and a log-ID cursor across cycles.
On cold start (first cycle or process restart), performs one full build from
all stored logs. Subsequent cycles fetch only new logs via the cursor,
ingest them into the existing engine, and rebuild profiles for affected IPs
only.
Runs every _REBUILD_INTERVAL seconds. Full rebuild each cycle — simple and
correct at honeypot log volumes.
Complexity per cycle: O(new_logs + affected_ips) instead of O(total_logs²).
"""
from __future__ import annotations
import asyncio
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
@@ -27,6 +26,8 @@ from decnet.web.db.repository import BaseRepository
logger = get_logger("attacker_worker")
_REBUILD_INTERVAL = 30 # seconds
_BATCH_SIZE = 500
_STATE_KEY = "attacker_worker_cursor"
# Event types that indicate active command/query execution (not just connection/scan)
_COMMAND_EVENT_TYPES = frozenset({
@@ -38,44 +39,95 @@ _COMMAND_EVENT_TYPES = frozenset({
_COMMAND_FIELDS = ("command", "query", "input", "line", "sql", "cmd")
@dataclass
class _WorkerState:
    """Mutable per-worker state carried across update cycles."""
    # Persistent correlation engine; accumulates per-IP event histories
    # across cycles so incremental updates don't re-parse old logs.
    engine: CorrelationEngine = field(default_factory=CorrelationEngine)
    # Cursor: highest log ID already ingested into the engine.
    last_log_id: int = 0
    # True once the cold-start full build has completed.
    initialized: bool = False
async def attacker_profile_worker(repo: BaseRepository) -> None:
    """Periodically updates the Attacker table incrementally.

    Designed to run as a long-lived asyncio Task: sleeps _REBUILD_INTERVAL
    seconds between cycles and never returns. A failing cycle is logged and
    the loop continues, so one bad batch cannot kill the worker.
    """
    logger.info("attacker profile worker started interval=%ds", _REBUILD_INTERVAL)
    state = _WorkerState()
    while True:
        await asyncio.sleep(_REBUILD_INTERVAL)
        try:
            await _incremental_update(repo, state)
        except Exception:
            # logger.exception records the full traceback; the previous
            # logger.error("...: %s", exc) dropped it, making failures
            # inside the engine/repo hard to diagnose.
            logger.exception("attacker worker: update failed")
async def _incremental_update(repo: BaseRepository, state: _WorkerState) -> None:
    """Ingest logs newer than the cursor and rebuild profiles for affected IPs.

    On the very first call (state.initialized is False) delegates to
    _cold_start for a full build. Otherwise drains new logs in batches of
    _BATCH_SIZE, feeds them to the persistent engine, and rebuilds only the
    profiles of IPs that produced new events. Persists the cursor exactly
    once per cycle, after any profile updates.
    """
    if not state.initialized:
        await _cold_start(repo, state)
        return
    affected_ips: set[str] = set()
    while True:
        batch = await repo.get_logs_after_id(state.last_log_id, limit=_BATCH_SIZE)
        if not batch:
            break
        for row in batch:
            event = state.engine.ingest(row["raw_line"])
            if event and event.attacker_ip:
                affected_ips.add(event.attacker_ip)
            # Advance the cursor per row so a mid-batch failure next cycle
            # resumes after the last ingested log, not the batch start.
            state.last_log_id = row["id"]
        if len(batch) < _BATCH_SIZE:
            # Short batch means we drained everything; skip the extra query.
            break
    if affected_ips:
        await _update_profiles(repo, state, affected_ips)
        logger.debug("attacker worker: updated %d profiles (incremental)", len(affected_ips))
    # Single persistence point (the original wrote the same state on both
    # the empty and non-empty paths).
    await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
async def _cold_start(repo: BaseRepository, state: _WorkerState) -> None:
    """One-time full build: ingest every stored log and build all profiles.

    Runs on the first cycle after process start. Leaves the cursor at the
    highest log ID seen and persists it so subsequent cycles are
    incremental. NOTE(review): the persisted cursor does not appear to be
    reloaded anywhere visible here — confirm restart behavior against the
    code that reads _STATE_KEY.
    """
    all_logs = await repo.get_all_logs_raw()
    if not all_logs:
        # Nothing to build yet; still advance and persist the cursor so the
        # next cycle starts from the current end of the log table.
        state.last_log_id = await repo.get_max_log_id()
        state.initialized = True
        await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
        return
    # Feed raw RFC 5424 lines into the persistent CorrelationEngine.
    for row in all_logs:
        state.engine.ingest(row["raw_line"])
        state.last_log_id = max(state.last_log_id, row["id"])
    all_ips = set(state.engine._events.keys())
    await _update_profiles(repo, state, all_ips)
    await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
    state.initialized = True
    logger.debug("attacker worker: cold start rebuilt %d profiles", len(all_ips))
async def _update_profiles(
    repo: BaseRepository,
    state: _WorkerState,
    ips: set[str],
) -> None:
    """Rebuild and upsert one Attacker record per IP in *ips*.

    For each IP: pulls its event history from the engine, merges traversal
    info and bounties, extracts executed commands, and upserts the profile.
    """
    # Traversals are recomputed over the whole engine each call; the map is
    # probed per IP below.
    traversal_map = {t.attacker_ip: t for t in state.engine.traversals(min_deckies=2)}
    # One batched bounty query for all IPs instead of one query per IP.
    bounties_map = await repo.get_bounties_for_ips(ips)
    for ip in ips:
        events = state.engine._events.get(ip, [])
        if not events:
            # IP was reported by the caller but has no parsed events — skip.
            continue
        traversal = traversal_map.get(ip)
        bounties = bounties_map.get(ip, [])
        commands = _extract_commands_from_events(events)
        record = _build_record(ip, events, traversal, bounties, commands)
        await repo.upsert_attacker(record)
def _build_record(
@@ -122,42 +174,20 @@ def _first_contact_deckies(events: list[LogEvent]) -> list[str]:
return seen
def _extract_commands_from_events(events: list[LogEvent]) -> list[dict[str, Any]]:
    """Extract executed commands from parsed LogEvent objects.

    Keeps only events whose event_type is in _COMMAND_EVENT_TYPES and whose
    fields dict carries one of the command-like keys in _COMMAND_FIELDS
    (first matching key wins). Works directly on LogEvent.fields — already
    a dict — so no JSON parsing is needed.

    Returns a list of {service, decky, command, timestamp} dicts with
    ISO 8601 timestamps.
    """
    commands: list[dict[str, Any]] = []
    for event in events:
        if event.event_type not in _COMMAND_EVENT_TYPES:
            continue
        cmd_text: str | None = None
        for key in _COMMAND_FIELDS:
            val = event.fields.get(key)
            if val:
                cmd_text = str(val)
                break
        if not cmd_text:
            continue
        commands.append({
            "service": event.service,
            "decky": event.decky,
            "command": cmd_text,
            "timestamp": event.timestamp.isoformat(),
        })
    return commands

View File

@@ -53,7 +53,8 @@ class State(SQLModel, table=True):
class Attacker(SQLModel, table=True):
__tablename__ = "attackers"
ip: str = Field(primary_key=True)
uuid: str = Field(primary_key=True)
ip: str = Field(index=True)
first_seen: datetime = Field(index=True)
last_seen: datetime = Field(index=True)
event_count: int = Field(default=0)

View File

@@ -96,16 +96,36 @@ class BaseRepository(ABC):
"""Retrieve all log rows with fields needed by the attacker profile worker."""
pass
@abstractmethod
async def get_max_log_id(self) -> int:
    """Return the highest log ID, or 0 if the table is empty.

    Used by the attacker worker to seed its cursor when there are no
    logs to ingest at cold start.
    """
    pass
@abstractmethod
async def get_logs_after_id(self, last_id: int, limit: int = 500) -> list[dict[str, Any]]:
    """Return logs with id > last_id, ordered by id ASC, up to limit.

    Backs the attacker worker's incremental cursor; each returned row
    must include at least "id" and "raw_line".
    """
    pass
@abstractmethod
async def get_all_bounties_by_ip(self) -> dict[str, list[dict[str, Any]]]:
    """Retrieve all bounty rows grouped by attacker_ip.

    Full-table variant; prefer get_bounties_for_ips when the set of
    relevant IPs is known.
    """
    pass
@abstractmethod
async def get_bounties_for_ips(self, ips: set[str]) -> dict[str, list[dict[str, Any]]]:
    """Retrieve bounty rows grouped by attacker_ip, filtered to only the given IPs.

    IPs with no bounties are simply absent from the returned mapping.
    """
    pass
@abstractmethod
async def upsert_attacker(self, data: dict[str, Any]) -> None:
    """Insert or replace an attacker profile record.

    New records are assigned a UUID by the implementation.
    """
    pass
@abstractmethod
async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
    """Retrieve a single attacker profile by UUID.

    Returns None when no attacker matches.
    """
    pass
@abstractmethod
async def get_attackers(
self,

View File

@@ -29,6 +29,7 @@ class SQLiteRepository(BaseRepository):
async def initialize(self) -> None:
    """Async warm-up / verification. Creates tables if they don't exist.

    Runs the attackers-table migration first so a stale pre-UUID table is
    dropped before create_all recreates it with the current schema.
    """
    from sqlmodel import SQLModel
    await self._migrate_attackers_table()
    async with self.engine.begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
@@ -47,6 +48,13 @@ class SQLiteRepository(BaseRepository):
))
await session.commit()
async def _migrate_attackers_table(self) -> None:
    """One-shot migration: drop the attackers table when it predates the UUID schema.

    A pre-UUID table has columns but no "uuid" column; dropping it lets
    create_all rebuild it with the current model. Profile data is
    regenerated from logs by the attacker worker, so nothing is lost.
    """
    async with self.engine.begin() as conn:
        result = await conn.execute(text("PRAGMA table_info(attackers)"))
        # Column name is the second field of each PRAGMA table_info row.
        columns = [row[1] for row in result.fetchall()]
        if columns and "uuid" not in columns:
            await conn.execute(text("DROP TABLE attackers"))
async def reinitialize(self) -> None:
"""Initialize the database schema asynchronously (useful for tests)."""
from sqlmodel import SQLModel
@@ -418,6 +426,22 @@ class SQLiteRepository(BaseRepository):
grouped[item.attacker_ip].append(d)
return dict(grouped)
async def get_bounties_for_ips(self, ips: set[str]) -> dict[str, List[dict[str, Any]]]:
    """Return bounty rows grouped by attacker_ip, restricted to *ips*.

    Rows are ordered by timestamp ascending within each IP. The JSON-encoded
    payload column is decoded when possible; malformed payloads are kept as
    raw strings.
    """
    from collections import defaultdict

    async with self.session_factory() as session:
        stmt = (
            select(Bounty)
            .where(Bounty.attacker_ip.in_(ips))
            .order_by(asc(Bounty.timestamp))
        )
        bounty_rows = (await session.execute(stmt)).scalars().all()
    grouped: dict[str, List[dict[str, Any]]] = defaultdict(list)
    for bounty in bounty_rows:
        record = bounty.model_dump(mode="json")
        try:
            record["payload"] = json.loads(record["payload"])
        except (json.JSONDecodeError, TypeError):
            pass  # payload isn't JSON — leave the raw value in place
        grouped[bounty.attacker_ip].append(record)
    return dict(grouped)
async def upsert_attacker(self, data: dict[str, Any]) -> None:
async with self.session_factory() as session:
result = await session.execute(
@@ -429,9 +453,31 @@ class SQLiteRepository(BaseRepository):
setattr(existing, k, v)
session.add(existing)
else:
data["uuid"] = str(uuid.uuid4())
session.add(Attacker(**data))
await session.commit()
@staticmethod
def _deserialize_attacker(d: dict[str, Any]) -> dict[str, Any]:
    """Decode the JSON-string list fields of an attacker row in place.

    The services/deckies/fingerprints/commands columns are stored as JSON
    text; non-string or malformed values are left untouched. Returns the
    same dict for convenience.
    """
    for field_name in ("services", "deckies", "fingerprints", "commands"):
        value = d.get(field_name)
        if not isinstance(value, str):
            continue
        try:
            d[field_name] = json.loads(value)
        except (json.JSONDecodeError, TypeError):
            pass  # keep the raw string when it isn't valid JSON
    return d
async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
    """Fetch one attacker profile by its UUID.

    Returns the profile as a dict with JSON list fields decoded, or None
    when no row matches.
    """
    async with self.session_factory() as session:
        query = select(Attacker).where(Attacker.uuid == uuid)
        row = (await session.execute(query)).scalar_one_or_none()
        if not row:
            return None
        return self._deserialize_attacker(row.model_dump(mode="json"))
async def get_attackers(
self,
limit: int = 50,
@@ -450,7 +496,10 @@ class SQLiteRepository(BaseRepository):
async with self.session_factory() as session:
result = await session.execute(statement)
return [a.model_dump(mode="json") for a in result.scalars().all()]
return [
self._deserialize_attacker(a.model_dump(mode="json"))
for a in result.scalars().all()
]
async def get_total_attackers(self, search: Optional[str] = None) -> int:
statement = select(func.count()).select_from(Attacker)

View File

@@ -11,6 +11,8 @@ from .fleet.api_mutate_decky import router as mutate_decky_router
from .fleet.api_mutate_interval import router as mutate_interval_router
from .fleet.api_deploy_deckies import router as deploy_deckies_router
from .stream.api_stream_events import router as stream_router
from .attackers.api_get_attackers import router as attackers_router
from .attackers.api_get_attacker_detail import router as attacker_detail_router
api_router = APIRouter()
@@ -31,6 +33,10 @@ api_router.include_router(mutate_decky_router)
api_router.include_router(mutate_interval_router)
api_router.include_router(deploy_deckies_router)
# Attacker Profiles
api_router.include_router(attackers_router)
api_router.include_router(attacker_detail_router)
# Observability
api_router.include_router(stats_router)
api_router.include_router(stream_router)

View File

View File

@@ -0,0 +1,26 @@
from typing import Any
from fastapi import APIRouter, Depends, HTTPException
from decnet.web.dependencies import get_current_user, repo
router = APIRouter()
@router.get(
    "/attackers/{uuid}",
    tags=["Attacker Profiles"],
    responses={
        401: {"description": "Could not validate credentials"},
        404: {"description": "Attacker not found"},
    },
)
async def get_attacker_detail(
    uuid: str,
    current_user: str = Depends(get_current_user),
) -> dict[str, Any]:
    """Return the full profile for one attacker, looked up by UUID.

    Raises 404 when the UUID matches no attacker; 401 is produced by the
    auth dependency before this body runs.
    """
    profile = await repo.get_attacker_by_uuid(uuid)
    if not profile:
        raise HTTPException(status_code=404, detail="Attacker not found")
    return profile

View File

@@ -0,0 +1,36 @@
from typing import Any, Optional
from fastapi import APIRouter, Depends, Query
from decnet.web.dependencies import get_current_user, repo
from decnet.web.db.models import AttackersResponse
router = APIRouter()
@router.get(
    "/attackers",
    response_model=AttackersResponse,
    tags=["Attacker Profiles"],
    responses={
        401: {"description": "Could not validate credentials"},
        422: {"description": "Validation error"},
    },
)
async def get_attackers(
    limit: int = Query(50, ge=1, le=1000),
    offset: int = Query(0, ge=0, le=2147483647),
    search: Optional[str] = None,
    sort_by: str = Query("recent", pattern="^(recent|active|traversals)$"),
    current_user: str = Depends(get_current_user),
) -> dict[str, Any]:
    """Retrieve paginated attacker profiles.

    Supports an optional free-text search and one of three sort orders
    (recent / active / traversals). The response carries the total match
    count alongside the requested page.
    """
    # Frontends sometimes serialize an empty search box as the literal
    # strings "null"/"NULL"/"undefined" or ""; treat all of those as absent.
    sentinel_values = (None, "null", "NULL", "undefined", "")
    search_term = None if search in sentinel_values else search
    page = await repo.get_attackers(
        limit=limit, offset=offset, search=search_term, sort_by=sort_by
    )
    total = await repo.get_total_attackers(search=search_term)
    return {"total": total, "limit": limit, "offset": offset, "data": page}