feat: attacker profiles — UUID model, API routes, list/detail frontend
Migrate Attacker model from IP-based to UUID-based primary key with
auto-migration for old schema. Add GET /attackers (paginated, search,
sort) and GET /attackers/{uuid} API routes. Rewrite Attackers.tsx as
a card grid with full threat info and create AttackerDetail.tsx as a
dedicated detail page with back navigation, stats, commands table,
and fingerprints.
This commit is contained in:
@@ -1,21 +1,20 @@
|
||||
"""
|
||||
Attacker profile builder — background worker.
|
||||
Attacker profile builder — incremental background worker.
|
||||
|
||||
Periodically rebuilds the `attackers` table by:
|
||||
1. Feeding all stored Log.raw_line values through the CorrelationEngine
|
||||
(which parses RFC 5424 and tracks per-IP event histories + traversals).
|
||||
2. Merging with the Bounty table (fingerprints, credentials).
|
||||
3. Extracting commands executed per IP from the structured log fields.
|
||||
4. Upserting one Attacker record per observed IP.
|
||||
Maintains a persistent CorrelationEngine and a log-ID cursor across cycles.
|
||||
On cold start (first cycle or process restart), performs one full build from
|
||||
all stored logs. Subsequent cycles fetch only new logs via the cursor,
|
||||
ingest them into the existing engine, and rebuild profiles for affected IPs
|
||||
only.
|
||||
|
||||
Runs every _REBUILD_INTERVAL seconds. Full rebuild each cycle — simple and
|
||||
correct at honeypot log volumes.
|
||||
Complexity per cycle: O(new_logs + affected_ips) instead of O(total_logs²).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
@@ -27,6 +26,8 @@ from decnet.web.db.repository import BaseRepository
|
||||
logger = get_logger("attacker_worker")
|
||||
|
||||
_REBUILD_INTERVAL = 30 # seconds
|
||||
_BATCH_SIZE = 500
|
||||
_STATE_KEY = "attacker_worker_cursor"
|
||||
|
||||
# Event types that indicate active command/query execution (not just connection/scan)
|
||||
_COMMAND_EVENT_TYPES = frozenset({
|
||||
@@ -38,44 +39,95 @@ _COMMAND_EVENT_TYPES = frozenset({
|
||||
_COMMAND_FIELDS = ("command", "query", "input", "line", "sql", "cmd")
|
||||
|
||||
|
||||
@dataclass
|
||||
class _WorkerState:
|
||||
engine: CorrelationEngine = field(default_factory=CorrelationEngine)
|
||||
last_log_id: int = 0
|
||||
initialized: bool = False
|
||||
|
||||
|
||||
async def attacker_profile_worker(repo: BaseRepository) -> None:
|
||||
"""Periodically rebuilds the Attacker table. Designed to run as an asyncio Task."""
|
||||
"""Periodically updates the Attacker table incrementally. Designed to run as an asyncio Task."""
|
||||
logger.info("attacker profile worker started interval=%ds", _REBUILD_INTERVAL)
|
||||
state = _WorkerState()
|
||||
while True:
|
||||
await asyncio.sleep(_REBUILD_INTERVAL)
|
||||
try:
|
||||
await _rebuild(repo)
|
||||
await _incremental_update(repo, state)
|
||||
except Exception as exc:
|
||||
logger.error("attacker worker: rebuild failed: %s", exc)
|
||||
logger.error("attacker worker: update failed: %s", exc)
|
||||
|
||||
|
||||
async def _rebuild(repo: BaseRepository) -> None:
|
||||
async def _incremental_update(repo: BaseRepository, state: _WorkerState) -> None:
|
||||
if not state.initialized:
|
||||
await _cold_start(repo, state)
|
||||
return
|
||||
|
||||
affected_ips: set[str] = set()
|
||||
|
||||
while True:
|
||||
batch = await repo.get_logs_after_id(state.last_log_id, limit=_BATCH_SIZE)
|
||||
if not batch:
|
||||
break
|
||||
|
||||
for row in batch:
|
||||
event = state.engine.ingest(row["raw_line"])
|
||||
if event and event.attacker_ip:
|
||||
affected_ips.add(event.attacker_ip)
|
||||
state.last_log_id = row["id"]
|
||||
|
||||
if len(batch) < _BATCH_SIZE:
|
||||
break
|
||||
|
||||
if not affected_ips:
|
||||
await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
|
||||
return
|
||||
|
||||
await _update_profiles(repo, state, affected_ips)
|
||||
await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
|
||||
|
||||
logger.debug("attacker worker: updated %d profiles (incremental)", len(affected_ips))
|
||||
|
||||
|
||||
async def _cold_start(repo: BaseRepository, state: _WorkerState) -> None:
|
||||
all_logs = await repo.get_all_logs_raw()
|
||||
if not all_logs:
|
||||
state.last_log_id = await repo.get_max_log_id()
|
||||
state.initialized = True
|
||||
await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
|
||||
return
|
||||
|
||||
# Feed raw RFC 5424 lines into the CorrelationEngine
|
||||
engine = CorrelationEngine()
|
||||
for row in all_logs:
|
||||
engine.ingest(row["raw_line"])
|
||||
state.engine.ingest(row["raw_line"])
|
||||
state.last_log_id = max(state.last_log_id, row["id"])
|
||||
|
||||
if not engine._events:
|
||||
return
|
||||
all_ips = set(state.engine._events.keys())
|
||||
await _update_profiles(repo, state, all_ips)
|
||||
await repo.set_state(_STATE_KEY, {"last_log_id": state.last_log_id})
|
||||
|
||||
traversal_map = {t.attacker_ip: t for t in engine.traversals(min_deckies=2)}
|
||||
all_bounties = await repo.get_all_bounties_by_ip()
|
||||
state.initialized = True
|
||||
logger.debug("attacker worker: cold start rebuilt %d profiles", len(all_ips))
|
||||
|
||||
|
||||
async def _update_profiles(
|
||||
repo: BaseRepository,
|
||||
state: _WorkerState,
|
||||
ips: set[str],
|
||||
) -> None:
|
||||
traversal_map = {t.attacker_ip: t for t in state.engine.traversals(min_deckies=2)}
|
||||
bounties_map = await repo.get_bounties_for_ips(ips)
|
||||
|
||||
for ip in ips:
|
||||
events = state.engine._events.get(ip, [])
|
||||
if not events:
|
||||
continue
|
||||
|
||||
count = 0
|
||||
for ip, events in engine._events.items():
|
||||
traversal = traversal_map.get(ip)
|
||||
bounties = all_bounties.get(ip, [])
|
||||
commands = _extract_commands(all_logs, ip)
|
||||
bounties = bounties_map.get(ip, [])
|
||||
commands = _extract_commands_from_events(events)
|
||||
|
||||
record = _build_record(ip, events, traversal, bounties, commands)
|
||||
await repo.upsert_attacker(record)
|
||||
count += 1
|
||||
|
||||
logger.debug("attacker worker: rebuilt %d profiles", count)
|
||||
|
||||
|
||||
def _build_record(
|
||||
@@ -122,42 +174,20 @@ def _first_contact_deckies(events: list[LogEvent]) -> list[str]:
|
||||
return seen
|
||||
|
||||
|
||||
def _extract_commands(
|
||||
all_logs: list[dict[str, Any]], ip: str
|
||||
) -> list[dict[str, Any]]:
|
||||
def _extract_commands_from_events(events: list[LogEvent]) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Extract executed commands for a given attacker IP from raw log rows.
|
||||
Extract executed commands from LogEvent objects.
|
||||
|
||||
Looks for rows where:
|
||||
- attacker_ip matches
|
||||
- event_type is a known command-execution type
|
||||
- fields JSON contains a command-like key
|
||||
|
||||
Returns a list of {service, decky, command, timestamp} dicts.
|
||||
Works directly on LogEvent.fields (already a dict), so no JSON parsing needed.
|
||||
"""
|
||||
commands: list[dict[str, Any]] = []
|
||||
for row in all_logs:
|
||||
if row.get("attacker_ip") != ip:
|
||||
for event in events:
|
||||
if event.event_type not in _COMMAND_EVENT_TYPES:
|
||||
continue
|
||||
if row.get("event_type") not in _COMMAND_EVENT_TYPES:
|
||||
continue
|
||||
|
||||
raw_fields = row.get("fields")
|
||||
if not raw_fields:
|
||||
continue
|
||||
|
||||
# fields is stored as a JSON string in the DB row
|
||||
if isinstance(raw_fields, str):
|
||||
try:
|
||||
fields = json.loads(raw_fields)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
continue
|
||||
else:
|
||||
fields = raw_fields
|
||||
|
||||
cmd_text: str | None = None
|
||||
for key in _COMMAND_FIELDS:
|
||||
val = fields.get(key)
|
||||
val = event.fields.get(key)
|
||||
if val:
|
||||
cmd_text = str(val)
|
||||
break
|
||||
@@ -165,12 +195,11 @@ def _extract_commands(
|
||||
if not cmd_text:
|
||||
continue
|
||||
|
||||
ts = row.get("timestamp")
|
||||
commands.append({
|
||||
"service": row.get("service", ""),
|
||||
"decky": row.get("decky", ""),
|
||||
"service": event.service,
|
||||
"decky": event.decky,
|
||||
"command": cmd_text,
|
||||
"timestamp": ts.isoformat() if isinstance(ts, datetime) else str(ts),
|
||||
"timestamp": event.timestamp.isoformat(),
|
||||
})
|
||||
|
||||
return commands
|
||||
|
||||
@@ -53,7 +53,8 @@ class State(SQLModel, table=True):
|
||||
|
||||
class Attacker(SQLModel, table=True):
|
||||
__tablename__ = "attackers"
|
||||
ip: str = Field(primary_key=True)
|
||||
uuid: str = Field(primary_key=True)
|
||||
ip: str = Field(index=True)
|
||||
first_seen: datetime = Field(index=True)
|
||||
last_seen: datetime = Field(index=True)
|
||||
event_count: int = Field(default=0)
|
||||
|
||||
@@ -96,16 +96,36 @@ class BaseRepository(ABC):
|
||||
"""Retrieve all log rows with fields needed by the attacker profile worker."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_max_log_id(self) -> int:
|
||||
"""Return the highest log ID, or 0 if the table is empty."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_logs_after_id(self, last_id: int, limit: int = 500) -> list[dict[str, Any]]:
|
||||
"""Return logs with id > last_id, ordered by id ASC, up to limit."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_all_bounties_by_ip(self) -> dict[str, list[dict[str, Any]]]:
|
||||
"""Retrieve all bounty rows grouped by attacker_ip."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_bounties_for_ips(self, ips: set[str]) -> dict[str, list[dict[str, Any]]]:
|
||||
"""Retrieve bounty rows grouped by attacker_ip, filtered to only the given IPs."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def upsert_attacker(self, data: dict[str, Any]) -> None:
|
||||
"""Insert or replace an attacker profile record."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
|
||||
"""Retrieve a single attacker profile by UUID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_attackers(
|
||||
self,
|
||||
|
||||
@@ -29,6 +29,7 @@ class SQLiteRepository(BaseRepository):
|
||||
async def initialize(self) -> None:
|
||||
"""Async warm-up / verification. Creates tables if they don't exist."""
|
||||
from sqlmodel import SQLModel
|
||||
await self._migrate_attackers_table()
|
||||
async with self.engine.begin() as conn:
|
||||
await conn.run_sync(SQLModel.metadata.create_all)
|
||||
|
||||
@@ -47,6 +48,13 @@ class SQLiteRepository(BaseRepository):
|
||||
))
|
||||
await session.commit()
|
||||
|
||||
async def _migrate_attackers_table(self) -> None:
|
||||
"""Drop the old attackers table if it lacks the uuid column (pre-UUID schema)."""
|
||||
async with self.engine.begin() as conn:
|
||||
rows = (await conn.execute(text("PRAGMA table_info(attackers)"))).fetchall()
|
||||
if rows and not any(r[1] == "uuid" for r in rows):
|
||||
await conn.execute(text("DROP TABLE attackers"))
|
||||
|
||||
async def reinitialize(self) -> None:
|
||||
"""Initialize the database schema asynchronously (useful for tests)."""
|
||||
from sqlmodel import SQLModel
|
||||
@@ -418,6 +426,22 @@ class SQLiteRepository(BaseRepository):
|
||||
grouped[item.attacker_ip].append(d)
|
||||
return dict(grouped)
|
||||
|
||||
async def get_bounties_for_ips(self, ips: set[str]) -> dict[str, List[dict[str, Any]]]:
|
||||
from collections import defaultdict
|
||||
async with self.session_factory() as session:
|
||||
result = await session.execute(
|
||||
select(Bounty).where(Bounty.attacker_ip.in_(ips)).order_by(asc(Bounty.timestamp))
|
||||
)
|
||||
grouped: dict[str, List[dict[str, Any]]] = defaultdict(list)
|
||||
for item in result.scalars().all():
|
||||
d = item.model_dump(mode="json")
|
||||
try:
|
||||
d["payload"] = json.loads(d["payload"])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
grouped[item.attacker_ip].append(d)
|
||||
return dict(grouped)
|
||||
|
||||
async def upsert_attacker(self, data: dict[str, Any]) -> None:
|
||||
async with self.session_factory() as session:
|
||||
result = await session.execute(
|
||||
@@ -429,9 +453,31 @@ class SQLiteRepository(BaseRepository):
|
||||
setattr(existing, k, v)
|
||||
session.add(existing)
|
||||
else:
|
||||
data["uuid"] = str(uuid.uuid4())
|
||||
session.add(Attacker(**data))
|
||||
await session.commit()
|
||||
|
||||
@staticmethod
|
||||
def _deserialize_attacker(d: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Parse JSON-encoded list fields in an attacker dict."""
|
||||
for key in ("services", "deckies", "fingerprints", "commands"):
|
||||
if isinstance(d.get(key), str):
|
||||
try:
|
||||
d[key] = json.loads(d[key])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
return d
|
||||
|
||||
async def get_attacker_by_uuid(self, uuid: str) -> Optional[dict[str, Any]]:
|
||||
async with self.session_factory() as session:
|
||||
result = await session.execute(
|
||||
select(Attacker).where(Attacker.uuid == uuid)
|
||||
)
|
||||
attacker = result.scalar_one_or_none()
|
||||
if not attacker:
|
||||
return None
|
||||
return self._deserialize_attacker(attacker.model_dump(mode="json"))
|
||||
|
||||
async def get_attackers(
|
||||
self,
|
||||
limit: int = 50,
|
||||
@@ -450,7 +496,10 @@ class SQLiteRepository(BaseRepository):
|
||||
|
||||
async with self.session_factory() as session:
|
||||
result = await session.execute(statement)
|
||||
return [a.model_dump(mode="json") for a in result.scalars().all()]
|
||||
return [
|
||||
self._deserialize_attacker(a.model_dump(mode="json"))
|
||||
for a in result.scalars().all()
|
||||
]
|
||||
|
||||
async def get_total_attackers(self, search: Optional[str] = None) -> int:
|
||||
statement = select(func.count()).select_from(Attacker)
|
||||
|
||||
@@ -11,6 +11,8 @@ from .fleet.api_mutate_decky import router as mutate_decky_router
|
||||
from .fleet.api_mutate_interval import router as mutate_interval_router
|
||||
from .fleet.api_deploy_deckies import router as deploy_deckies_router
|
||||
from .stream.api_stream_events import router as stream_router
|
||||
from .attackers.api_get_attackers import router as attackers_router
|
||||
from .attackers.api_get_attacker_detail import router as attacker_detail_router
|
||||
|
||||
api_router = APIRouter()
|
||||
|
||||
@@ -31,6 +33,10 @@ api_router.include_router(mutate_decky_router)
|
||||
api_router.include_router(mutate_interval_router)
|
||||
api_router.include_router(deploy_deckies_router)
|
||||
|
||||
# Attacker Profiles
|
||||
api_router.include_router(attackers_router)
|
||||
api_router.include_router(attacker_detail_router)
|
||||
|
||||
# Observability
|
||||
api_router.include_router(stats_router)
|
||||
api_router.include_router(stream_router)
|
||||
|
||||
0
decnet/web/router/attackers/__init__.py
Normal file
0
decnet/web/router/attackers/__init__.py
Normal file
26
decnet/web/router/attackers/api_get_attacker_detail.py
Normal file
26
decnet/web/router/attackers/api_get_attacker_detail.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
|
||||
from decnet.web.dependencies import get_current_user, repo
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/attackers/{uuid}",
|
||||
tags=["Attacker Profiles"],
|
||||
responses={
|
||||
401: {"description": "Could not validate credentials"},
|
||||
404: {"description": "Attacker not found"},
|
||||
},
|
||||
)
|
||||
async def get_attacker_detail(
|
||||
uuid: str,
|
||||
current_user: str = Depends(get_current_user),
|
||||
) -> dict[str, Any]:
|
||||
"""Retrieve a single attacker profile by UUID."""
|
||||
attacker = await repo.get_attacker_by_uuid(uuid)
|
||||
if not attacker:
|
||||
raise HTTPException(status_code=404, detail="Attacker not found")
|
||||
return attacker
|
||||
36
decnet/web/router/attackers/api_get_attackers.py
Normal file
36
decnet/web/router/attackers/api_get_attackers.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from decnet.web.dependencies import get_current_user, repo
|
||||
from decnet.web.db.models import AttackersResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/attackers",
|
||||
response_model=AttackersResponse,
|
||||
tags=["Attacker Profiles"],
|
||||
responses={
|
||||
401: {"description": "Could not validate credentials"},
|
||||
422: {"description": "Validation error"},
|
||||
},
|
||||
)
|
||||
async def get_attackers(
|
||||
limit: int = Query(50, ge=1, le=1000),
|
||||
offset: int = Query(0, ge=0, le=2147483647),
|
||||
search: Optional[str] = None,
|
||||
sort_by: str = Query("recent", pattern="^(recent|active|traversals)$"),
|
||||
current_user: str = Depends(get_current_user),
|
||||
) -> dict[str, Any]:
|
||||
"""Retrieve paginated attacker profiles."""
|
||||
def _norm(v: Optional[str]) -> Optional[str]:
|
||||
if v in (None, "null", "NULL", "undefined", ""):
|
||||
return None
|
||||
return v
|
||||
|
||||
s = _norm(search)
|
||||
_data = await repo.get_attackers(limit=limit, offset=offset, search=s, sort_by=sort_by)
|
||||
_total = await repo.get_total_attackers(search=s)
|
||||
return {"total": _total, "limit": limit, "offset": offset, "data": _data}
|
||||
Reference in New Issue
Block a user