Add passive TLS fingerprinting via a sniffer container on the MACVLAN interface, plus the Attacker table and periodic rebuild worker that correlates per-IP profiles from Log + Bounty + CorrelationEngine. - templates/sniffer/: Scapy sniffer with pure-Python TLS parser; emits tls_client_hello / tls_session RFC 5424 lines with ja3, ja3s, sni, alpn, raw_ciphers, raw_extensions; GREASE filtered per RFC 8701 - decnet/services/sniffer.py: service plugin (no ports, NET_RAW/NET_ADMIN) - decnet/web/db/models.py: Attacker SQLModel table + AttackersResponse - decnet/web/db/repository.py: 5 new abstract methods - decnet/web/db/sqlite/repository.py: implement all 5 (upsert, pagination, sort by recent/active/traversals, bounty grouping) - decnet/web/attacker_worker.py: 30s periodic rebuild via CorrelationEngine; extracts commands from log fields, merges fingerprint bounties - decnet/web/api.py: wire attacker_profile_worker into lifespan - decnet/web/ingester.py: extract JA3 bounty (fingerprint_type=ja3) - development/DEVELOPMENT.md: full attacker intelligence collection roadmap - pyproject.toml: scapy>=2.6.1 added to dev deps - tests: test_sniffer_ja3.py (40+ vectors), test_attacker_worker.py, test_base_repo.py / test_web_api.py updated for new surface
177 lines
5.5 KiB
Python
177 lines
5.5 KiB
Python
"""
Attacker profile builder — background worker.

Periodically rebuilds the `attackers` table by:
  1. Feeding all stored Log.raw_line values through the CorrelationEngine
     (which parses RFC 5424 and tracks per-IP event histories + traversals).
  2. Merging with the Bounty table (fingerprints, credentials).
  3. Extracting commands executed per IP from the structured log fields.
  4. Upserting one Attacker record per observed IP.

Runs every _REBUILD_INTERVAL seconds. Full rebuild each cycle — simple and
correct at honeypot log volumes.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
from decnet.correlation.engine import CorrelationEngine
|
|
from decnet.correlation.parser import LogEvent
|
|
from decnet.logging import get_logger
|
|
from decnet.web.db.repository import BaseRepository
|
|
|
|
logger = get_logger("attacker_worker")

# Pause between two rebuild cycles, in seconds.
_REBUILD_INTERVAL = 30

# Event types that indicate active command/query execution
# (as opposed to a mere connection or scan).
_COMMAND_EVENT_TYPES = frozenset(
    {
        "command",
        "exec",
        "query",
        "input",
        "shell_input",
        "execute",
        "run",
        "sql_query",
        "redis_command",
    }
)

# Field names that may carry the executed command/query text.
# Order matters: the first key with a truthy value wins.
_COMMAND_FIELDS = (
    "command",
    "query",
    "input",
    "line",
    "sql",
    "cmd",
)
|
|
|
|
|
|
async def attacker_profile_worker(repo: BaseRepository) -> None:
    """Periodically rebuild the Attacker table; designed to run as an asyncio Task.

    Loops forever: sleep ``_REBUILD_INTERVAL`` seconds, then run one rebuild.
    A rebuild that raises is logged and the loop carries on with the next
    cycle, so a single bad cycle does not stop the worker.

    Args:
        repo: Repository passed through to ``_rebuild``.
    """
    logger.info("attacker profile worker started interval=%ds", _REBUILD_INTERVAL)
    while True:
        # Sleep first: the initial rebuild runs one interval after startup.
        await asyncio.sleep(_REBUILD_INTERVAL)
        try:
            await _rebuild(repo)
        except Exception as exc:
            # Top-level boundary: keep the worker alive, but record the
            # traceback too — the message alone does not say where inside
            # the rebuild the failure happened.
            logger.error(
                "attacker worker: rebuild failed: %s", exc, exc_info=True
            )
|
|
|
|
|
|
async def _rebuild(repo: BaseRepository) -> None:
    """Run one full rebuild cycle of the attacker profiles.

    Steps:
      1. Load all raw log rows from ``repo``; stop if there are none.
      2. Feed each row's ``raw_line`` into a fresh ``CorrelationEngine``.
      3. Stop if the engine recorded no events.
      4. For every (ip, events) pair the engine holds, build a record from
         the events, the traversal for that IP (if any), the bounties for
         that IP and the commands extracted from that IP's log rows, then
         upsert it through ``repo``.

    Args:
        repo: Repository used to read logs/bounties and upsert records.
    """
    all_logs = await repo.get_all_logs_raw()
    if not all_logs:
        return

    # Feed raw RFC 5424 lines into the CorrelationEngine. In the same pass,
    # bucket the rows by attacker IP so command extraction below only scans
    # that IP's rows instead of re-scanning every stored row for every IP.
    engine = CorrelationEngine()
    logs_by_ip: dict[Any, list[dict[str, Any]]] = {}
    for row in all_logs:
        engine.ingest(row["raw_line"])
        logs_by_ip.setdefault(row.get("attacker_ip"), []).append(row)

    # NOTE(review): this reads the engine's private ``_events`` mapping;
    # a public accessor on CorrelationEngine would be preferable if one exists.
    if not engine._events:
        return

    traversal_map = {t.attacker_ip: t for t in engine.traversals(min_deckies=2)}
    all_bounties = await repo.get_all_bounties_by_ip()

    count = 0
    for ip, events in engine._events.items():
        traversal = traversal_map.get(ip)
        bounties = all_bounties.get(ip, [])
        # Row order within a bucket matches the order in ``all_logs``, so the
        # extracted command list is the same as when scanning all rows.
        commands = _extract_commands(logs_by_ip.get(ip, []), ip)

        record = _build_record(ip, events, traversal, bounties, commands)
        await repo.upsert_attacker(record)
        count += 1

    logger.debug("attacker worker: rebuilt %d profiles", count)
|
|
|
|
|
|
def _build_record(
    ip: str,
    events: list[LogEvent],
    traversal: Any,
    bounties: list[dict[str, Any]],
    commands: list[dict[str, Any]],
) -> dict[str, Any]:
    """Assemble the attacker record dict for a single IP.

    Args:
        ip: Attacker IP the record describes.
        events: Events for this IP; their ``service``, ``decky`` and
            ``timestamp`` attributes are read.
        traversal: Object exposing ``deckies`` and ``path``, or a falsy value
            when there is no traversal for this IP.
        bounties: Bounty dicts for this IP, classified by ``"bounty_type"``.
        commands: Command dicts, stored JSON-encoded as given.

    Returns:
        A dict with first/last seen timestamps, counts, JSON-encoded lists
        (services, deckies, fingerprints, commands), traversal info and an
        ``updated_at`` timestamp taken in UTC at call time.
    """
    service_names = sorted({ev.service for ev in events})

    # Deckies come from the traversal when there is one; otherwise fall back
    # to first-contact order derived from the events themselves.
    if traversal:
        decky_names = traversal.deckies
    else:
        decky_names = _first_contact_deckies(events)

    # One pass over the bounties: collect fingerprints, count credentials.
    fingerprint_bounties: list[dict[str, Any]] = []
    credential_total = 0
    for bounty in bounties:
        kind = bounty.get("bounty_type")
        if kind == "fingerprint":
            fingerprint_bounties.append(bounty)
        elif kind == "credential":
            credential_total += 1

    seen_at = [ev.timestamp for ev in events]
    distinct_deckies = {ev.decky for ev in events}

    record: dict[str, Any] = {
        "ip": ip,
        "first_seen": min(seen_at),
        "last_seen": max(seen_at),
        "event_count": len(events),
        "service_count": len(service_names),
        "decky_count": len(distinct_deckies),
        "services": json.dumps(service_names),
        "deckies": json.dumps(decky_names),
        "traversal_path": traversal.path if traversal else None,
        "is_traversal": traversal is not None,
        "bounty_count": len(bounties),
        "credential_count": credential_total,
        "fingerprints": json.dumps(fingerprint_bounties),
        "commands": json.dumps(commands),
        "updated_at": datetime.now(timezone.utc),
    }
    return record
|
|
|
|
|
|
def _first_contact_deckies(events: list[LogEvent]) -> list[str]:
    """Return unique deckies in first-contact order (for non-traversal attackers).

    Events are ordered by ``timestamp`` (stable sort, so ties keep their input
    order) and each decky is kept at the position of its earliest event.
    """
    chronological = sorted(events, key=lambda ev: ev.timestamp)
    # dict keys preserve insertion order, so this drops repeats while keeping
    # the first occurrence of every decky.
    return list(dict.fromkeys(ev.decky for ev in chronological))
|
|
|
|
|
|
def _extract_commands(
    all_logs: list[dict[str, Any]], ip: str
) -> list[dict[str, Any]]:
    """Extract executed commands for a given attacker IP from raw log rows.

    A row contributes a command when all of the following hold:
      - its ``attacker_ip`` equals ``ip``
      - its ``event_type`` is in ``_COMMAND_EVENT_TYPES``
      - its ``fields`` (a dict, or a JSON string decoding to an object)
        has a truthy value under one of ``_COMMAND_FIELDS``; the first
        matching key in that tuple's order is used

    Rows that do not qualify are skipped, including rows whose ``fields`` is
    invalid JSON or decodes to something other than an object.

    Args:
        all_logs: Log rows as dicts; rows for other IPs are ignored.
        ip: Attacker IP to extract commands for.

    Returns:
        A list of ``{service, decky, command, timestamp}`` dicts in input
        order. ``timestamp`` is ISO-formatted when the row holds a
        ``datetime``, otherwise ``str()`` of the stored value.
    """
    commands: list[dict[str, Any]] = []
    for row in all_logs:
        if row.get("attacker_ip") != ip:
            continue
        if row.get("event_type") not in _COMMAND_EVENT_TYPES:
            continue

        raw_fields = row.get("fields")
        if not raw_fields:
            continue

        # fields is stored as a JSON string in the DB row
        if isinstance(raw_fields, str):
            try:
                fields = json.loads(raw_fields)
            except (json.JSONDecodeError, ValueError):
                continue
        else:
            fields = raw_fields

        # Valid JSON is not necessarily an object: a list, string or number
        # has no .get(), and the resulting AttributeError would abort the
        # whole rebuild because of one odd row. Skip such rows instead.
        if not isinstance(fields, dict):
            continue

        cmd_text: str | None = None
        for key in _COMMAND_FIELDS:
            val = fields.get(key)
            if val:
                cmd_text = str(val)
                break

        if not cmd_text:
            continue

        ts = row.get("timestamp")
        commands.append({
            "service": row.get("service", ""),
            "decky": row.get("decky", ""),
            "command": cmd_text,
            "timestamp": ts.isoformat() if isinstance(ts, datetime) else str(ts),
        })

    return commands
|