fix: serialize HTTP headers as JSON so tool detection and bounty extraction work

templates/decnet_logging.py calls str(v) on all SD-PARAM values, turning a
headers dict into its Python repr ({'User-Agent': ...}, single-quoted) rather than JSON.
detect_tools_from_headers() called json.loads() on that string, which is not
valid JSON, and silently swallowed the resulting error, returning [] for every
HTTP event. The same bug prevented the ingester from extracting User-Agent
bounty fingerprints.

- templates/http/server.py: wrap headers dict in json.dumps() before passing
  to syslog_line so the value is a valid JSON string in the syslog record
- behavioral.py: add ast.literal_eval fallback for existing DB rows that were
  stored with the old Python repr format
- ingester.py: parse headers as JSON string in _extract_bounty so User-Agent
  fingerprints are stored correctly going forward
- tests: add test_json_string_headers and test_python_repr_headers_fallback
  to exercise both formats in detect_tools_from_headers
This commit is contained in:
2026-04-15 17:03:52 -04:00
parent 02e73a19d5
commit 89887ec6fd
4 changed files with 38 additions and 4 deletions

View File

@@ -314,12 +314,24 @@ def detect_tools_from_headers(events: list[LogEvent]) -> list[str]:
if not raw_headers:
continue
# headers may arrive as a JSON string or a dict already
# headers may arrive as a JSON string, a Python-repr string (legacy),
# or a dict already (in-memory / test paths).
if isinstance(raw_headers, str):
try:
headers: dict[str, str] = json.loads(raw_headers)
except (json.JSONDecodeError, ValueError):
continue
# Backward-compat: events written before the JSON-encode fix
# were serialized as Python repr via str(dict). ast.literal_eval
# handles that safely (no arbitrary code execution).
try:
import ast as _ast
_parsed = _ast.literal_eval(raw_headers)
if isinstance(_parsed, dict):
headers = _parsed
else:
continue
except Exception:
continue
elif isinstance(raw_headers, dict):
headers = raw_headers
else:

View File

@@ -106,7 +106,17 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
})
# 2. HTTP User-Agent fingerprint
_headers = _fields.get("headers") if isinstance(_fields.get("headers"), dict) else {}
_h_raw = _fields.get("headers")
if isinstance(_h_raw, dict):
_headers = _h_raw
elif isinstance(_h_raw, str):
try:
_parsed = json.loads(_h_raw)
_headers = _parsed if isinstance(_parsed, dict) else {}
except (json.JSONDecodeError, ValueError):
_headers = {}
else:
_headers = {}
_ua = _headers.get("User-Agent") or _headers.get("user-agent")
if _ua:
await repo.add_bounty({

View File

@@ -79,7 +79,7 @@ def log_request():
method=request.method,
path=request.path,
remote_addr=request.remote_addr,
headers=dict(request.headers),
headers=json.dumps(dict(request.headers)),
body=request.get_data(as_text=True)[:512],
)

View File

@@ -287,6 +287,18 @@ class TestDetectToolsFromHeaders:
result = detect_tools_from_headers(events)
assert result.count("sqlmap") == 1
def test_json_string_headers(self):
# Post-fix format: headers stored as a JSON string (not a dict).
e = _mk(0, event_type="request", service="http",
fields={"headers": '{"User-Agent": "Nmap Scripting Engine"}'})
assert "nmap" in detect_tools_from_headers([e])
def test_python_repr_headers_fallback(self):
# Legacy format: headers stored as Python repr string (str(dict)).
e = _mk(0, event_type="request", service="http",
fields={"headers": "{'User-Agent': 'Nmap Scripting Engine'}"})
assert "nmap" in detect_tools_from_headers([e])
# ─── phase_sequence ────────────────────────────────────────────────────────