From 89887ec6fd210777589cf95f72cd10c5de645be3 Mon Sep 17 00:00:00 2001 From: anti Date: Wed, 15 Apr 2026 17:03:52 -0400 Subject: [PATCH] fix: serialize HTTP headers as JSON so tool detection and bounty extraction work templates/decnet_logging.py calls str(v) on all SD-PARAM values, turning a headers dict into Python repr ('{'User-Agent': ...}') rather than JSON. detect_tools_from_headers() called json.loads() on that string and silently swallowed the error, returning [] for every HTTP event. Same bug prevented the ingester from extracting User-Agent bounty fingerprints. - templates/http/server.py: wrap headers dict in json.dumps() before passing to syslog_line so the value is a valid JSON string in the syslog record - behavioral.py: add ast.literal_eval fallback for existing DB rows that were stored with the old Python repr format - ingester.py: parse headers as JSON string in _extract_bounty so User-Agent fingerprints are stored correctly going forward - tests: add test_json_string_headers and test_python_repr_headers_fallback to exercise both formats in detect_tools_from_headers --- decnet/profiler/behavioral.py | 16 ++++++++++++++-- decnet/web/ingester.py | 12 +++++++++++- templates/http/server.py | 2 +- tests/test_profiler_behavioral.py | 12 ++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/decnet/profiler/behavioral.py b/decnet/profiler/behavioral.py index bd19acc..db44648 100644 --- a/decnet/profiler/behavioral.py +++ b/decnet/profiler/behavioral.py @@ -314,12 +314,24 @@ def detect_tools_from_headers(events: list[LogEvent]) -> list[str]: if not raw_headers: continue - # headers may arrive as a JSON string or a dict already + # headers may arrive as a JSON string, a Python-repr string (legacy), + # or a dict already (in-memory / test paths). if isinstance(raw_headers, str): try: headers: dict[str, str] = json.loads(raw_headers) except (json.JSONDecodeError, ValueError): - continue + # Backward-compat: events written before the JSON-encode fix + # were serialized as Python repr via str(dict). ast.literal_eval + # handles that safely (no arbitrary code execution). + try: + import ast as _ast + _parsed = _ast.literal_eval(raw_headers) + if isinstance(_parsed, dict): + headers = _parsed + else: + continue + except Exception: + continue elif isinstance(raw_headers, dict): headers = raw_headers else: diff --git a/decnet/web/ingester.py b/decnet/web/ingester.py index 188a833..780cf7f 100644 --- a/decnet/web/ingester.py +++ b/decnet/web/ingester.py @@ -106,7 +106,17 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non }) # 2. HTTP User-Agent fingerprint - _headers = _fields.get("headers") if isinstance(_fields.get("headers"), dict) else {} + _h_raw = _fields.get("headers") + if isinstance(_h_raw, dict): + _headers = _h_raw + elif isinstance(_h_raw, str): + try: + _parsed = json.loads(_h_raw) + _headers = _parsed if isinstance(_parsed, dict) else {} + except (json.JSONDecodeError, ValueError): + _headers = {} + else: + _headers = {} _ua = _headers.get("User-Agent") or _headers.get("user-agent") if _ua: await repo.add_bounty({ diff --git a/templates/http/server.py b/templates/http/server.py index 076c5ac..cb8d17d 100644 --- a/templates/http/server.py +++ b/templates/http/server.py @@ -79,7 +79,7 @@ def log_request(): method=request.method, path=request.path, remote_addr=request.remote_addr, - headers=dict(request.headers), + headers=json.dumps(dict(request.headers)), body=request.get_data(as_text=True)[:512], ) diff --git a/tests/test_profiler_behavioral.py b/tests/test_profiler_behavioral.py index eb18a1b..ecddd31 100644 --- a/tests/test_profiler_behavioral.py +++ b/tests/test_profiler_behavioral.py @@ -287,6 +287,18 @@ class TestDetectToolsFromHeaders: result = detect_tools_from_headers(events) assert result.count("sqlmap") == 1 + def test_json_string_headers(self): + # Post-fix format: headers stored as a JSON string (not a dict). + e = _mk(0, event_type="request", service="http", + fields={"headers": '{"User-Agent": "Nmap Scripting Engine"}'}) + assert "nmap" in detect_tools_from_headers([e]) + + def test_python_repr_headers_fallback(self): + # Legacy format: headers stored as Python repr string (str(dict)). + e = _mk(0, event_type="request", service="http", + fields={"headers": "{'User-Agent': 'Nmap Scripting Engine'}"}) + assert "nmap" in detect_tools_from_headers([e]) + # ─── phase_sequence ────────────────────────────────────────────────────────