diff --git a/decnet/profiler/behavioral.py b/decnet/profiler/behavioral.py index bd19acc..db44648 100644 --- a/decnet/profiler/behavioral.py +++ b/decnet/profiler/behavioral.py @@ -314,12 +314,24 @@ def detect_tools_from_headers(events: list[LogEvent]) -> list[str]: if not raw_headers: continue - # headers may arrive as a JSON string or a dict already + # headers may arrive as a JSON string, a Python-repr string (legacy), + # or a dict already (in-memory / test paths). if isinstance(raw_headers, str): try: headers: dict[str, str] = json.loads(raw_headers) except (json.JSONDecodeError, ValueError): - continue + # Backward-compat: events written before the JSON-encode fix + # were serialized as Python repr via str(dict). ast.literal_eval + # handles that safely (no arbitrary code execution). + try: + import ast as _ast + _parsed = _ast.literal_eval(raw_headers) + if isinstance(_parsed, dict): + headers = _parsed + else: + continue + except Exception: + continue elif isinstance(raw_headers, dict): headers = raw_headers else: diff --git a/decnet/web/ingester.py b/decnet/web/ingester.py index 188a833..780cf7f 100644 --- a/decnet/web/ingester.py +++ b/decnet/web/ingester.py @@ -106,7 +106,17 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non }) # 2. HTTP User-Agent fingerprint - _headers = _fields.get("headers") if isinstance(_fields.get("headers"), dict) else {} + _h_raw = _fields.get("headers") + if isinstance(_h_raw, dict): + _headers = _h_raw + elif isinstance(_h_raw, str): + try: + _parsed = json.loads(_h_raw) + _headers = _parsed if isinstance(_parsed, dict) else {} + except (json.JSONDecodeError, ValueError): + _headers = {} + else: + _headers = {} _ua = _headers.get("User-Agent") or _headers.get("user-agent") if _ua: await repo.add_bounty({ diff --git a/templates/http/server.py b/templates/http/server.py index 076c5ac..cb8d17d 100644 --- a/templates/http/server.py +++ b/templates/http/server.py @@ -79,7 +79,7 @@ def log_request(): method=request.method, path=request.path, remote_addr=request.remote_addr, - headers=dict(request.headers), + headers=json.dumps(dict(request.headers)), body=request.get_data(as_text=True)[:512], ) diff --git a/tests/test_profiler_behavioral.py b/tests/test_profiler_behavioral.py index eb18a1b..ecddd31 100644 --- a/tests/test_profiler_behavioral.py +++ b/tests/test_profiler_behavioral.py @@ -287,6 +287,18 @@ class TestDetectToolsFromHeaders: result = detect_tools_from_headers(events) assert result.count("sqlmap") == 1 + def test_json_string_headers(self): + # Post-fix format: headers stored as a JSON string (not a dict). + e = _mk(0, event_type="request", service="http", + fields={"headers": '{"User-Agent": "Nmap Scripting Engine"}'}) + assert "nmap" in detect_tools_from_headers([e]) + + def test_python_repr_headers_fallback(self): + # Legacy format: headers stored as Python repr string (str(dict)). + e = _mk(0, event_type="request", service="http", + fields={"headers": "{'User-Agent': 'Nmap Scripting Engine'}"}) + assert "nmap" in detect_tools_from_headers([e]) + # ─── phase_sequence ────────────────────────────────────────────────────────