def encode_secret(secret: "str | bytes") -> dict[str, str]:
    """Encode a credential secret into the universal SD-block field pair.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread
    into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    Args:
        secret: The secret as text or as raw wire bytes.  ``str`` input is
            encoded as UTF-8 (code points that cannot be encoded, e.g. lone
            surrogates, become ``?``).  ``bytes`` input is used verbatim, so
            NUL / 0xff / invalid-UTF-8 sequences survive in ``secret_b64``.

    Returns:
        A dict with two keys:

        * ``secret_printable`` — every byte outside ``[0x20, 0x7f)`` is
          collapsed to ``'?'``, so the value is printable ASCII only.
        * ``secret_b64`` — standard base64 of the exact bytes.  Per the
          original notes, the decnet web ingester keys off this field
          being present.

    NOTE(review): ``secret_printable`` can still contain ``"``, ``\\`` and
    ``]``; this helper does not escape them — confirm the SD serializer does.
    """
    if isinstance(secret, (bytes, bytearray)):
        raw = bytes(secret)
    else:
        raw = secret.encode("utf-8", errors="replace")
    printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw)
    return {
        "secret_printable": printable,
        "secret_b64": base64.b64encode(raw).decode("ascii"),
    }


# One auth-param: key="quoted string with \" pairs"  or  key=token.
# The token alternative stays deliberately lenient (anything up to a comma
# or whitespace) so malformed attacker input is still captured.
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^,\s]+))')
# A backslash-escaped character inside a quoted string.
_QUOTED_PAIR_RE = re.compile(r"\\(.)")


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP ``Authorization`` header value into credential SD fields.

    The result is ready to spread into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):
      * Basic — base64(user:pw).  ``principal`` is the user part decoded as
        UTF-8 (invalid bytes become U+FFFD); the password is passed to
        :func:`encode_secret` as raw bytes so ``secret_b64`` is lossless.
        ``secret_kind="plaintext"``.
      * Bearer / Token — ``principal=None``, ``secret_kind="http_bearer"``,
        the secret is the token text.
      * Digest — ``principal`` is the ``username`` parameter (``None`` when
        absent), the secret is the ``response`` parameter, and
        ``secret_kind="http_digest_md5"`` is used regardless of any
        ``algorithm=`` parameter.

    Args:
        header_value: Raw header value, or ``None`` when the header is absent.

    Returns:
        A dict with ``principal``, ``secret_kind``, ``secret_printable`` and
        ``secret_b64``; or ``None`` when the value is empty / not a ``str``,
        has nothing after the scheme, is a Basic payload that is not valid
        base64 or lacks a ``:``, is a Digest without a non-empty
        ``response``, or uses any other scheme (NTLM, Negotiate,
        AWS4-HMAC-SHA256, ...).
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # Covers binascii.Error (bad alphabet / padding), which is a
            # ValueError subclass, and the non-ASCII-input ValueError.
            return None
        # Split on the wire bytes: ':' is ASCII, so this matches splitting
        # the decoded text, but keeps the password bytes untouched.
        user, sep, password = raw.partition(b":")
        if not sep:
            return None
        return {
            "principal": user.decode("utf-8", errors="replace"),
            "secret_kind": "plaintext",
            **encode_secret(password),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            key, quoted, token = match.groups()
            if quoted is None:
                value = token
            else:
                # Undo quoted-pair escaping (\" -> ", \\ -> \).
                value = _QUOTED_PAIR_RE.sub(r"\1", quoted)
            params[key.lower()] = value  # a repeated parameter: last one wins
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
@app.before_request
def log_request():
    """Log every incoming request before Flask dispatches it.

    Emits one ``request`` event carrying the method, path, remote address,
    the headers serialized as JSON, and the first 512 characters of the
    body.  When the ``Authorization`` header is recognised by
    ``classify_authorization``, its credential fields are added to the same
    event; otherwise nothing extra is logged.
    """
    auth_fields = classify_authorization(request.headers.get("Authorization")) or {}
    request_fields = {
        "method": request.method,
        "path": request.path,
        "remote_addr": request.remote_addr,
        "headers": json.dumps(dict(request.headers)),
        "body": request.get_data(as_text=True)[:512],
    }
    _log("request", **request_fields, **auth_fields)
def encode_secret(secret: "str | bytes") -> dict[str, str]:
    """Encode a credential secret into the universal SD-block field pair.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread
    into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    Args:
        secret: The secret as text or as raw wire bytes.  ``str`` input is
            encoded as UTF-8 (code points that cannot be encoded, e.g. lone
            surrogates, become ``?``).  ``bytes`` input is used verbatim, so
            NUL / 0xff / invalid-UTF-8 sequences survive in ``secret_b64``.

    Returns:
        A dict with two keys:

        * ``secret_printable`` — every byte outside ``[0x20, 0x7f)`` is
          collapsed to ``'?'``, so the value is printable ASCII only.
        * ``secret_b64`` — standard base64 of the exact bytes.  Per the
          original notes, the decnet web ingester keys off this field
          being present.

    NOTE(review): ``secret_printable`` can still contain ``"``, ``\\`` and
    ``]``; this helper does not escape them — confirm the SD serializer does.
    """
    if isinstance(secret, (bytes, bytearray)):
        raw = bytes(secret)
    else:
        raw = secret.encode("utf-8", errors="replace")
    printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw)
    return {
        "secret_printable": printable,
        "secret_b64": base64.b64encode(raw).decode("ascii"),
    }


# One auth-param: key="quoted string with \" pairs"  or  key=token.
# The token alternative stays deliberately lenient (anything up to a comma
# or whitespace) so malformed attacker input is still captured.
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^,\s]+))')
# A backslash-escaped character inside a quoted string.
_QUOTED_PAIR_RE = re.compile(r"\\(.)")


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP ``Authorization`` header value into credential SD fields.

    The result is ready to spread into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):
      * Basic — base64(user:pw).  ``principal`` is the user part decoded as
        UTF-8 (invalid bytes become U+FFFD); the password is passed to
        :func:`encode_secret` as raw bytes so ``secret_b64`` is lossless.
        ``secret_kind="plaintext"``.
      * Bearer / Token — ``principal=None``, ``secret_kind="http_bearer"``,
        the secret is the token text.
      * Digest — ``principal`` is the ``username`` parameter (``None`` when
        absent), the secret is the ``response`` parameter, and
        ``secret_kind="http_digest_md5"`` is used regardless of any
        ``algorithm=`` parameter.

    Args:
        header_value: Raw header value, or ``None`` when the header is absent.

    Returns:
        A dict with ``principal``, ``secret_kind``, ``secret_printable`` and
        ``secret_b64``; or ``None`` when the value is empty / not a ``str``,
        has nothing after the scheme, is a Basic payload that is not valid
        base64 or lacks a ``:``, is a Digest without a non-empty
        ``response``, or uses any other scheme (NTLM, Negotiate,
        AWS4-HMAC-SHA256, ...).
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # Covers binascii.Error (bad alphabet / padding), which is a
            # ValueError subclass, and the non-ASCII-input ValueError.
            return None
        # Split on the wire bytes: ':' is ASCII, so this matches splitting
        # the decoded text, but keeps the password bytes untouched.
        user, sep, password = raw.partition(b":")
        if not sep:
            return None
        return {
            "principal": user.decode("utf-8", errors="replace"),
            "secret_kind": "plaintext",
            **encode_secret(password),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            key, quoted, token = match.groups()
            if quoted is None:
                value = token
            else:
                # Undo quoted-pair escaping (\" -> ", \\ -> \).
                value = _QUOTED_PAIR_RE.sub(r"\1", quoted)
            params[key.lower()] = value  # a repeated parameter: last one wins
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
NODE_NAME = os.environ.get("NODE_NAME", "esserver") SERVICE_NAME = "elasticsearch" @@ -102,18 +107,23 @@ class ESHandler(BaseHTTPRequestHandler): length = int(self.headers.get("Content-Length", 0)) return self.rfile.read(length).decode(errors="replace") if length else "" + def _cred_fields(self) -> dict: + """Universal cred shape from this request's Authorization header, + or empty dict when absent / unrecognized.""" + return classify_authorization(self.headers.get("Authorization")) or {} + def do_GET(self): src = self.client_address[0] path = self.path.split("?")[0] if path in ("/", ""): - _log("root_probe", src=src, method="GET", path=self.path) + _log("root_probe", src=src, method="GET", path=self.path, **self._cred_fields()) self._send_json(200, _ROOT_RESPONSE) elif path.startswith("/_cat/"): - _log("cat_api", src=src, method="GET", path=self.path) + _log("cat_api", src=src, method="GET", path=self.path, **self._cred_fields()) self._send_json(200, []) elif path.startswith("/_cluster/"): - _log("cluster_recon", src=src, method="GET", path=self.path) + _log("cluster_recon", src=src, method="GET", path=self.path, **self._cred_fields()) self._send_json(200, { "cluster_name": _CLUSTER_NAME, "cluster_uuid": _CLUSTER_UUID, @@ -129,7 +139,7 @@ class ESHandler(BaseHTTPRequestHandler): "active_shards_percent_as_number": 100.0, }) elif path.startswith("/_nodes"): - _log("nodes_recon", src=src, method="GET", path=self.path) + _log("nodes_recon", src=src, method="GET", path=self.path, **self._cred_fields()) self._send_json(200, { "_nodes": {"total": _CLUSTER_NODES, "successful": _CLUSTER_NODES, "failed": 0}, "cluster_name": _CLUSTER_NAME, @@ -137,10 +147,10 @@ class ESHandler(BaseHTTPRequestHandler): "build_hash": _ES_BUILD_HASH}}, }) elif path.startswith("/_security/") or path.startswith("/_xpack/"): - _log("security_probe", src=src, method="GET", path=self.path) + _log("security_probe", src=src, method="GET", path=self.path, **self._cred_fields()) self._send_json(200, 
{"enabled": True, "available": True}) else: - _log("request", src=src, method="GET", path=self.path) + _log("request", src=src, method="GET", path=self.path, **self._cred_fields()) self._send_json(404, {"error": {"root_cause": [{"type": "index_not_found_exception", "reason": "no such index"}]}}) @@ -149,7 +159,8 @@ class ESHandler(BaseHTTPRequestHandler): body = self._read_body() path = self.path.split("?")[0] _log("post_request", src=src, method="POST", path=self.path, - body_preview=body[:300], user_agent=self.headers.get("User-Agent", "")) + body_preview=body[:300], user_agent=self.headers.get("User-Agent", ""), + **self._cred_fields()) if "_search" in path or "_bulk" in path: self._send_json(200, {"took": 1, "timed_out": False, "hits": {"total": {"value": 0}, "hits": []}}) else: @@ -158,17 +169,20 @@ class ESHandler(BaseHTTPRequestHandler): def do_PUT(self): src = self.client_address[0] body = self._read_body() - _log("put_request", src=src, method="PUT", path=self.path, body_preview=body[:300]) + _log("put_request", src=src, method="PUT", path=self.path, + body_preview=body[:300], **self._cred_fields()) self._send_json(200, {"acknowledged": True}) def do_DELETE(self): src = self.client_address[0] - _log("delete_request", src=src, method="DELETE", path=self.path) + _log("delete_request", src=src, method="DELETE", path=self.path, + **self._cred_fields()) self._send_json(200, {"acknowledged": True}) def do_HEAD(self): src = self.client_address[0] - _log("head_request", src=src, method="HEAD", path=self.path) + _log("head_request", src=src, method="HEAD", path=self.path, + **self._cred_fields()) self._send_json(200, {}) def log_message(self, fmt, *args): diff --git a/decnet/templates/elasticsearch/syslog_bridge.py b/decnet/templates/elasticsearch/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/elasticsearch/syslog_bridge.py +++ b/decnet/templates/elasticsearch/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). 
def encode_secret(secret: "str | bytes") -> dict[str, str]:
    """Encode a credential secret into the universal SD-block field pair.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread
    into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    Args:
        secret: The secret as text or as raw wire bytes.  ``str`` input is
            encoded as UTF-8 (code points that cannot be encoded, e.g. lone
            surrogates, become ``?``).  ``bytes`` input is used verbatim, so
            NUL / 0xff / invalid-UTF-8 sequences survive in ``secret_b64``.

    Returns:
        A dict with two keys:

        * ``secret_printable`` — every byte outside ``[0x20, 0x7f)`` is
          collapsed to ``'?'``, so the value is printable ASCII only.
        * ``secret_b64`` — standard base64 of the exact bytes.  Per the
          original notes, the decnet web ingester keys off this field
          being present.

    NOTE(review): ``secret_printable`` can still contain ``"``, ``\\`` and
    ``]``; this helper does not escape them — confirm the SD serializer does.
    """
    if isinstance(secret, (bytes, bytearray)):
        raw = bytes(secret)
    else:
        raw = secret.encode("utf-8", errors="replace")
    printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw)
    return {
        "secret_printable": printable,
        "secret_b64": base64.b64encode(raw).decode("ascii"),
    }


# One auth-param: key="quoted string with \" pairs"  or  key=token.
# The token alternative stays deliberately lenient (anything up to a comma
# or whitespace) so malformed attacker input is still captured.
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^,\s]+))')
# A backslash-escaped character inside a quoted string.
_QUOTED_PAIR_RE = re.compile(r"\\(.)")


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP ``Authorization`` header value into credential SD fields.

    The result is ready to spread into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):
      * Basic — base64(user:pw).  ``principal`` is the user part decoded as
        UTF-8 (invalid bytes become U+FFFD); the password is passed to
        :func:`encode_secret` as raw bytes so ``secret_b64`` is lossless.
        ``secret_kind="plaintext"``.
      * Bearer / Token — ``principal=None``, ``secret_kind="http_bearer"``,
        the secret is the token text.
      * Digest — ``principal`` is the ``username`` parameter (``None`` when
        absent), the secret is the ``response`` parameter, and
        ``secret_kind="http_digest_md5"`` is used regardless of any
        ``algorithm=`` parameter.

    Args:
        header_value: Raw header value, or ``None`` when the header is absent.

    Returns:
        A dict with ``principal``, ``secret_kind``, ``secret_printable`` and
        ``secret_b64``; or ``None`` when the value is empty / not a ``str``,
        has nothing after the scheme, is a Basic payload that is not valid
        base64 or lacks a ``:``, is a Digest without a non-empty
        ``response``, or uses any other scheme (NTLM, Negotiate,
        AWS4-HMAC-SHA256, ...).
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # Covers binascii.Error (bad alphabet / padding), which is a
            # ValueError subclass, and the non-ASCII-input ValueError.
            return None
        # Split on the wire bytes: ':' is ASCII, so this matches splitting
        # the decoded text, but keeps the password bytes untouched.
        user, sep, password = raw.partition(b":")
        if not sep:
            return None
        return {
            "principal": user.decode("utf-8", errors="replace"),
            "secret_kind": "plaintext",
            **encode_secret(password),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            key, quoted, token = match.groups()
            if quoted is None:
                value = token
            else:
                # Undo quoted-pair escaping (\" -> ", \\ -> \).
                value = _QUOTED_PAIR_RE.sub(r"\1", quoted)
            params[key.lower()] = value  # a repeated parameter: last one wins
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
def encode_secret(secret: "str | bytes") -> dict[str, str]:
    """Encode a credential secret into the universal SD-block field pair.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread
    into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    Args:
        secret: The secret as text or as raw wire bytes.  ``str`` input is
            encoded as UTF-8 (code points that cannot be encoded, e.g. lone
            surrogates, become ``?``).  ``bytes`` input is used verbatim, so
            NUL / 0xff / invalid-UTF-8 sequences survive in ``secret_b64``.

    Returns:
        A dict with two keys:

        * ``secret_printable`` — every byte outside ``[0x20, 0x7f)`` is
          collapsed to ``'?'``, so the value is printable ASCII only.
        * ``secret_b64`` — standard base64 of the exact bytes.  Per the
          original notes, the decnet web ingester keys off this field
          being present.

    NOTE(review): ``secret_printable`` can still contain ``"``, ``\\`` and
    ``]``; this helper does not escape them — confirm the SD serializer does.
    """
    if isinstance(secret, (bytes, bytearray)):
        raw = bytes(secret)
    else:
        raw = secret.encode("utf-8", errors="replace")
    printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw)
    return {
        "secret_printable": printable,
        "secret_b64": base64.b64encode(raw).decode("ascii"),
    }


# One auth-param: key="quoted string with \" pairs"  or  key=token.
# The token alternative stays deliberately lenient (anything up to a comma
# or whitespace) so malformed attacker input is still captured.
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^,\s]+))')
# A backslash-escaped character inside a quoted string.
_QUOTED_PAIR_RE = re.compile(r"\\(.)")


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP ``Authorization`` header value into credential SD fields.

    The result is ready to spread into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):
      * Basic — base64(user:pw).  ``principal`` is the user part decoded as
        UTF-8 (invalid bytes become U+FFFD); the password is passed to
        :func:`encode_secret` as raw bytes so ``secret_b64`` is lossless.
        ``secret_kind="plaintext"``.
      * Bearer / Token — ``principal=None``, ``secret_kind="http_bearer"``,
        the secret is the token text.
      * Digest — ``principal`` is the ``username`` parameter (``None`` when
        absent), the secret is the ``response`` parameter, and
        ``secret_kind="http_digest_md5"`` is used regardless of any
        ``algorithm=`` parameter.

    Args:
        header_value: Raw header value, or ``None`` when the header is absent.

    Returns:
        A dict with ``principal``, ``secret_kind``, ``secret_printable`` and
        ``secret_b64``; or ``None`` when the value is empty / not a ``str``,
        has nothing after the scheme, is a Basic payload that is not valid
        base64 or lacks a ``:``, is a Digest without a non-empty
        ``response``, or uses any other scheme (NTLM, Negotiate,
        AWS4-HMAC-SHA256, ...).
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # Covers binascii.Error (bad alphabet / padding), which is a
            # ValueError subclass, and the non-ASCII-input ValueError.
            return None
        # Split on the wire bytes: ':' is ASCII, so this matches splitting
        # the decoded text, but keeps the password bytes untouched.
        user, sep, password = raw.partition(b":")
        if not sep:
            return None
        return {
            "principal": user.decode("utf-8", errors="replace"),
            "secret_kind": "plaintext",
            **encode_secret(password),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            key, quoted, token = match.groups()
            if quoted is None:
                value = token
            else:
                # Undo quoted-pair escaping (\" -> ", \\ -> \).
                value = _QUOTED_PAIR_RE.sub(r"\1", quoted)
            params[key.lower()] = value  # a repeated parameter: last one wins
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
def encode_secret(secret: "str | bytes") -> dict[str, str]:
    """Encode a credential secret into the universal SD-block field pair.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread
    into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    Args:
        secret: The secret as text or as raw wire bytes.  ``str`` input is
            encoded as UTF-8 (code points that cannot be encoded, e.g. lone
            surrogates, become ``?``).  ``bytes`` input is used verbatim, so
            NUL / 0xff / invalid-UTF-8 sequences survive in ``secret_b64``.

    Returns:
        A dict with two keys:

        * ``secret_printable`` — every byte outside ``[0x20, 0x7f)`` is
          collapsed to ``'?'``, so the value is printable ASCII only.
        * ``secret_b64`` — standard base64 of the exact bytes.  Per the
          original notes, the decnet web ingester keys off this field
          being present.

    NOTE(review): ``secret_printable`` can still contain ``"``, ``\\`` and
    ``]``; this helper does not escape them — confirm the SD serializer does.
    """
    if isinstance(secret, (bytes, bytearray)):
        raw = bytes(secret)
    else:
        raw = secret.encode("utf-8", errors="replace")
    printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw)
    return {
        "secret_printable": printable,
        "secret_b64": base64.b64encode(raw).decode("ascii"),
    }


# One auth-param: key="quoted string with \" pairs"  or  key=token.
# The token alternative stays deliberately lenient (anything up to a comma
# or whitespace) so malformed attacker input is still captured.
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^,\s]+))')
# A backslash-escaped character inside a quoted string.
_QUOTED_PAIR_RE = re.compile(r"\\(.)")


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP ``Authorization`` header value into credential SD fields.

    The result is ready to spread into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):
      * Basic — base64(user:pw).  ``principal`` is the user part decoded as
        UTF-8 (invalid bytes become U+FFFD); the password is passed to
        :func:`encode_secret` as raw bytes so ``secret_b64`` is lossless.
        ``secret_kind="plaintext"``.
      * Bearer / Token — ``principal=None``, ``secret_kind="http_bearer"``,
        the secret is the token text.
      * Digest — ``principal`` is the ``username`` parameter (``None`` when
        absent), the secret is the ``response`` parameter, and
        ``secret_kind="http_digest_md5"`` is used regardless of any
        ``algorithm=`` parameter.

    Args:
        header_value: Raw header value, or ``None`` when the header is absent.

    Returns:
        A dict with ``principal``, ``secret_kind``, ``secret_printable`` and
        ``secret_b64``; or ``None`` when the value is empty / not a ``str``,
        has nothing after the scheme, is a Basic payload that is not valid
        base64 or lacks a ``:``, is a Digest without a non-empty
        ``response``, or uses any other scheme (NTLM, Negotiate,
        AWS4-HMAC-SHA256, ...).
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # Covers binascii.Error (bad alphabet / padding), which is a
            # ValueError subclass, and the non-ASCII-input ValueError.
            return None
        # Split on the wire bytes: ':' is ASCII, so this matches splitting
        # the decoded text, but keeps the password bytes untouched.
        user, sep, password = raw.partition(b":")
        if not sep:
            return None
        return {
            "principal": user.decode("utf-8", errors="replace"),
            "secret_kind": "plaintext",
            **encode_secret(password),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            key, quoted, token = match.groups()
            if quoted is None:
                value = token
            else:
                # Undo quoted-pair escaping (\" -> ", \\ -> \).
                value = _QUOTED_PAIR_RE.sub(r"\1", quoted)
            params[key.lower()] = value  # a repeated parameter: last one wins
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
def encode_secret(secret: "str | bytes") -> dict[str, str]:
    """Encode a credential secret into the universal SD-block field pair.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread
    into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    Args:
        secret: The secret as text or as raw wire bytes.  ``str`` input is
            encoded as UTF-8 (code points that cannot be encoded, e.g. lone
            surrogates, become ``?``).  ``bytes`` input is used verbatim, so
            NUL / 0xff / invalid-UTF-8 sequences survive in ``secret_b64``.

    Returns:
        A dict with two keys:

        * ``secret_printable`` — every byte outside ``[0x20, 0x7f)`` is
          collapsed to ``'?'``, so the value is printable ASCII only.
        * ``secret_b64`` — standard base64 of the exact bytes.  Per the
          original notes, the decnet web ingester keys off this field
          being present.

    NOTE(review): ``secret_printable`` can still contain ``"``, ``\\`` and
    ``]``; this helper does not escape them — confirm the SD serializer does.
    """
    if isinstance(secret, (bytes, bytearray)):
        raw = bytes(secret)
    else:
        raw = secret.encode("utf-8", errors="replace")
    printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw)
    return {
        "secret_printable": printable,
        "secret_b64": base64.b64encode(raw).decode("ascii"),
    }


# One auth-param: key="quoted string with \" pairs"  or  key=token.
# The token alternative stays deliberately lenient (anything up to a comma
# or whitespace) so malformed attacker input is still captured.
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^,\s]+))')
# A backslash-escaped character inside a quoted string.
_QUOTED_PAIR_RE = re.compile(r"\\(.)")


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP ``Authorization`` header value into credential SD fields.

    The result is ready to spread into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):
      * Basic — base64(user:pw).  ``principal`` is the user part decoded as
        UTF-8 (invalid bytes become U+FFFD); the password is passed to
        :func:`encode_secret` as raw bytes so ``secret_b64`` is lossless.
        ``secret_kind="plaintext"``.
      * Bearer / Token — ``principal=None``, ``secret_kind="http_bearer"``,
        the secret is the token text.
      * Digest — ``principal`` is the ``username`` parameter (``None`` when
        absent), the secret is the ``response`` parameter, and
        ``secret_kind="http_digest_md5"`` is used regardless of any
        ``algorithm=`` parameter.

    Args:
        header_value: Raw header value, or ``None`` when the header is absent.

    Returns:
        A dict with ``principal``, ``secret_kind``, ``secret_printable`` and
        ``secret_b64``; or ``None`` when the value is empty / not a ``str``,
        has nothing after the scheme, is a Basic payload that is not valid
        base64 or lacks a ``:``, is a Digest without a non-empty
        ``response``, or uses any other scheme (NTLM, Negotiate,
        AWS4-HMAC-SHA256, ...).
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # Covers binascii.Error (bad alphabet / padding), which is a
            # ValueError subclass, and the non-ASCII-input ValueError.
            return None
        # Split on the wire bytes: ':' is ASCII, so this matches splitting
        # the decoded text, but keeps the password bytes untouched.
        user, sep, password = raw.partition(b":")
        if not sep:
            return None
        return {
            "principal": user.decode("utf-8", errors="replace"),
            "secret_kind": "plaintext",
            **encode_secret(password),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            key, quoted, token = match.groups()
            if quoted is None:
                value = token
            else:
                # Undo quoted-pair escaping (\" -> ", \\ -> \).
                value = _QUOTED_PAIR_RE.sub(r"\1", quoted)
            params[key.lower()] = value  # a repeated parameter: last one wins
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/k8s/server.py b/decnet/templates/k8s/server.py index 8e5ba516..40e368f3 100644 --- a/decnet/templates/k8s/server.py +++ b/decnet/templates/k8s/server.py @@ -10,7 +10,12 @@ import json import os from flask import Flask, request -from syslog_bridge import syslog_line, write_syslog_file, forward_syslog +from syslog_bridge import ( + classify_authorization, + forward_syslog, + syslog_line, + write_syslog_file, +) NODE_NAME = os.environ.get("NODE_NAME", "k8s-master") SERVICE_NAME = "k8s" @@ -75,13 +80,16 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None: @app.before_request def log_request(): + auth_header = request.headers.get("Authorization", "") + cred = classify_authorization(auth_header) _log( "request", method=request.method, path=request.path, remote_addr=request.remote_addr, - auth=request.headers.get("Authorization", ""), + auth=auth_header, body=request.get_data(as_text=True)[:512], + **(cred or {}), ) diff --git a/decnet/templates/k8s/syslog_bridge.py b/decnet/templates/k8s/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/k8s/syslog_bridge.py +++ b/decnet/templates/k8s/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. 
+ + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. 
+ + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/ldap/syslog_bridge.py b/decnet/templates/ldap/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/ldap/syslog_bridge.py +++ b/decnet/templates/ldap/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/llmnr/syslog_bridge.py b/decnet/templates/llmnr/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/llmnr/syslog_bridge.py +++ b/decnet/templates/llmnr/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" 
for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/mongodb/syslog_bridge.py b/decnet/templates/mongodb/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/mongodb/syslog_bridge.py +++ b/decnet/templates/mongodb/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/mqtt/syslog_bridge.py b/decnet/templates/mqtt/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/mqtt/syslog_bridge.py +++ b/decnet/templates/mqtt/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" 
for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/mssql/syslog_bridge.py b/decnet/templates/mssql/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/mssql/syslog_bridge.py +++ b/decnet/templates/mssql/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/mysql/syslog_bridge.py b/decnet/templates/mysql/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/mysql/syslog_bridge.py +++ b/decnet/templates/mysql/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" 
for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/pop3/syslog_bridge.py b/decnet/templates/pop3/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/pop3/syslog_bridge.py +++ b/decnet/templates/pop3/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/postgres/syslog_bridge.py b/decnet/templates/postgres/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/postgres/syslog_bridge.py +++ b/decnet/templates/postgres/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" 
for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/rdp/syslog_bridge.py b/decnet/templates/rdp/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/rdp/syslog_bridge.py +++ b/decnet/templates/rdp/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. 
+ + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. 
+ + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/redis/syslog_bridge.py b/decnet/templates/redis/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/redis/syslog_bridge.py +++ b/decnet/templates/redis/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/sip/server.py b/decnet/templates/sip/server.py index dd401669..66e504b1 100644 --- a/decnet/templates/sip/server.py +++ b/decnet/templates/sip/server.py @@ -8,7 +8,12 @@ Authorization header and call metadata, then responds with 401 Unauthorized. import asyncio import os import re -from syslog_bridge import syslog_line, write_syslog_file, forward_syslog +from syslog_bridge import ( + classify_authorization, + forward_syslog, + syslog_line, + write_syslog_file, +) NODE_NAME = os.environ.get("NODE_NAME", "pbx") SERVICE_NAME = "sip" @@ -58,6 +63,13 @@ def _handle_message(data: bytes, src_addr) -> bytes | None: m = re.search(r'username="([^"]+)"', auth_header) username = m.group(1) if m else "" + # SIP Digest is the same shape as HTTP Digest (RFC 7616 derived from + # RFC 2617). classify_authorization handles it identically — emits + # secret_kind="http_digest_md5", which is correct: the cred is the + # MD5 hash response, regardless of whether it rode in over SIP or + # HTTP. Reuse-analytics correlates across both. + cred = classify_authorization(auth_header) + _log( "request", src=src_addr[0], @@ -67,6 +79,7 @@ def _handle_message(data: bytes, src_addr) -> bytes | None: to=headers.get("to", ""), username=username, auth=auth_header[:256], + **(cred or {}), ) if method in ("REGISTER", "INVITE", "OPTIONS"): diff --git a/decnet/templates/sip/syslog_bridge.py b/decnet/templates/sip/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/sip/syslog_bridge.py +++ b/decnet/templates/sip/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/smb/syslog_bridge.py b/decnet/templates/smb/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/smb/syslog_bridge.py +++ b/decnet/templates/smb/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" 
for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/smtp/syslog_bridge.py b/decnet/templates/smtp/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/smtp/syslog_bridge.py +++ b/decnet/templates/smtp/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/snmp/server.py b/decnet/templates/snmp/server.py index 94109398..d388bbce 100644 --- a/decnet/templates/snmp/server.py +++ b/decnet/templates/snmp/server.py @@ -9,7 +9,12 @@ Logs all requests as JSON. import asyncio import os import struct -from syslog_bridge import syslog_line, write_syslog_file, forward_syslog +from syslog_bridge import ( + encode_secret, + forward_syslog, + syslog_line, + write_syslog_file, +) NODE_NAME = os.environ.get("NODE_NAME", "switch") SERVICE_NAME = "snmp" @@ -229,8 +234,15 @@ class SNMPProtocol(asyncio.DatagramProtocol): def datagram_received(self, data, addr): try: version, community, request_id, oids = _parse_snmp(data) + # SNMP v1/v2c community is the only auth mechanism on the wire + # — every packet carries the shared secret in plaintext. Land + # it as a Credential row keyed `snmp_community` so reuse- + # analytics treats community-string spray as the same shape + # of attack signal as cleartext password spray. _log("get_request", src=addr[0], src_port=addr[1], - version=version, community=community, oids=oids) + version=version, community=community, oids=oids, + principal=None, secret_kind="snmp_community", + **encode_secret(community)) response = _build_response(version, community, request_id, oids) self._transport.sendto(response, addr) except Exception as e: diff --git a/decnet/templates/snmp/syslog_bridge.py b/decnet/templates/snmp/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/snmp/syslog_bridge.py +++ b/decnet/templates/snmp/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/ssh/syslog_bridge.py b/decnet/templates/ssh/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/ssh/syslog_bridge.py +++ b/decnet/templates/ssh/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" 
for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/syslog_bridge.py b/decnet/templates/syslog_bridge.py index a2293fde..ca6d7284 100644 --- a/decnet/templates/syslog_bridge.py +++ b/decnet/templates/syslog_bridge.py @@ -13,8 +13,9 @@ Facility: local0 (16). SD element ID uses PEN 55555. 
""" import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -106,6 +107,80 @@ def encode_secret(secret: str) -> dict[str, str]: } +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/telnet/syslog_bridge.py b/decnet/templates/telnet/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/telnet/syslog_bridge.py +++ b/decnet/templates/telnet/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. 
""" +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. 
+ + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog 
line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/tftp/syslog_bridge.py b/decnet/templates/tftp/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/tftp/syslog_bridge.py +++ b/decnet/templates/tftp/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. + + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" 
for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. + + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. 
+ """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/templates/vnc/syslog_bridge.py b/decnet/templates/vnc/syslog_bridge.py index c0a78d09..ca6d7284 100644 --- a/decnet/templates/vnc/syslog_bridge.py +++ b/decnet/templates/vnc/syslog_bridge.py @@ -12,8 +12,10 @@ RFC 5424 structure: Facility: local0 (16). SD element ID uses PEN 55555. """ +import base64 +import re from datetime import datetime, timezone -from typing import Any +from typing import Any, Optional # ─── Constants ──────────────────────────────────────────────────────────────── @@ -79,6 +81,106 @@ def syslog_line( return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" +def encode_secret(secret: str) -> dict[str, str]: + """Standardized credential-secret encoding for the universal SD-block shape. 
+ + Returns ``{'secret_printable': ..., 'secret_b64': ...}`` ready to spread + into a :func:`syslog_line` / ``_log`` call:: + + _log("auth_attempt", principal=user, **encode_secret(password)) + + ``secret_printable`` mirrors auth-helper.c's sd_escape: bytes outside + ``[0x20, 0x7f)`` collapse to ``'?'`` so the field is always parser-safe + RFC 5424 ASCII. ``secret_b64`` preserves the *original* utf-8 bytes — + NUL/0xff/control/non-utf8 sequences all survive losslessly, useful as + a fingerprinting signal even when the printable form sanitizes them. + + The decnet web ingester's native-shape branch keys off ``secret_b64`` + being present, so any service emitter calling this helper lands its + cred attempt directly in the :class:`Credential` table. + """ + raw = secret.encode("utf-8", errors="replace") + printable = "".join(chr(b) if 0x20 <= b < 0x7f else "?" for b in raw) + return { + "secret_printable": printable, + "secret_b64": base64.b64encode(raw).decode("ascii"), + } + + +_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)') + + +def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]: + """Parse an HTTP Authorization header value into Credential SD fields. + + Returns a dict with the universal cred shape ready to spread into a + ``_log(...)`` call:: + + auth = request.headers.get("Authorization") + cred = classify_authorization(auth) + if cred: + _log("auth_attempt", **cred) + + Recognised schemes: + * Basic — base64(user:pw); decoded → ``principal=user`` + + ``secret_kind="plaintext"`` + ``encode_secret(pw)``. + * Bearer / Token — opaque token; ``principal=None`` + + ``secret_kind="http_bearer"`` + ``encode_secret(token)``. + * Digest — ``principal=username`` from header + + ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``. 
+ + Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM, + Negotiate, …) — callers can still log the raw header value in the + ambient SD-block; we just don't know how to extract a hashable + secret from it. + """ + if not header_value or not isinstance(header_value, str): + return None + parts = header_value.strip().split(None, 1) + if len(parts) < 2: + return None + scheme, rest = parts[0].lower(), parts[1].strip() + + if scheme == "basic": + try: + decoded = base64.b64decode(rest, validate=True).decode("utf-8", errors="replace") + except (ValueError, base64.binascii.Error): + return None + if ":" not in decoded: + return None + user, _, pw = decoded.partition(":") + return { + "principal": user, + "secret_kind": "plaintext", + **encode_secret(pw), + } + + if scheme in ("bearer", "token"): + return { + "principal": None, + "secret_kind": "http_bearer", + **encode_secret(rest), + } + + if scheme == "digest": + params: dict[str, str] = {} + for m in _DIGEST_PARAM_RE.finditer(rest): + k = m.group(1) or m.group(3) + v = m.group(2) if m.group(2) is not None else m.group(4) + if k: + params[k.lower()] = v + response = params.get("response") + if not response: + return None + return { + "principal": params.get("username"), + "secret_kind": "http_digest_md5", + **encode_secret(response), + } + + return None + + def write_syslog_file(line: str) -> None: """Emit a syslog line to stdout for container log capture.""" print(line, flush=True) diff --git a/decnet/web/ingester.py b/decnet/web/ingester.py index 76766a89..32c09d14 100644 --- a/decnet/web/ingester.py +++ b/decnet/web/ingester.py @@ -316,6 +316,11 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non "category": _ua_category, "tool": _ua_tool, "signals": _ua_signals or None, + # Request context — which endpoint did the tool hit? + # Useful in the dashboard so analysts see "Nikto hit + # /admin" rather than just "Nikto seen on this decky". 
+ "method": _fields.get("method"), + "path": _fields.get("path"), } }) diff --git a/tests/core/test_fingerprinting.py b/tests/core/test_fingerprinting.py index 4b90ad24..a2b40e23 100644 --- a/tests/core/test_fingerprinting.py +++ b/tests/core/test_fingerprinting.py @@ -15,6 +15,23 @@ def _make_repo(): # HTTP User-Agent # --------------------------------------------------------------------------- +def _find_ua_bounty(repo) -> dict: + """Find the http_useragent fingerprint among all add_bounty calls. + + A single HTTP request can produce multiple `bounty_type="fingerprint"` + bounties (UA, http_quirks, ip_leak, …). Tests for one specific kind + must filter rather than assert call count, so adding new fingerprint + families later doesn't retroactively break old tests.""" + for c in repo.add_bounty.await_args_list: + payload = c[0][0].get("payload") or {} + if payload.get("fingerprint_type") == "http_useragent": + return c[0][0] + raise AssertionError( + "no http_useragent bounty found; calls=%r" + % [c[0][0].get("payload") for c in repo.add_bounty.await_args_list] + ) + + @pytest.mark.asyncio async def test_http_useragent_extracted(): repo = _make_repo() @@ -30,13 +47,12 @@ async def test_http_useragent_extracted(): }, } await _extract_bounty(repo, log_data) - repo.add_bounty.assert_awaited_once() - call_kwargs = repo.add_bounty.call_args[0][0] - assert call_kwargs["bounty_type"] == "fingerprint" - assert call_kwargs["payload"]["fingerprint_type"] == "http_useragent" - assert call_kwargs["payload"]["value"] == "Nikto/2.1.6" - assert call_kwargs["payload"]["path"] == "/admin" - assert call_kwargs["payload"]["method"] == "GET" + bounty = _find_ua_bounty(repo) + assert bounty["bounty_type"] == "fingerprint" + assert bounty["payload"]["fingerprint_type"] == "http_useragent" + assert bounty["payload"]["value"] == "Nikto/2.1.6" + assert bounty["payload"]["path"] == "/admin" + assert bounty["payload"]["method"] == "GET" @pytest.mark.asyncio @@ -52,12 +68,14 @@ async def 
test_http_useragent_lowercase_key(): }, } await _extract_bounty(repo, log_data) - call_kwargs = repo.add_bounty.call_args[0][0] - assert call_kwargs["payload"]["value"] == "sqlmap/1.7" + bounty = _find_ua_bounty(repo) + assert bounty["payload"]["value"] == "sqlmap/1.7" @pytest.mark.asyncio async def test_http_no_useragent_no_fingerprint_bounty(): + """No User-Agent header → no http_useragent bounty (other fingerprint + families like http_quirks may still fire on the same request).""" repo = _make_repo() log_data = { "decky": "decky-01", @@ -69,7 +87,11 @@ async def test_http_no_useragent_no_fingerprint_bounty(): }, } await _extract_bounty(repo, log_data) - repo.add_bounty.assert_not_awaited() + ua_calls = [ + c for c in repo.add_bounty.await_args_list + if (c[0][0].get("payload") or {}).get("fingerprint_type") == "http_useragent" + ] + assert ua_calls == [] @pytest.mark.asyncio @@ -142,39 +164,59 @@ async def test_vnc_version_event_no_client_version_field(): @pytest.mark.asyncio async def test_credential_still_extracted_alongside_fingerprint(): + """Native-shape credential lands via upsert_credential, not add_bounty. + + The legacy username+password adapter was deleted in DEBT-039; the + universal shape (secret_b64 + principal) goes straight to the + Credential table. 
Fingerprint bounties continue to ride add_bounty.""" + import base64 repo = _make_repo() + repo.upsert_credential = AsyncMock() log_data = { "decky": "decky-03", "service": "ftp", "attacker_ip": "10.0.0.8", "event_type": "auth_attempt", - "fields": {"username": "admin", "password": "1234"}, + "fields": { + "username": "admin", + "principal": "admin", + "secret_kind": "plaintext", + "secret_printable": "1234", + "secret_b64": base64.b64encode(b"1234").decode(), + }, } await _extract_bounty(repo, log_data) - repo.add_bounty.assert_awaited_once() - call_kwargs = repo.add_bounty.call_args[0][0] - assert call_kwargs["bounty_type"] == "credential" + repo.upsert_credential.assert_awaited_once() + cred = repo.upsert_credential.call_args[0][0] + assert cred["service"] == "ftp" + assert cred["principal"] == "admin" @pytest.mark.asyncio async def test_http_credential_and_fingerprint_both_extracted(): - """An HTTP login attempt can yield both a credential and a UA fingerprint.""" + """An HTTP login attempt yields both a Credential row and a UA + fingerprint bounty — distinct write paths.""" + import base64 repo = _make_repo() + repo.upsert_credential = AsyncMock() log_data = { "decky": "decky-03", "service": "http", "attacker_ip": "10.0.0.9", "event_type": "request", "fields": { - "username": "root", - "password": "toor", + "principal": "root", + "secret_kind": "plaintext", + "secret_printable": "toor", + "secret_b64": base64.b64encode(b"toor").decode(), "headers": {"User-Agent": "curl/7.88.1"}, }, } await _extract_bounty(repo, log_data) - assert repo.add_bounty.await_count == 2 - types = {c[0][0]["bounty_type"] for c in repo.add_bounty.call_args_list} - assert types == {"credential", "fingerprint"} + repo.upsert_credential.assert_awaited_once() + # add_bounty fired for the UA fingerprint; http_quirks may also fire. 
+ bounty_types = {c[0][0]["bounty_type"] for c in repo.add_bounty.call_args_list} + assert "fingerprint" in bounty_types # --------------------------------------------------------------------------- diff --git a/tests/service_testing/conftest.py b/tests/service_testing/conftest.py index 9234d4b1..6605227a 100644 --- a/tests/service_testing/conftest.py +++ b/tests/service_testing/conftest.py @@ -25,6 +25,14 @@ def make_fake_syslog_bridge() -> ModuleType: mod.forward_syslog = MagicMock() mod.SEVERITY_WARNING = 4 mod.SEVERITY_INFO = 6 + # encode_secret returns the universal cred SD shape; tests don't + # care about the exact bytes, just that the key set is correct. + mod.encode_secret = MagicMock( + return_value={"secret_printable": "", "secret_b64": ""} + ) + # classify_authorization returns None for unknown / absent auth so + # services that call **(cred or {}) get a no-op spread. + mod.classify_authorization = MagicMock(return_value=None) return mod diff --git a/tests/service_testing/test_imap.py b/tests/service_testing/test_imap.py index c9362f8d..f555115e 100644 --- a/tests/service_testing/test_imap.py +++ b/tests/service_testing/test_imap.py @@ -24,6 +24,8 @@ def _make_fake_syslog_bridge() -> ModuleType: mod.forward_syslog = MagicMock() mod.SEVERITY_WARNING = 4 mod.SEVERITY_INFO = 6 + mod.encode_secret = MagicMock(return_value={"secret_printable": "", "secret_b64": ""}) + mod.classify_authorization = MagicMock(return_value=None) return mod diff --git a/tests/service_testing/test_mqtt.py b/tests/service_testing/test_mqtt.py index cbce2722..7b18b969 100644 --- a/tests/service_testing/test_mqtt.py +++ b/tests/service_testing/test_mqtt.py @@ -23,6 +23,8 @@ def _make_fake_syslog_bridge() -> ModuleType: mod.forward_syslog = MagicMock() mod.SEVERITY_WARNING = 4 mod.SEVERITY_INFO = 6 + mod.encode_secret = MagicMock(return_value={"secret_printable": "", "secret_b64": ""}) + mod.classify_authorization = MagicMock(return_value=None) return mod diff --git 
a/tests/service_testing/test_pop3.py b/tests/service_testing/test_pop3.py index 93b04f57..9b85dd0d 100644 --- a/tests/service_testing/test_pop3.py +++ b/tests/service_testing/test_pop3.py @@ -24,6 +24,8 @@ def _make_fake_syslog_bridge() -> ModuleType: mod.forward_syslog = MagicMock() mod.SEVERITY_WARNING = 4 mod.SEVERITY_INFO = 6 + mod.encode_secret = MagicMock(return_value={"secret_printable": "", "secret_b64": ""}) + mod.classify_authorization = MagicMock(return_value=None) return mod diff --git a/tests/service_testing/test_smtp.py b/tests/service_testing/test_smtp.py index 6565e841..240d45a4 100644 --- a/tests/service_testing/test_smtp.py +++ b/tests/service_testing/test_smtp.py @@ -27,6 +27,8 @@ def _make_fake_syslog_bridge() -> ModuleType: mod.forward_syslog = MagicMock() mod.SEVERITY_WARNING = 4 mod.SEVERITY_INFO = 6 + mod.encode_secret = MagicMock(return_value={"secret_printable": "", "secret_b64": ""}) + mod.classify_authorization = MagicMock(return_value=None) return mod diff --git a/tests/service_testing/test_snmp.py b/tests/service_testing/test_snmp.py index 1cc190aa..73264df0 100644 --- a/tests/service_testing/test_snmp.py +++ b/tests/service_testing/test_snmp.py @@ -25,6 +25,8 @@ def _make_fake_syslog_bridge() -> ModuleType: mod.forward_syslog = MagicMock() mod.SEVERITY_WARNING = 4 mod.SEVERITY_INFO = 6 + mod.encode_secret = MagicMock(return_value={"secret_printable": "", "secret_b64": ""}) + mod.classify_authorization = MagicMock(return_value=None) return mod diff --git a/tests/services/test_cred_emitters.py b/tests/services/test_cred_emitters.py index 4edc346b..e81939a1 100644 --- a/tests/services/test_cred_emitters.py +++ b/tests/services/test_cred_emitters.py @@ -222,6 +222,109 @@ async def test_vnc_hash_credential(): assert cred["secret_sha256"] == hashlib.sha256(raw).hexdigest() +@pytest.mark.asyncio +async def test_snmp_community_native_shape(): + """SNMP v1/v2c community string lands as secret_kind=snmp_community, + principal=None (no per-user 
identity in v1/v2c).""" + from decnet.web.ingester import _extract_bounty + repo = MagicMock(); repo.upsert_credential = AsyncMock() + raw = b"public" + log_data = { + "decky": "decky-01", + "service": "snmp", + "attacker_ip": "10.0.0.5", + "fields": { + "version": 1, + "community": "public", + "secret_kind": "snmp_community", + "secret_printable": "public", + "secret_b64": base64.b64encode(raw).decode("ascii"), + }, + } + await _extract_bounty(repo, log_data) + cred = repo.upsert_credential.call_args[0][0] + assert cred["service"] == "snmp" + assert cred["secret_kind"] == "snmp_community" + assert cred["principal"] is None + assert cred["secret_sha256"] == hashlib.sha256(raw).hexdigest() + + +@pytest.mark.asyncio +async def test_http_basic_native_shape(): + """HTTP Basic via classify_authorization → principal+plaintext.""" + from decnet.web.ingester import _extract_bounty + repo = MagicMock(); repo.upsert_credential = AsyncMock() + log_data = { + "decky": "decky-01", + "service": "http", + "attacker_ip": "10.0.0.5", + "fields": { + "method": "GET", + "path": "/admin", + "principal": "admin", + "secret_kind": "plaintext", + "secret_printable": "hunter2", + "secret_b64": base64.b64encode(b"hunter2").decode("ascii"), + }, + } + await _extract_bounty(repo, log_data) + cred = repo.upsert_credential.call_args[0][0] + assert cred["service"] == "http" + assert cred["principal"] == "admin" + assert cred["secret_kind"] == "plaintext" + + +@pytest.mark.asyncio +async def test_http_bearer_native_shape(): + """HTTP Bearer — principal=None, secret_kind=http_bearer, opaque.""" + from decnet.web.ingester import _extract_bounty + repo = MagicMock(); repo.upsert_credential = AsyncMock() + token = b"eyJhbGciOiJIUzI1NiJ9.foo.bar" + log_data = { + "decky": "decky-01", + "service": "k8s", + "attacker_ip": "10.0.0.5", + "fields": { + "method": "GET", + "path": "/api/v1/secrets", + "principal": None, + "secret_kind": "http_bearer", + "secret_printable": token.decode(), + "secret_b64": 
base64.b64encode(token).decode("ascii"), + }, + } + await _extract_bounty(repo, log_data) + cred = repo.upsert_credential.call_args[0][0] + assert cred["secret_kind"] == "http_bearer" + assert cred["principal"] is None + assert cred["secret_sha256"] == hashlib.sha256(token).hexdigest() + + +@pytest.mark.asyncio +async def test_sip_digest_native_shape(): + """SIP Digest via classify_authorization → response hash captured.""" + from decnet.web.ingester import _extract_bounty + repo = MagicMock(); repo.upsert_credential = AsyncMock() + response_hash = "d41d8cd98f00b204e9800998ecf8427e" + log_data = { + "decky": "decky-01", + "service": "sip", + "attacker_ip": "10.0.0.5", + "fields": { + "method": "REGISTER", + "principal": "alice", + "secret_kind": "http_digest_md5", + "secret_printable": response_hash, + "secret_b64": base64.b64encode(response_hash.encode()).decode("ascii"), + }, + } + await _extract_bounty(repo, log_data) + cred = repo.upsert_credential.call_args[0][0] + assert cred["service"] == "sip" + assert cred["secret_kind"] == "http_digest_md5" + assert cred["principal"] == "alice" + + @pytest.mark.asyncio async def test_lossless_b64_survives_nonprintable_password(): """Even when secret_printable is sanitized, secret_b64 still decodes diff --git a/tests/services/test_syslog_bridge_helpers.py b/tests/services/test_syslog_bridge_helpers.py index e8b6abf1..5d1b2a7d 100644 --- a/tests/services/test_syslog_bridge_helpers.py +++ b/tests/services/test_syslog_bridge_helpers.py @@ -65,6 +65,53 @@ def test_encode_secret_preserves_rfc5424_specials(syslog_bridge): assert base64.b64decode(out["secret_b64"]) == secret.encode("utf-8") +def test_classify_authorization_basic(syslog_bridge): + """HTTP Basic — base64(user:pw) decodes to plaintext credential.""" + cred = syslog_bridge.classify_authorization("Basic YWRtaW46aHVudGVyMg==") + assert cred is not None + assert cred["principal"] == "admin" + assert cred["secret_kind"] == "plaintext" + assert 
base64.b64decode(cred["secret_b64"]) == b"hunter2" + assert cred["secret_printable"] == "hunter2" + + +def test_classify_authorization_bearer(syslog_bridge): + cred = syslog_bridge.classify_authorization("Bearer eyJhbGciOiJIUzI1NiJ9.foo.bar") + assert cred["principal"] is None + assert cred["secret_kind"] == "http_bearer" + assert base64.b64decode(cred["secret_b64"]) == b"eyJhbGciOiJIUzI1NiJ9.foo.bar" + + +def test_classify_authorization_token_alias(syslog_bridge): + """`Token ` = same shape as Bearer (Kubernetes service accounts).""" + cred = syslog_bridge.classify_authorization("Token sa-jwt-token-abc") + assert cred["secret_kind"] == "http_bearer" + + +def test_classify_authorization_digest(syslog_bridge): + """RFC 7616 Digest — extract username + response hash.""" + header = ('Digest username="alice", realm="example.com", ' + 'nonce="abc123", uri="/", response="d41d8cd98f00b204e9800998ecf8427e"') + cred = syslog_bridge.classify_authorization(header) + assert cred["principal"] == "alice" + assert cred["secret_kind"] == "http_digest_md5" + assert cred["secret_printable"] == "d41d8cd98f00b204e9800998ecf8427e" + + +def test_classify_authorization_unknown_scheme(syslog_bridge): + """NTLM, AWS4-HMAC-…, Negotiate — all return None for now.""" + assert syslog_bridge.classify_authorization("NTLM TlRMTVNTUAA=") is None + assert syslog_bridge.classify_authorization("AWS4-HMAC-SHA256 Credential=…") is None + + +def test_classify_authorization_malformed(syslog_bridge): + assert syslog_bridge.classify_authorization(None) is None + assert syslog_bridge.classify_authorization("") is None + assert syslog_bridge.classify_authorization("Basic !!not-base64!!") is None + assert syslog_bridge.classify_authorization("Basic dXNlcg==") is None # no colon + assert syslog_bridge.classify_authorization("Digest no-response-here") is None + + def test_encode_secret_unicode_replaced(syslog_bridge): """Non-ASCII unicode encodes via utf-8, then printable strips the multi-byte sequence to '?' 
chars (one per raw byte)."""