feat(creds): Phase 3 — HTTP/HTTPS POST form body cred extraction

Login forms (wp-login.php, phpMyAdmin, Joomla, etc.) ship a `Content-Type: application/x-www-form-urlencoded` body with field names like username/user/email/log/pwd/password. The HTTP/HTTPS templates already captured the body as opaque bytes; now they parse common login-form shapes into the universal credential SD shape. Adds to the canonical templates/syslog_bridge.py: `extract_form_credentials(body, content_type) -> dict | None`. Field-name matching is case-insensitive and covers — Principal: username, user, email, login, userid, account, log, user_login (WordPress), uname / pma_username (phpMyAdmin); Secret: password, pass, pwd, passwd, passwort, mot_de_passe, user_password (WordPress), pma_password (phpMyAdmin). The HTTP/HTTPS log_request handlers now call `cred = classify_authorization(...) or extract_form_credentials(...)` — Authorization wins when present (current session credential beats a follow-up form change), but POSTs to /wp-login.php with no Auth header still surface their cleartext creds. Secret-without-principal is intentional: a reset-confirm or auto-fill abuse may carry a password without any field that maps to our principal list. The cred row is written with principal=None — the sha256 still correlates across services for reuse analytics. The body capture cap was bumped from 512 → 4096 chars so reasonable form bodies aren't truncated before the cred extractor sees them; the body stored in fields.body stays at 512 chars (display-friendly). 36 helper + emitter tests pass. Phases 4-7 still pending.
2026-04-25 07:10:05 -04:00
parent 0c1316f74c
commit e4bf8fa012
30 changed files with 1972 additions and 8 deletions
--- a/decnet/templates/https/server.py
+++ b/decnet/templates/https/server.py
@@ -18,6 +18,7 @@ from werkzeug.serving import make_server, WSGIRequestHandler
 import instance_seed as _seed
 from syslog_bridge import (
    classify_authorization,
+    extract_form_credentials,
    forward_syslog,
    syslog_line,
    write_syslog_file,
@@ -99,14 +100,18 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None:

@app.before_request
 def log_request():
-    cred = classify_authorization(request.headers.get("Authorization"))
+    body = request.get_data(as_text=True)[:4096]
+    cred = (
+        classify_authorization(request.headers.get("Authorization"))
+        or extract_form_credentials(body, request.headers.get("Content-Type"))
+    )
    _log(
        "request",
        method=request.method,
        path=request.path,
        remote_addr=request.remote_addr,
        headers=dict(request.headers),
-        body=request.get_data(as_text=True)[:512],
+        body=body[:512],
        **(cred or {}),
    )

--- a/decnet/templates/https/syslog_bridge.py
+++ b/decnet/templates/https/syslog_bridge.py
@@ -181,6 +181,76 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An
    return None


+_FORM_PRINCIPAL_KEYS = (
+    "username", "user", "email", "login", "userid", "account",
+    "log",        # wp-login.php
+    "user_login", # WordPress alt
+    "uname",      # phpMyAdmin
+    "pma_username",
+)
+_FORM_SECRET_KEYS = (
+    "password", "pass", "pwd", "passwd", "passwort", "mot_de_passe",
+    "user_password",   # WordPress alt
+    "pma_password",    # phpMyAdmin
+)
+
+
+def extract_form_credentials(
+    body: Optional[str],
+    content_type: Optional[str],
+) -> Optional[dict[str, Any]]:
+    """Parse an `application/x-www-form-urlencoded` body for credentials.
+
+    Returns the universal cred SD shape, ready to spread into a
+    ``_log(...)`` call, whenever a secret-shaped key is present
+    (``principal`` is ``None`` if no principal key matches); else ``None``.
+
+    Field-name detection is case-insensitive and covers the most common
+    login-form variants (WordPress wp-login.php, phpMyAdmin, Joomla,
+    etc.). Add more entries to ``_FORM_PRINCIPAL_KEYS`` /
+    ``_FORM_SECRET_KEYS`` as new templates surface them.
+    """
+    if not body or not isinstance(content_type, str):
+        return None
+    if not content_type.lower().startswith("application/x-www-form-urlencoded"):
+        return None
+
+    fields: dict[str, str] = {}
+    for pair in body.split("&"):
+        if "=" not in pair:
+            continue
+        k, _, v = pair.partition("=")
+        # Import urllib lazily rather than at module scope (the
+        # template emitters are import-cost-sensitive); unquote_plus
+        # handles both percent-decoding and plus-to-space decoding.
+        try:
+            from urllib.parse import unquote_plus
+            key = unquote_plus(k).lower()
+            val = unquote_plus(v)
+        except Exception:
+            continue
+        # First-wins so duplicate-key forms don't get clobbered.
+        fields.setdefault(key, val)
+
+    principal: Optional[str] = None
+    for k in _FORM_PRINCIPAL_KEYS:
+        if k in fields:
+            principal = fields[k]
+            break
+    secret: Optional[str] = None
+    for k in _FORM_SECRET_KEYS:
+        if k in fields:
+            secret = fields[k]
+            break
+    if secret is None:
+        return None
+    return {
+        "principal": principal,
+        "secret_kind": "plaintext",
+        **encode_secret(secret),
+    }
+
+
 def write_syslog_file(line: str) -> None:
    """Emit a syslog line to stdout for container log capture."""
    print(line, flush=True)