feat(creds): Phase 3 — HTTP/HTTPS POST form body cred extraction
Login forms (wp-login.php, phpMyAdmin, Joomla, etc.) ship a
`Content-Type: application/x-www-form-urlencoded` body with field
names like username/user/email/log/pwd/password. The HTTP/HTTPS
templates already captured the body as opaque bytes; now they parse
common login-form shapes into the universal credential SD shape.
Adds canonical templates/syslog_bridge.py:
extract_form_credentials(body, content_type) -> dict | None.
Field-name matching is case-insensitive and covers:
Principal: username, user, email, login, userid, account, log,
user_login (WordPress), uname / pma_username (phpMyAdmin)
Secret: password, pass, pwd, passwd, passwort, mot_de_passe,
user_password (WordPress), pma_password (phpMyAdmin)
The HTTP/HTTPS log_request handlers now call:
cred = classify_authorization(...) or extract_form_credentials(...)
— Authorization wins when present (current session credential beats
a follow-up form change), but POSTs to /wp-login.php with no Auth
header still surface their cleartext creds.
Secret-without-principal is intentional: a reset-confirm or auto-
fill abuse may carry a password without any field that maps to our
principal list. The cred row writes with principal=None — the
sha256 still correlates across services for reuse analytics.
The body capture cap bumped from 512 → 4096 chars so reasonable
form bodies aren't truncated before the cred extractor sees them;
the body stored in fields.body stays at 512 chars (display-friendly).
36 helper + emitter tests pass. Phases 4-7 still pending.
This commit is contained in:
@@ -18,6 +18,7 @@ from werkzeug.serving import make_server, WSGIRequestHandler
|
||||
import instance_seed as _seed
|
||||
from syslog_bridge import (
|
||||
classify_authorization,
|
||||
extract_form_credentials,
|
||||
forward_syslog,
|
||||
syslog_line,
|
||||
write_syslog_file,
|
||||
@@ -99,14 +100,18 @@ def _log(event_type: str, severity: int = 6, **kwargs) -> None:
|
||||
|
||||
@app.before_request
|
||||
def log_request():
|
||||
cred = classify_authorization(request.headers.get("Authorization"))
|
||||
body = request.get_data(as_text=True)[:4096]
|
||||
cred = (
|
||||
classify_authorization(request.headers.get("Authorization"))
|
||||
or extract_form_credentials(body, request.headers.get("Content-Type"))
|
||||
)
|
||||
_log(
|
||||
"request",
|
||||
method=request.method,
|
||||
path=request.path,
|
||||
remote_addr=request.remote_addr,
|
||||
headers=dict(request.headers),
|
||||
body=request.get_data(as_text=True)[:512],
|
||||
body=body[:512],
|
||||
**(cred or {}),
|
||||
)
|
||||
|
||||
|
||||
@@ -181,6 +181,76 @@ def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, An
|
||||
return None
|
||||
|
||||
|
||||
_FORM_PRINCIPAL_KEYS = (
|
||||
"username", "user", "email", "login", "userid", "account",
|
||||
"log", # wp-login.php
|
||||
"user_login", # WordPress alt
|
||||
"uname", # phpMyAdmin
|
||||
"pma_username",
|
||||
)
|
||||
_FORM_SECRET_KEYS = (
|
||||
"password", "pass", "pwd", "passwd", "passwort", "mot_de_passe",
|
||||
"user_password", # WordPress alt
|
||||
"pma_password", # phpMyAdmin
|
||||
)
|
||||
|
||||
|
||||
def extract_form_credentials(
|
||||
body: Optional[str],
|
||||
content_type: Optional[str],
|
||||
) -> Optional[dict[str, Any]]:
|
||||
"""Parse an `application/x-www-form-urlencoded` body for credentials.
|
||||
|
||||
Returns the universal cred SD shape ready to spread into a
|
||||
``_log(...)`` call when both a principal-shaped key and a secret-
|
||||
shaped key are present in the body. Otherwise returns ``None``.
|
||||
|
||||
Field-name detection is case-insensitive and covers the most common
|
||||
login-form variants (WordPress wp-login.php, phpMyAdmin, Joomla,
|
||||
etc.). Add more entries to ``_FORM_PRINCIPAL_KEYS`` /
|
||||
``_FORM_SECRET_KEYS`` as new templates surface them.
|
||||
"""
|
||||
if not body or not isinstance(content_type, str):
|
||||
return None
|
||||
if not content_type.lower().startswith("application/x-www-form-urlencoded"):
|
||||
return None
|
||||
|
||||
fields: dict[str, str] = {}
|
||||
for pair in body.split("&"):
|
||||
if "=" not in pair:
|
||||
continue
|
||||
k, _, v = pair.partition("=")
|
||||
# urllib decode without importing urllib at module scope (the
|
||||
# template emitters are import-cost-sensitive). Inline the
|
||||
# tiny percent-decode + plus-decode.
|
||||
try:
|
||||
from urllib.parse import unquote_plus
|
||||
key = unquote_plus(k).lower()
|
||||
val = unquote_plus(v)
|
||||
except Exception:
|
||||
continue
|
||||
# First-wins so duplicate-key forms don't get clobbered.
|
||||
fields.setdefault(key, val)
|
||||
|
||||
principal: Optional[str] = None
|
||||
for k in _FORM_PRINCIPAL_KEYS:
|
||||
if k in fields:
|
||||
principal = fields[k]
|
||||
break
|
||||
secret: Optional[str] = None
|
||||
for k in _FORM_SECRET_KEYS:
|
||||
if k in fields:
|
||||
secret = fields[k]
|
||||
break
|
||||
if secret is None:
|
||||
return None
|
||||
return {
|
||||
"principal": principal,
|
||||
"secret_kind": "plaintext",
|
||||
**encode_secret(secret),
|
||||
}
|
||||
|
||||
|
||||
def write_syslog_file(line: str) -> None:
|
||||
"""Emit a syslog line to stdout for container log capture."""
|
||||
print(line, flush=True)
|
||||
|
||||
Reference in New Issue
Block a user