merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,25 @@
# Base image is overridable at build time; defaults to Debian bookworm-slim.
ARG BASE_IMAGE=debian:bookworm-slim
FROM ${BASE_IMAGE}
# Install the Python 3 interpreter, then drop the apt lists to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
&& rm -rf /var/lib/apt/lists/*
# Service sources are copied flat into /opt; the entrypoint script goes to /.
COPY syslog_bridge.py /opt/syslog_bridge.py
COPY instance_seed.py /opt/instance_seed.py
COPY ntlmssp.py /opt/ntlmssp.py
COPY server.py /opt/server.py
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# NOTE(review): server.py as shipped here listens on 445 only; confirm whether
# port 139 is meant to be served as well or is advertised for realism.
EXPOSE 445 139
# Create the unprivileged system user (no login shell, home /opt), install
# libcap2-bin for setcap, and grant cap_net_bind_service to the python3
# binaries found directly under /usr/bin.
# NOTE(review): setcap failures are silenced (2>/dev/null || true); if the
# capability is not applied, binding a port below 1024 as logrelay will
# presumably fail only at container start — confirm this is acceptable.
RUN useradd -r -s /bin/false -d /opt logrelay \
&& apt-get update && apt-get install -y --no-install-recommends libcap2-bin \
&& rm -rf /var/lib/apt/lists/* \
&& (find /usr/bin/ -maxdepth 1 -name 'python3*' -type f -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true)
# The health probe only checks that PID 1 can be signalled; it does not test
# that the service is actually accepting connections.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD kill -0 1 || exit 1
# Everything from here on (including the entrypoint) runs as the unprivileged user.
USER logrelay
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Container entrypoint: stop on the first failing command, then replace this
# shell process with the Python server so the server itself receives signals.
set -e
exec python3 /opt/server.py

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Per-instance stealth seeding for honeypot service templates.
The whole decoy fleet looks identical to a scanner unless each decky
diverges on the boring details: cluster UUIDs, auth salts, uptime, minor
version strings, etc. This module derives a stable per-instance seed
from NODE_NAME (+ optional INSTANCE_ID) and exposes helpers that return
deterministic-per-decky-but-different-across-the-fleet values.
Connection-time jitter is intentionally NOT seeded — two hits to the same
decky should not replay the same latency curve.
"""
from __future__ import annotations
import asyncio
import hashlib
import os
import random
import time
import uuid
from typing import Sequence, TypeVar
#: Element type for the ``pick`` / ``pick_weighted`` helpers.
T = TypeVar("T")

#: Instance name: NODE_NAME if set and non-empty, else HOSTNAME, else "decky".
_HOSTNAME = (
    os.environ.get("NODE_NAME")
    or os.environ.get("HOSTNAME")
    or "decky"
)
#: Optional extra discriminator; empty string when INSTANCE_ID is unset.
_INSTANCE_ID = os.environ.get("INSTANCE_ID", "")
#: Seed material is "<hostname>:<instance id>"; the seed integer is the first
#: 8 bytes (big-endian) of its SHA-256 digest.
_SEED_MATERIAL = f"{_HOSTNAME}:{_INSTANCE_ID}".encode()
_SEED_INT = int.from_bytes(hashlib.sha256(_SEED_MATERIAL).digest()[:8], "big")
#: Deterministic RNG seeded per decky — use for *persistent* choices
#: (versions, UUIDs, stored credentials). Never use for timing.
#: The generator is shared and stateful, so the value a given call returns
#: depends on how many draws were made before it.
rng = random.Random(_SEED_INT)
#: Process boot time — real uptime elapsed since container start.
#: (Captured from the wall clock when this module is imported.)
_PROCESS_START = time.time()
#: Deterministic per-instance fake "has been up for this long at boot"
#: offset, so every decky pretends to have a different history.
#: Between one hour and 45 days, in seconds; this is the first draw from ``rng``.
_BOOT_OFFSET = rng.randint(3600, 45 * 86400)
def hostname() -> str:
    """Return the instance name resolved from the environment at import time."""
    name: str = _HOSTNAME
    return name
def uptime_seconds() -> int:
    """Return the pretended uptime of this instance in whole seconds.

    The value is the fixed per-instance boot offset plus the wall-clock
    time elapsed since this module was imported, truncated to an int.
    """
    elapsed = time.time() - _PROCESS_START
    return int(_BOOT_OFFSET + elapsed)
def boot_epoch() -> int:
    """Return the fake wall-clock boot time of this instance (epoch seconds).

    The boot time is a constant: the moment the process started minus the
    per-instance pretended prior uptime. It is computed directly from
    those two fixed values rather than as ``now - uptime_seconds()``,
    because the latter truncates twice and therefore flips between two
    adjacent seconds from call to call — a "boot time" that moves is an
    easy tell for anyone polling the decoy.
    """
    return int(_PROCESS_START - _BOOT_OFFSET)
def instance_uuid(namespace: str = "") -> str:
    """Return a deterministic, UUID4-looking identifier for this instance.

    Args:
        namespace: Label that separates independent UUIDs on the same
            instance (e.g. one per emulated service).

    Returns:
        The canonical 36-character UUID string. The same hostname and
        namespace always yield the same value.

    The bits come from a name-based UUID over ``"<hostname>:<namespace>"``
    in the nil namespace, then the version/variant fields are restamped
    as version 4. Without the restamp the version nibble reads 5, which
    gives the value away as name-derived instead of random-looking.

    NOTE(review): only the hostname is mixed in; INSTANCE_ID is not —
    confirm that is intended.
    """
    nil_namespace = uuid.UUID(int=0)
    derived = uuid.uuid5(nil_namespace, f"{_HOSTNAME}:{namespace}")
    return str(uuid.UUID(bytes=derived.bytes, version=4))
def instance_hex(nbytes: int, namespace: str = "") -> str:
    """Return a deterministic hex token covering ``nbytes`` bytes.

    The byte stream starts as SHA-256 of ``"<hostname>:<namespace>"`` and,
    while it is too short, is extended with the SHA-256 of everything
    produced so far. The first ``nbytes`` bytes are returned as hex.
    """
    seed = f"{_HOSTNAME}:{namespace}".encode()
    stream = hashlib.sha256(seed).digest()
    while len(stream) < nbytes:
        # Each extension hashes the whole stream accumulated so far.
        extension = hashlib.sha256(stream).digest()
        stream = stream + extension
    token = stream[:nbytes]
    return token.hex()
def pick(choices: Sequence[T]) -> T:
    """Return one element of ``choices`` drawn from the per-instance ``rng``.

    The input is copied into a list first, so any iterable with a finite
    length works. Because ``rng`` is shared, the element returned depends
    on the draws made before this call.
    """
    pool = list(choices)
    return rng.choice(pool)
def pick_weighted(choices: Sequence[tuple[T, float]]) -> T:
    """Return one item from ``[(item, weight), ...]`` using the per-instance ``rng``.

    A single threshold is drawn uniformly in ``[0, sum of weights]`` and
    the first item whose running weight reaches it wins. The last item is
    returned as a fallback if no running total reaches the threshold.
    """
    weight_sum = sum(weight for _, weight in choices)
    threshold = rng.uniform(0, weight_sum)
    running = 0.0
    for candidate, weight in choices:
        running += weight
        if running >= threshold:
            return candidate
    # Guard against floating-point shortfall in the running total.
    return choices[-1][0]
def random_bytes(n: int, namespace: str = "") -> bytes:
    """Return ``n`` deterministic bytes for this instance and namespace.

    The stream is the concatenation of SHA-256 digests of
    ``"<hostname>:<namespace>:<counter>"`` for counter 0, 1, 2, ...,
    cut to ``n`` bytes. A non-positive ``n`` yields ``b""``.
    """
    block_size = hashlib.sha256().digest_size
    # Ceiling division: number of digests needed to cover n bytes.
    blocks_needed = -(-n // block_size) if n > 0 else 0
    stream = b"".join(
        hashlib.sha256(f"{_HOSTNAME}:{namespace}:{counter}".encode()).digest()
        for counter in range(blocks_needed)
    )
    return stream[:n]
def fresh_bytes(n: int) -> bytes:
    """Return ``n`` bytes from ``os.urandom``.

    Unlike the seeded helpers, the output differs on every call; use it
    for per-connection nonces and salts.
    """
    nonce = os.urandom(n)
    return nonce
async def jitter(min_ms: int = 5, max_ms: int = 120) -> None:
    """Sleep (without blocking the event loop) for a random delay.

    The delay is drawn uniformly between ``min_ms`` and ``max_ms``
    milliseconds from the module-level ``random`` generator, not the
    per-instance seeded one, so repeated connections to the same decky do
    not see a repeating latency pattern.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    await asyncio.sleep(delay_ms / 1000.0)
def jitter_sync(min_ms: int = 5, max_ms: int = 120) -> None:
    """Block the calling thread for a random delay.

    Synchronous counterpart of ``jitter`` for servers that do not run on
    asyncio; the delay is uniform between ``min_ms`` and ``max_ms``
    milliseconds and comes from the unseeded ``random`` generator.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    time.sleep(delay_ms / 1000.0)

View File

@@ -0,0 +1,132 @@
"""NTLMSSP Type 3 (Authenticate) message parser.
Standalone module shared between any honeypot template that wants to
land NTLM credentials in the universal :class:`Credential` table.
Currently consumed by the SMB and RDP-NLA templates.
The parser is intentionally narrow: only :func:`parse_type3` is public,
and it reads a single Type 3 buffer (the bytes starting with the
``NTLMSSP\\0`` signature). Callers handle SPNEGO unwrapping, SMB
SessionSetup framing, RDP/CredSSP TSRequest parsing, etc.
Reference: MS-NLMP §2.2.1.3 (AUTHENTICATE_MESSAGE).
Cred-shape mapping for the universal Credential model:
- ``principal`` = ``"DOMAIN\\username"`` when domain present, else
bare username. Both decoded UTF-16-LE when NEGOTIATE_UNICODE is set
in the message flags (it always is in modern clients).
- ``secret_kind`` = ``"ntlmssp_v1"`` when the NtChallengeResponse is
exactly 24 bytes (the legacy fixed-size response), ``"ntlmssp_v2"``
for any other non-empty length (NTLMv2 appends a variable-length
blob to its 16-byte HMAC).
- ``secret_b64`` = base64 of the entire NtChallengeResponse bytes.
This is the canonical "hashcat -m 5600" (NTLMv2) or "-m 5500"
(NTLMv1) input.
"""
from __future__ import annotations
import base64
import struct
from typing import Optional
# Eight-byte signature every NTLMSSP message starts with; parse_type3()
# rejects buffers that do not begin with it.
NTLMSSP_SIG = b"NTLMSSP\x00"
# NegotiateFlags bit: when set, string fields are decoded as UTF-16-LE,
# otherwise as ASCII.
NEGOTIATE_UNICODE = 0x00000001
def find_ntlmssp(buf: bytes) -> int:
    """Locate the first NTLMSSP signature inside ``buf``.

    Lets a caller holding a SPNEGO-wrapped or SMB-embedded blob jump
    straight to the inner NTLMSSP message without decoding the outer
    ASN.1.

    Returns:
        The byte offset of the signature, or -1 when it does not occur.
    """
    position = buf.find(NTLMSSP_SIG)
    return position
def _read_field(buf: bytes, off: int) -> tuple[int, int, int]:
"""Read an NTLMSSP field record: (Len, MaxLen, BufferOffset)."""
if off + 8 > len(buf):
return 0, 0, 0
f_len, f_max, f_off = struct.unpack_from("<HHI", buf, off)
return f_len, f_max, f_off
def _slice(buf: bytes, off: int, length: int) -> bytes:
end = off + length
if off < 0 or end > len(buf) or length < 0:
return b""
return buf[off:end]
def _decode_str(raw: bytes, unicode: bool) -> str:
if unicode:
return raw.decode("utf-16-le", errors="replace")
return raw.decode("ascii", errors="replace")
def parse_type3(blob: bytes) -> Optional[dict]:
    """Parse an NTLMSSP Type 3 (AUTHENTICATE_MESSAGE) buffer.

    ``blob`` must start at the ``NTLMSSP\\0`` signature. On success the
    result is a dict in the universal credential shape, ready to be
    spread into a ``_log(...)`` call::

        {
            "username": "alice",             # service-specific identity
            "domain": "ACME",                # domain (may be empty)
            "principal": "ACME\\\\alice",     # hoisted column
            "secret_kind": "ntlmssp_v2",     # or _v1
            "secret_printable": "<hex>",     # NT response in hex
            "secret_b64": "<base64>",        # NT response, lossless
        }

    Returns:
        The dict above, or ``None`` when the buffer is too short, does
        not carry the signature, is not message type 3, or has no NT
        response (anonymous bind or malformed message).
    """
    # Cheap rejections first: minimum size, signature, message type, and
    # room for the NegotiateFlags word that ends at offset 64.
    if len(blob) < 32:
        return None
    if not blob.startswith(NTLMSSP_SIG):
        return None
    (message_type,) = struct.unpack_from("<I", blob, 8)
    if message_type != 3:
        return None
    if len(blob) < 64:
        return None

    # Field records used here (offsets per MS-NLMP §2.2.1.3):
    #   20 NtChallengeResponseFields
    #   28 DomainNameFields
    #   36 UserNameFields
    #   60 NegotiateFlags
    nt_len, _nt_max, nt_off = _read_field(blob, 20)
    dom_len, _dom_max, dom_off = _read_field(blob, 28)
    user_len, _user_max, user_off = _read_field(blob, 36)
    (flags,) = struct.unpack_from("<I", blob, 60)
    is_unicode = (flags & NEGOTIATE_UNICODE) != 0

    nt_response = _slice(blob, nt_off, nt_len)
    if not nt_response:
        # Without an NT response there is nothing to record as a credential.
        return None

    domain = _decode_str(_slice(blob, dom_off, dom_len), is_unicode)
    username = _decode_str(_slice(blob, user_off, user_len), is_unicode)

    # A 24-byte response is classified as NTLMv1; every other length is
    # treated as NTLMv2. Length is the only discriminator used.
    secret_kind = "ntlmssp_v2" if len(nt_response) != 24 else "ntlmssp_v1"

    # An empty username with no domain leaves the principal unset.
    principal = f"{domain}\\{username}" if domain else (username or None)

    return {
        "username": username,
        "domain": domain,
        "principal": principal,
        "secret_kind": secret_kind,
        "secret_printable": nt_response.hex(),
        "secret_b64": base64.b64encode(nt_response).decode("ascii"),
    }

View File

@@ -0,0 +1,296 @@
#!/usr/bin/env python3
"""Minimal honeypot SMB2 server.
Hand-rolled asyncio framer that does just enough of MS-SMB2 to lure a
client through Negotiate → Session Setup (Type1) → Session Setup
(Type3), at which point we extract the inner NTLMSSP Type 3 with the
shared :func:`ntlmssp.parse_type3` parser and emit a credential SD
block. Authentication always fails with STATUS_LOGON_FAILURE — the
attacker's hash lands in the Credential table; the attacker does not
land on the host.
References:
- MS-SMB2 §2.2.3 NEGOTIATE Request, §2.2.4 NEGOTIATE Response
- MS-SMB2 §2.2.5 SESSION_SETUP Request, §2.2.6 SESSION_SETUP Response
- MS-NLMP §2.2.1 NTLMSSP messages (CHALLENGE_MESSAGE Type 2)
- RFC 1002 §4.3 NetBIOS Session Service framing
"""
from __future__ import annotations
import asyncio
import os
import struct
import instance_seed
from ntlmssp import find_ntlmssp, parse_type3
from syslog_bridge import syslog_line, write_syslog_file, forward_syslog
# Name written into every log line as the syslog HOSTNAME.
# NOTE(review): the fallback here is "WORKSTATION", while instance_seed falls
# back to HOSTNAME and then "decky" — confirm the two are meant to differ.
NODE_NAME = os.environ.get("NODE_NAME", "WORKSTATION")
SERVICE_NAME = "smb"
# Handed to forward_syslog() on every log call.
LOG_TARGET = os.environ.get("LOG_TARGET", "")
LISTEN_HOST = "0.0.0.0" # nosec B104 — honeypot binds all interfaces by design
LISTEN_PORT = 445
# SMB2 status codes
STATUS_SUCCESS = 0x00000000
STATUS_MORE_PROCESSING_REQUIRED = 0xC0000016
STATUS_LOGON_FAILURE = 0xC000006D
# SMB2 commands
SMB2_NEGOTIATE = 0x0000
SMB2_SESSION_SETUP = 0x0001
# First four bytes of every SMB2 PDU; anything else is dropped by the handler.
SMB2_MAGIC = b"\xfeSMB"
# NBSS frame type accepted by the handler (session message).
NBSS_SESSION_MESSAGE = 0x00
# Per-instance NTLM challenge: deterministic-per-decky-but-different-
# across-the-fleet. Derived from NODE_NAME so two captures from the
# same decky reuse the same challenge (lets offline attackers retry
# wordlists), while every decky in the fleet differs (looks like a
# real population of hosts to a scanner).
# NOTE(review): the challenge never changes between connections to the same
# decky; a client that connects twice can observe that — confirm this
# trade-off is intended.
SERVER_CHALLENGE = instance_seed.random_bytes(8, "ntlm_challenge")
# 16-byte ServerGuid placed in the NEGOTIATE response, also fixed per instance.
SERVER_GUID = instance_seed.random_bytes(16, "smb_server_guid")
# Read caps; an attacker shouldn't be able to make us allocate
# unbounded memory just by lying about NetBIOS frame length.
MAX_NBSS_LEN = 1 * 1024 * 1024 # 1 MiB is plenty for SessionSetup blobs
def _log(event_type: str, severity: int = 6, **kwargs) -> None:
    """Format one event as a syslog line and hand it to both sinks.

    Args:
        event_type: Event name, used as the syslog MSGID.
        severity: Syslog severity (default 6).
        **kwargs: Passed straight through to ``syslog_line``.
    """
    record = syslog_line(SERVICE_NAME, NODE_NAME, event_type, severity, **kwargs)
    write_syslog_file(record)
    forward_syslog(record, LOG_TARGET)
# ── SPNEGO / NTLMSSP Type 2 builder ──────────────────────────────────────────
def _build_ntlmssp_type2(challenge: bytes) -> bytes:
"""Build a minimal NTLMSSP CHALLENGE_MESSAGE (MS-NLMP §2.2.1.2).
Layout (all little-endian):
0 "NTLMSSP\\0" 8 bytes
8 MessageType=2 u32
12 TargetNameFields 8 bytes (Len, MaxLen, Offset)
20 NegotiateFlags u32
24 ServerChallenge 8 bytes
32 Reserved 8 bytes
40 TargetInfoFields 8 bytes
48 Version 8 bytes
56 Payload TargetName + TargetInfo
We advertise NEGOTIATE_UNICODE | NEGOTIATE_NTLM | NEGOTIATE_TARGET_INFO
(0x00828201) which is what real Windows servers send in practice; the
attacker's client uses these flags to decide whether to send Unicode
field strings in its Type 3 — the parser handles either.
"""
target = "WORKGROUP".encode("utf-16-le")
# AV pair list: NetBIOS computer name + EOL terminator
av_name = "WORKGROUP".encode("utf-16-le")
target_info = struct.pack("<HH", 1, len(av_name)) + av_name + struct.pack("<HH", 0, 0)
flags = 0x00828201 # UNICODE | NTLM | TARGET_INFO | always_sign
payload = target + target_info
target_off = 56
info_off = target_off + len(target)
return (
b"NTLMSSP\x00"
+ struct.pack("<I", 2) # Type 2
+ struct.pack("<HHI", len(target), len(target), target_off)
+ struct.pack("<I", flags)
+ challenge
+ b"\x00" * 8 # reserved
+ struct.pack("<HHI", len(target_info), len(target_info), info_off)
+ b"\x00" * 8 # version
+ payload
)
def _wrap_spnego_type2(ntlm_type2: bytes) -> bytes:
    """Wrap an NTLMSSP Type 2 blob in a DER-encoded SPNEGO NegTokenResp.

    The envelope is shipped so that clients which insist on well-formed
    SPNEGO still go on to send their Type 3. The hand-built structure is::

        [1] NegTokenResp
          SEQUENCE
            [0] negState      ENUMERATED 1 (accept-incomplete)
            [1] supportedMech OID 1.3.6.1.4.1.311.2.2.10 (NTLMSSP)
            [2] responseToken OCTET STRING = ntlm_type2
    """

    def tlv(tag: int, content: bytes) -> bytes:
        # One DER element: tag byte, encoded length, then the content.
        return bytes((tag,)) + _der_len(len(content)) + content

    mech_oid = bytes.fromhex("060a2b06010401823702020a")
    neg_state = bytes.fromhex("a0030a0101")
    supported_mech = tlv(0xA1, mech_oid)
    response_token = tlv(0xA2, tlv(0x04, ntlm_type2))
    sequence = tlv(0x30, neg_state + supported_mech + response_token)
    # NegTokenResp is alternative [1] of the outer choice.
    return tlv(0xA1, sequence)
def _der_len(n: int) -> bytes:
if n < 0x80:
return bytes([n])
body = n.to_bytes((n.bit_length() + 7) // 8, "big")
return bytes([0x80 | len(body)]) + body
# ── SMB2 PDU helpers ─────────────────────────────────────────────────────────
def _smb2_header(command: int, status: int, message_id: int, session_id: int = 0) -> bytes:
    """Build the 64-byte SMB2 sync header for a response (MS-SMB2 §2.2.1).

    Args:
        command: SMB2 command code being answered.
        status: NT status code for the response.
        message_id: MessageId to echo back.
        session_id: SessionId to place in the header (default 0).
    """
    return struct.pack(
        "<4sHHIHHIIQIIQ16s",
        SMB2_MAGIC,   # ProtocolId
        64,           # StructureSize
        0,            # CreditCharge
        status,       # Status
        command,      # Command
        1,            # CreditResponse
        0x00000001,   # Flags = SERVER_TO_REDIR
        0,            # NextCommand
        message_id,   # MessageId
        0,            # Reserved (sync)
        0,            # TreeId
        session_id,   # SessionId
        bytes(16),    # Signature
    )
def _negotiate_response(message_id: int) -> bytes:
    """SMB2 NEGOTIATE response (MS-SMB2 §2.2.4) — dialect 0x0210 (SMB 2.1).

    Args:
        message_id: MessageId of the request being answered.

    Returns:
        The 64-byte SMB2 header followed by the 64-byte fixed response
        body; no security buffer is attached.

    SystemTime carries the current time as a FILETIME instead of zero: a
    zero value decodes to 1601-01-01 and makes the server stand out
    immediately in any capture.
    """
    import time  # local import: this module does not import time at top level

    # FILETIME = 100-nanosecond ticks since 1601-01-01 UTC; the constant is
    # the tick count between that epoch and the Unix epoch.
    system_time = time.time_ns() // 100 + 116_444_736_000_000_000
    body = (
        struct.pack("<H", 65)            # StructureSize
        + struct.pack("<H", 0)           # SecurityMode
        + struct.pack("<H", 0x0210)      # DialectRevision = SMB 2.1
        + struct.pack("<H", 0)           # Reserved
        + SERVER_GUID
        + struct.pack("<I", 0)           # Capabilities
        + struct.pack("<I", 0x00010000)  # MaxTransactSize
        + struct.pack("<I", 0x00010000)  # MaxReadSize
        + struct.pack("<I", 0x00010000)  # MaxWriteSize
        + struct.pack("<Q", system_time) # SystemTime (FILETIME, now)
        + struct.pack("<Q", 0)           # ServerStartTime
        + struct.pack("<H", 128)         # SecurityBufferOffset (header64+body64)
        + struct.pack("<H", 0)           # SecurityBufferLength
        + struct.pack("<I", 0)           # Reserved2
    )
    return _smb2_header(SMB2_NEGOTIATE, STATUS_SUCCESS, message_id) + body
def _session_setup_response(message_id: int, session_id: int, sec_blob: bytes, status: int) -> bytes:
    """Build an SMB2 SESSION_SETUP response (MS-SMB2 §2.2.6).

    Args:
        message_id: MessageId of the request being answered.
        session_id: SessionId to place in the header.
        sec_blob: Security buffer appended after the fixed body (may be empty).
        status: NT status code for the response.

    Returns:
        Header, 8-byte fixed body, then ``sec_blob``.
    """
    fixed_body = struct.pack(
        "<HHHH",
        9,              # StructureSize
        0,              # SessionFlags
        64 + 8,         # SecurityBufferOffset: right after header + fixed body
        len(sec_blob),  # SecurityBufferLength
    )
    header = _smb2_header(SMB2_SESSION_SETUP, status, message_id, session_id)
    return header + fixed_body + sec_blob
# ── Connection handler ───────────────────────────────────────────────────────
# Longest we wait for the next piece of a frame before dropping a silent
# peer. Without a cap, a client that connects and then sends nothing keeps
# its handler task and socket alive indefinitely.
_CLIENT_READ_TIMEOUT = 30.0


async def _read_smb2_pdu(reader: asyncio.StreamReader) -> bytes | None:
    """Read one NBSS-framed SMB2 PDU; return None if the peer should be dropped."""
    # NetBIOS Session Service framing: 1 type byte + 3 length bytes
    hdr = await asyncio.wait_for(reader.readexactly(4), _CLIENT_READ_TIMEOUT)
    if hdr[0] != NBSS_SESSION_MESSAGE:
        # Session Request / Keepalive / etc — quietly drop.
        return None
    nb_len = int.from_bytes(hdr[1:4], "big")
    if nb_len < 64 or nb_len > MAX_NBSS_LEN:
        return None
    pdu = await asyncio.wait_for(reader.readexactly(nb_len), _CLIENT_READ_TIMEOUT)
    if not pdu.startswith(SMB2_MAGIC):
        # SMB1 Negotiate or other — not implemented; drop.
        return None
    return pdu


def _session_setup_blob(pdu: bytes) -> bytes | None:
    """Return the SESSION_SETUP security buffer; None if the fixed body is truncated."""
    if len(pdu) < 64 + 24:
        return None
    # SecurityBufferOffset / SecurityBufferLength sit 12 bytes into the body.
    sec_off, sec_len = struct.unpack_from("<HH", pdu, 64 + 12)
    return pdu[sec_off:sec_off + sec_len] if sec_len else b""


async def _handle_client(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
    """Serve one client: Negotiate, then two Session Setup rounds, then fail.

    The first Session Setup is answered with an NTLMSSP Type 2 challenge;
    any later one is searched for an NTLMSSP Type 3, which is logged as an
    ``auth_attempt`` when it parses, and is always answered with
    STATUS_LOGON_FAILURE before disconnecting. Any other frame or command
    ends the connection.

    Each read is bounded by ``_CLIENT_READ_TIMEOUT`` and each response is
    drained before the next read, so a stalled or non-reading peer cannot
    hold the handler open or grow the write buffer without limit.

    Connection, disconnect and unexpected-error events are logged; read
    timeouts, short reads and connection errors end the session quietly.
    """
    peer = writer.get_extra_info("peername") or ("?", 0)
    src_ip, src_port = peer[0], peer[1]
    _log("connection", src_ip=src_ip, src_port=src_port)
    session_id = 0x1000_0000_0000_0001
    setup_round = 0
    try:
        while True:
            pdu = await _read_smb2_pdu(reader)
            if pdu is None:
                break
            command = struct.unpack_from("<H", pdu, 12)[0]
            message_id = struct.unpack_from("<Q", pdu, 24)[0]
            if command == SMB2_NEGOTIATE:
                _send_nbss(writer, _negotiate_response(message_id))
                await writer.drain()
            elif command == SMB2_SESSION_SETUP:
                setup_round += 1
                blob = _session_setup_blob(pdu)
                if blob is None:
                    break
                if setup_round == 1:
                    # First Session Setup → respond with NTLMSSP Type 2
                    type2 = _build_ntlmssp_type2(SERVER_CHALLENGE)
                    spnego = _wrap_spnego_type2(type2)
                    resp = _session_setup_response(
                        message_id, session_id, spnego, STATUS_MORE_PROCESSING_REQUIRED
                    )
                    _send_nbss(writer, resp)
                    await writer.drain()
                else:
                    # Second Session Setup → contains NTLMSSP Type 3
                    off = find_ntlmssp(blob)
                    if off >= 0:
                        cred = parse_type3(blob[off:])
                        if cred:
                            _log(
                                "auth_attempt",
                                src_ip=src_ip,
                                src_port=src_port,
                                **cred,
                            )
                    # Always fail authentication
                    resp = _session_setup_response(
                        message_id, session_id, b"", STATUS_LOGON_FAILURE
                    )
                    _send_nbss(writer, resp)
                    await writer.drain()
                    break
            else:
                # We only implement Negotiate + SessionSetup; other commands
                # could keep an attacker engaged longer but require state we
                # don't carry. Disconnect.
                break
    except (asyncio.IncompleteReadError, asyncio.TimeoutError, ConnectionError):
        # Peer went away, stalled past the read timeout, or reset the socket.
        pass
    except Exception as exc:  # noqa: BLE001 — honeypot must never crash the worker
        _log("error", severity=4, src_ip=src_ip, msg=str(exc))
    finally:
        try:
            writer.close()
            await writer.wait_closed()
        except Exception:  # noqa: BLE001 — closing is best-effort by design
            pass
        _log("disconnect", src_ip=src_ip, src_port=src_port)
def _send_nbss(writer: asyncio.StreamWriter, smb_pdu: bytes) -> None:
    """Queue ``smb_pdu`` on ``writer`` behind a 4-byte NBSS header.

    The header is the session-message type byte followed by the PDU
    length as a 3-byte big-endian integer. The data is only written to
    the stream buffer; this function does not drain it.
    """
    length_field = len(smb_pdu).to_bytes(3, "big")
    frame = b"".join((bytes((NBSS_SESSION_MESSAGE,)), length_field, smb_pdu))
    writer.write(frame)
async def _main() -> None:
    """Log startup, bind the listener and serve clients until cancelled."""
    _log("startup", msg=f"SMB server starting as {NODE_NAME}")
    listener = await asyncio.start_server(_handle_client, LISTEN_HOST, LISTEN_PORT)
    async with listener:
        await listener.serve_forever()
if __name__ == "__main__":
    # Script entry point: run the server on a fresh event loop. An interrupt
    # (KeyboardInterrupt) is treated as a normal stop and logged as "shutdown".
    try:
        asyncio.run(_main())
    except KeyboardInterrupt:
        _log("shutdown")

View File

@@ -0,0 +1,261 @@
#!/usr/bin/env python3
"""
Shared RFC 5424 syslog helper used by service containers.
Services call syslog_line() to format an RFC 5424 message, then
write_syslog_file() to emit it to stdout — the container runtime
captures it, and the host-side collector streams it into the log file.
RFC 5424 structure:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16). SD element ID uses PEN 55555.
"""
import base64
import re
from datetime import datetime, timezone
from typing import Any, Optional
# ─── Constants ────────────────────────────────────────────────────────────────
# Facility number used for every line; PRI is computed as facility * 8 + severity.
_FACILITY_LOCAL0 = 16
# SD-ID of the single structured-data element emitted per line.
_SD_ID = "relay@55555"
# Placeholder written for any header field that has no value.
_NILVALUE = "-"
# Syslog severity levels, most to least severe.
SEVERITY_EMERG = 0
SEVERITY_ALERT = 1
SEVERITY_CRIT = 2
SEVERITY_ERROR = 3
SEVERITY_WARNING = 4
SEVERITY_NOTICE = 5
SEVERITY_INFO = 6
SEVERITY_DEBUG = 7
# Header fields are truncated to these lengths by syslog_line().
_MAX_HOSTNAME = 255
_MAX_APPNAME = 48
_MAX_MSGID = 32
# ─── Formatter ────────────────────────────────────────────────────────────────
def _sd_escape(value: str) -> str:
"""Escape SD-PARAM-VALUE per RFC 5424 §6.3.3."""
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
def _sd_element(fields: dict[str, Any]) -> str:
    """Render ``fields`` as a single RFC 5424 SD-ELEMENT.

    Each value is converted with ``str()`` and escaped; keys are written
    as-is. An empty mapping yields the NILVALUE placeholder.
    """
    if not fields:
        return _NILVALUE
    rendered = []
    for key, value in fields.items():
        rendered.append(f'{key}="{_sd_escape(str(value))}"')
    params = " ".join(rendered)
    return f"[{_SD_ID} {params}]"
def syslog_line(
    service: str,
    hostname: str,
    event_type: str,
    severity: int = SEVERITY_INFO,
    timestamp: datetime | None = None,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """Format one event as an RFC 5424 syslog line (no trailing newline).

    Args:
        service: APP-NAME (e.g. "http", "mysql").
        hostname: HOSTNAME (node name).
        event_type: MSGID (e.g. "request", "login_attempt").
        severity: Syslog severity integer (default: INFO=6).
        timestamp: Datetime to stamp the line with; defaults to the
            current UTC time.
        msg: Optional free-text MSG appended after the structured data.
        **fields: Encoded as structured-data params.

    Returns:
        ``<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID SD[ MSG]``. Empty
        header values become ``-`` and each header field is truncated to
        its maximum length.
    """
    when = timestamp or datetime.now(timezone.utc)
    priority = _FACILITY_LOCAL0 * 8 + severity
    host = (hostname or _NILVALUE)[:_MAX_HOSTNAME]
    app_name = (service or _NILVALUE)[:_MAX_APPNAME]
    msg_id = (event_type or _NILVALUE)[:_MAX_MSGID]
    structured = _sd_element(fields)
    header = f"<{priority}>1 {when.isoformat()} {host} {app_name} {_NILVALUE} {msg_id} {structured}"
    if msg:
        return f"{header} {msg}"
    return header
def encode_secret(secret: str) -> dict[str, str]:
    """Encode a credential secret into the two universal SD-block fields.

    The secret is first encoded to UTF-8 (unencodable characters are
    replaced). From those bytes:

    * ``secret_printable`` keeps bytes in ``[0x20, 0x7f)`` and turns every
      other byte into ``'?'``, so the field is always plain ASCII.
    * ``secret_b64`` is the base64 of the full byte string.

    Intended use::

        _log("auth_attempt", principal=user, **encode_secret(password))

    Returns:
        ``{"secret_printable": ..., "secret_b64": ...}``.
    """
    payload = secret.encode("utf-8", errors="replace")
    # 0x3f is "?" — the stand-in for every byte outside printable ASCII.
    visible = bytearray(b if 0x20 <= b < 0x7f else 0x3F for b in payload)
    encoded = base64.b64encode(payload)
    return {
        "secret_printable": visible.decode("ascii"),
        "secret_b64": encoded.decode("ascii"),
    }
# Matches one Digest parameter: either name="quoted value" (groups 1, 2) or
# name=bare-token (groups 3, 4).
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)')


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP Authorization header value into Credential SD fields.

    Returns a dict with the universal cred shape ready to spread into a
    ``_log(...)`` call::

        auth = request.headers.get("Authorization")
        cred = classify_authorization(auth)
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):

    * Basic — base64(user:pw); decoded → ``principal=user`` +
      ``secret_kind="plaintext"`` + ``encode_secret(pw)``.
    * Bearer / Token — opaque token; ``principal=None`` +
      ``secret_kind="http_bearer"`` + ``encode_secret(token)``.
    * Digest — ``principal=username`` from header +
      ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``.

    Returns:
        The credential dict, or ``None`` when the value is missing, not a
        string, has no payload after the scheme, uses an unrecognised
        scheme (AWS4-HMAC-SHA256, NTLM, Negotiate, …), is Basic with
        invalid base64 or no ``:`` separator, or is Digest without a
        ``response`` parameter.
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # binascii.Error (malformed base64) is a ValueError subclass.
            return None
        decoded = raw.decode("utf-8", errors="replace")
        if ":" not in decoded:
            return None
        # Only the first colon separates user from password.
        user, _, pw = decoded.partition(":")
        return {
            "principal": user,
            "secret_kind": "plaintext",
            **encode_secret(pw),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for m in _DIGEST_PARAM_RE.finditer(rest):
            k = m.group(1) or m.group(3)
            v = m.group(2) if m.group(2) is not None else m.group(4)
            if k:
                # Later occurrences of a parameter overwrite earlier ones.
                params[k.lower()] = v
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
# Form field names treated as the login identity, in priority order.
_FORM_PRINCIPAL_KEYS = (
    "username", "user", "email", "login", "userid", "account",
    "log",           # wp-login.php
    "user_login",    # WordPress alt
    "uname",         # phpMyAdmin
    "pma_username",
)
# Form field names treated as the secret, in priority order.
_FORM_SECRET_KEYS = (
    "password", "pass", "pwd", "passwd", "passwort", "mot_de_passe",
    "user_password",  # WordPress alt
    "pma_password",   # phpMyAdmin
)


def extract_form_credentials(
    body: Optional[str],
    content_type: Optional[str],
) -> Optional[dict[str, Any]]:
    """Parse an `application/x-www-form-urlencoded` body for credentials.

    Returns the universal cred SD shape ready to spread into a
    ``_log(...)`` call whenever a secret-shaped key is present in the
    body. A principal-shaped key is optional: when none is found the
    result carries ``principal=None``.

    Field-name detection is case-insensitive and covers the most common
    login-form variants (WordPress wp-login.php, phpMyAdmin, Joomla,
    etc.). Add more entries to ``_FORM_PRINCIPAL_KEYS`` /
    ``_FORM_SECRET_KEYS`` as new templates surface them.

    Args:
        body: Raw request body.
        content_type: Value of the Content-Type header.

    Returns:
        ``{"principal", "secret_kind", "secret_printable", "secret_b64"}``
        or ``None`` when the body is empty, the content type is not
        form-urlencoded, or no secret-shaped field is present.
    """
    if not body or not isinstance(content_type, str):
        return None
    if not content_type.lower().startswith("application/x-www-form-urlencoded"):
        return None

    # Imported lazily (template emitters are import-cost-sensitive), but
    # once per call rather than once per field.
    from urllib.parse import unquote_plus

    fields: dict[str, str] = {}
    for pair in body.split("&"):
        if "=" not in pair:
            continue
        k, _, v = pair.partition("=")
        try:
            key = unquote_plus(k).lower()
            val = unquote_plus(v)
        except ValueError:
            # Best effort: skip a field that cannot be decoded.
            continue
        # First-wins so duplicate-key forms don't get clobbered.
        fields.setdefault(key, val)

    principal = next(
        (fields[name] for name in _FORM_PRINCIPAL_KEYS if name in fields), None
    )
    secret = next(
        (fields[name] for name in _FORM_SECRET_KEYS if name in fields), None
    )
    if secret is None:
        return None
    return {
        "principal": principal,
        "secret_kind": "plaintext",
        **encode_secret(secret),
    }
def write_syslog_file(line: str) -> None:
    """Print ``line`` to stdout and flush immediately.

    Despite the name, nothing is written to a file here; the line goes to
    stdout for the container's log capture.
    """
    print(line, end="\n", flush=True)
def forward_syslog(line: str, log_target: str) -> None:
    """Do nothing; kept so callers have a stable forwarding hook.

    Service containers do not forward over TCP themselves — that is
    handled by rsyslog — so both arguments are ignored.
    """
    return None