feat(creds): NTLMSSP Type 3 parser + DEBT-040 for SMB/RDP/NLA framers

Ships the load-bearing primitive both Phase 5 (SMB) and Phase 7 (RDP NLA) need: a standalone NTLMSSP Type 3 (AUTHENTICATE_MESSAGE) parser per MS-NLMP §2.2.1.3. Surface: parse_type3(blob) -> dict | None find_ntlmssp(buf) -> int # locate NTLMSSP\\0 inside SPNEGO outer Returns the universal Credential SD shape: username + domain (decoded UTF-16-LE or ASCII per NEGOTIATE_UNICODE) principal = "DOMAIN\\\\username" secret_kind = "ntlmssp_v1" (24-byte fixed) or "ntlmssp_v2" (variable) secret_b64 = base64 of NtChallengeResponse — canonical hashcat input (-m 5500 v1, -m 5600 v2) Bounds-checked for untrusted-input safety. Anonymous binds (empty NT response) return None — no credential to record. 7 unit tests cover NTLMv1/v2 distinction, ASCII vs Unicode strings, empty-domain shape, malformed signature/type rejection, and SPNEGO-wrapped find_ntlmssp() lookup. DEBT-040 opens to track the three remaining protocol framers that will consume this parser: - SMB: hand-rolled SMB2 + Session Setup framer (~200 LoC) replacing Impacket's opaque SimpleSMBServer - RDP basic auth: TPKT/X.224/MCS framer for legacy plaintext path (~150 LoC) - RDP NLA: TLS upgrade + CredSSP TSRequest parser, reuses parse_type3 via the SPNEGO inner blob (~250 LoC) These are substantial protocol implementations each — landing them inline with Phase 1-3+6's cred coverage rollout would have inflated the session beyond reasonable scope. Cred-reuse analytics already work across the 12 services covered in this session; the deferred three just round out the fleet.
2026-04-25 07:19:30 -04:00
parent 9777aa7677
commit afe02af5c2
3 changed files with 304 additions and 2 deletions
--- a/decnet/templates/_shared/ntlmssp.py
+++ b/decnet/templates/_shared/ntlmssp.py
@@ -0,0 +1,132 @@
+"""NTLMSSP Type 3 (Authenticate) message parser.
+
+Standalone module shared between any honeypot template that wants to
+land NTLM credentials in the universal :class:`Credential` table.
+Intended consumers are the SMB and RDP-NLA templates (see DEBT-040).
+
+The parser is intentionally narrow: :func:`parse_type3` reads a single
+Type 3 buffer (starting at the ``NTLMSSP\\0`` signature), which
+:func:`find_ntlmssp` can locate. Callers handle SPNEGO unwrapping, SMB
+SessionSetup framing, RDP/CredSSP TSRequest parsing, etc.
+
+Reference: MS-NLMP §2.2.1.3 (AUTHENTICATE_MESSAGE).
+
+Cred-shape mapping for the universal Credential model:
+- ``principal`` = ``"DOMAIN\\username"`` when domain present, else
+  bare username. Both decoded UTF-16-LE when NEGOTIATE_UNICODE is set
+  in the message flags (it always is in modern clients).
+- ``secret_kind`` = ``"ntlmssp_v1"`` when the NtChallengeResponse is
+  exactly 24 bytes (the legacy fixed-size response), ``"ntlmssp_v2"``
+  for any other length (NTLMv2 carries a 16-byte HMAC + variable blob).
+- ``secret_b64`` = base64 of the entire NtChallengeResponse bytes.
+  This is the canonical "hashcat -m 5600" (NTLMv2) or "-m 5500"
+  (NTLMv1) input.
+"""
+from __future__ import annotations
+
+import base64
+import struct
+from typing import Optional
+
+NTLMSSP_SIG = b"NTLMSSP\x00"
+NEGOTIATE_UNICODE = 0x00000001
+
+
+def find_ntlmssp(buf: bytes) -> int:
+    """Return the offset of the first NTLMSSP signature in ``buf`` or -1.
+
+    Useful for callers that have a SPNEGO-wrapped or SMB-embedded blob
+    and want to skip straight to the inner Type 1/2/3 message without
+    walking the outer ASN.1. Only the 8-byte signature is matched.
+    """
+    return buf.find(NTLMSSP_SIG)
+
+
+def _read_field(buf: bytes, off: int) -> tuple[int, int, int]:
+    """Read an NTLMSSP field record: (Len, MaxLen, BufferOffset)."""
+    if off + 8 > len(buf):  # a record is 8 bytes: two uint16 + one uint32
+        return 0, 0, 0  # truncated header → report an empty field
+    f_len, f_max, f_off = struct.unpack_from("<HHI", buf, off)
+    return f_len, f_max, f_off
+
+
+def _slice(buf: bytes, off: int, length: int) -> bytes:
+    end = off + length  # exclusive end of the requested window
+    if off < 0 or end > len(buf) or length < 0:
+        return b""  # out-of-range request → empty bytes, never a partial slice
+    return buf[off:end]
+
+
+def _decode_str(raw: bytes, unicode: bool) -> str:
+    if unicode:  # parse_type3 passes the NEGOTIATE_UNICODE bit here
+        return raw.decode("utf-16-le", errors="replace")
+    return raw.decode("ascii", errors="replace")  # undecodable bytes → U+FFFD
+
+
+def parse_type3(blob: bytes) -> Optional[dict]:
+    """Parse an NTLMSSP Type 3 (AUTHENTICATE_MESSAGE) buffer.
+
+    Returns a dict with the universal credential SD shape ready to
+    spread into a ``_log(...)`` call::
+
+        {
+          "username": "alice",            # service-specific identity
+          "domain": "ACME",               # domain (may be empty)
+          "principal": "ACME\\\\alice",      # hoisted column (None if both empty)
+          "secret_kind": "ntlmssp_v2",   # or _v1
+          "secret_printable": "<hex>",   # NT response in hex
+          "secret_b64": "<base64>",      # NT response, lossless
+        }
+
+    Returns ``None`` for a malformed/non-Type-3 blob or an empty NT response.
+    """
+    if len(blob) < 32 or not blob.startswith(NTLMSSP_SIG):
+        return None
+    msg_type = struct.unpack_from("<I", blob, 8)[0]  # MessageType follows the signature
+    if msg_type != 3:
+        return None
+
+    # Field record layout (all from MS-NLMP §2.2.1.3):
+    #   12 LmChallengeResponseFields
+    #   20 NtChallengeResponseFields
+    #   28 DomainNameFields
+    #   36 UserNameFields
+    #   44 WorkstationFields
+    #   52 EncryptedRandomSessionKeyFields
+    #   60 NegotiateFlags
+    nt_len, _, nt_off = _read_field(blob, 20)
+    dom_len, _, dom_off = _read_field(blob, 28)
+    user_len, _, user_off = _read_field(blob, 36)
+    if len(blob) < 64:  # flags occupy 60..63; _read_field above is safe on short blobs
+        return None
+    flags = struct.unpack_from("<I", blob, 60)[0]
+    unicode = bool(flags & NEGOTIATE_UNICODE)
+
+    nt_response = _slice(blob, nt_off, nt_len)  # b"" if the field points outside blob
+    domain = _decode_str(_slice(blob, dom_off, dom_len), unicode)
+    username = _decode_str(_slice(blob, user_off, user_len), unicode)
+
+    if not nt_response:
+        # No NT response → anonymous bind or malformed; nothing to
+        # treat as a credential.
+        return None
+
+    # NTLMv2 NTChallengeResponseV2 has a 16-byte HMAC followed by a
+    # variable-length blob (≥ 28 bytes total in practice). NTLMv1 is
+    # exactly 24 bytes. Use length to discriminate; close enough for
+    # cred-classification purposes (the bytes go on hashcat regardless).
+    secret_kind = "ntlmssp_v1" if len(nt_response) == 24 else "ntlmssp_v2"
+
+    if domain:
+        principal = f"{domain}\\{username}"
+    else:
+        principal = username or None  # no domain and no username → None
+
+    return {
+        "username": username,
+        "domain": domain,
+        "principal": principal,
+        "secret_kind": secret_kind,
+        "secret_printable": nt_response.hex(),
+        "secret_b64": base64.b64encode(nt_response).decode("ascii"),
+    }
--- a/development/DEBT.md
+++ b/development/DEBT.md
@@ -1,6 +1,6 @@
 # DECNET — Technical Debt Register

-> Last updated: 2026-04-25 — Credential model gained `secret_kind` discriminator; Postgres MD5 + VNC DES challenge creds now land in the table; MQTT regression from the legacy-adapter removal patched.
+> Last updated: 2026-04-25 — Cred coverage rolled out across 9 more services (HTTP family + DB hash creds + form bodies + MongoDB SCRAM); RDP/SMB/NLA capture deferred to DEBT-040.
 > Severity: 🔴 Critical · 🟠 High · 🟡 Medium · 🟢 Low

 ---
@@ -392,6 +392,21 @@ Closed by commits `aebb9f8` (encode_secret() helper), `abb4dd9` (six-service mig

 ---

+### DEBT-040 — RDP, SMB, RDP-NLA cred capture (protocol framers)
+**Files:** `decnet/templates/rdp/server.py`, `decnet/templates/smb/server.py`, `decnet/templates/_shared/ntlmssp.py` (already shipped).
+
+Three protocol-heavy templates still capture only connection bytes; their wire format carries credentials we currently throw away:
+
+1. **SMB** — `SimpleSMBServer` (Impacket) handles auth opaquely. NTLMSSP Type 3 messages carrying the NTLMv1/v2 hash flow through without ever surfacing in the `Credential` table. To fix: replace SimpleSMBServer with a hand-rolled asyncio SMB2 framer that (a) responds to Negotiate Protocol with a stock dialect, (b) responds to the first Session Setup with a stock NTLMSSP Type 2 challenge, (c) parses the second Session Setup's NTLMSSP Type 3 via the already-shipped `_shared/ntlmssp.py:parse_type3()`, (d) returns STATUS_LOGON_FAILURE so the attacker can't actually authenticate. Rough budget: 200 LoC for the SMB2/SPNEGO framer, parser is already there. Lands creds as `secret_kind="ntlmssp_v2"` (or `"ntlmssp_v1"` for 24-byte legacy responses).
+
+2. **RDP basic auth** — `templates/rdp/server.py` accepts an X.224 connection but immediately drops the connection on data. To capture TS_LOGON_INFO (the legacy plaintext-recoverable auth that pre-NLA mstsc and old Hydra/MSF modules use), the template needs TPKT → X.224 Data PDU → MCS Send Data Request → Client Info PDU framing. Plaintext-recoverable, lands as `secret_kind="plaintext"`. Rough budget: 150 LoC. Limited operator value — most modern attackers default to NLA — but ships with Phase 4 of the original cred-coverage plan.
+
+3. **RDP NLA / CredSSP** — the realistic-attacker path. RDP NLA runs CredSSP over a TLS channel, and the CredSSP TSRequest messages carry the SPNEGO/NTLM blobs. To capture: answer the client's Connection Request with a response selecting `PROTOCOL_HYBRID`, upgrade the socket to TLS using a self-signed cert (existing `https/` infra reusable), parse the inner CredSSP TSRequest ASN.1 DER, extract the negoTokens (NTLMSSP Type 1/2/3), and reuse `_shared/ntlmssp.py:parse_type3()` for the Type 3 hash. Rough budget: 250 LoC, biggest of the three.
+
+**Already shipped as Phase 5/7 prep:** `decnet/templates/_shared/ntlmssp.py` (Type 3 parser with 7 unit tests). Both SMB and RDP-NLA work consume it directly.
+
+**Status:** Open — substantial protocol implementations each. Land independently as separate commits when scheduling allows. Cred-reuse analytics already work without these (the existing 12 services cover the bulk of attacker traffic); these three just round out fleet coverage.
+
 ### DEBT-032 — Prober can't detect fingerprint rotation without mutation
 **Files:** `decnet/prober/worker.py` (~lines 235, 286, 334, 392), `decnet/web/db/models.py` (new `decky_service_fingerprints` table).

@@ -473,6 +488,7 @@ The prober already computes JARM (`worker.py:286`), HASSH (`worker.py:334`), and
 | DEBT-037 | 🟡 Medium | Integration / Webhooks | open (tracks MVP follow-ups) |
 | DEBT-038 | 🟡 Medium | Honeypot / SSH cred capture | open (document-only) |
 | ~~DEBT-039~~ | ✅ | Honeypot / Cred emitters | resolved |
+| DEBT-040 | 🟡 Medium | Honeypot / RDP+SMB cred framers | open |

-**Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-026 (modular mailboxes), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-035 (artifacts uid/gid alignment), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only).
+**Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-026 (modular mailboxes), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-035 (artifacts uid/gid alignment), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only), DEBT-040 (RDP / SMB / NLA cred framers).
 **Estimated remaining effort:** ~24 hours. DEBT-030 Phase B (optimistic staged-buffer editor) is a follow-up, not debt.
--- a/tests/services/test_ntlmssp_parser.py
+++ b/tests/services/test_ntlmssp_parser.py
@@ -0,0 +1,154 @@
+"""NTLMSSP Type 3 parser tests.
+
+Builds Type 3 buffers field-by-field per MS-NLMP §2.2.1.3 and asserts
+the parser returns the universal Credential SD shape. Shared
+infrastructure for SMB and RDP-NLA cred capture.
+"""
+from __future__ import annotations
+
+import base64
+import importlib.util
+import struct
+from pathlib import Path
+
+import pytest
+
+
+def _load_ntlmssp():
+    repo = Path(__file__).resolve().parents[2]  # tests/services/<this file> → repo root
+    path = repo / "decnet" / "templates" / "_shared" / "ntlmssp.py"
+    spec = importlib.util.spec_from_file_location("_ntlmssp_under_test", path)
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)  # run the file directly; no package import involved
+    return mod
+
+
+@pytest.fixture(scope="module")
+def ntlmssp():  # module-scoped: the parser file is loaded once per test module
+    return _load_ntlmssp()
+
+
+def _build_type3(
+    *,
+    username: str,
+    domain: str,
+    nt_response: bytes,
+    unicode: bool = True,
+) -> bytes:
+    """Build a syntactically-valid NTLMSSP Type 3 message (header + payload)."""
+    if unicode:
+        u = username.encode("utf-16-le")
+        d = domain.encode("utf-16-le")
+        flags = 0x00000001  # NEGOTIATE_UNICODE
+    else:
+        u = username.encode("ascii")
+        d = domain.encode("ascii")
+        flags = 0x00000000  # NEGOTIATE_UNICODE cleared → ASCII strings
+
+    # Layout: 8 sig + 4 type + 6×8 field records + 4 flags = 64 bytes
+    # of header, then payload (concat of nt_response, domain, username).
+    header_size = 64
+    nt_off = header_size
+    dom_off = nt_off + len(nt_response)
+    user_off = dom_off + len(d)
+
+    hdr = bytearray(header_size)
+    hdr[0:8] = b"NTLMSSP\x00"
+    struct.pack_into("<I", hdr, 8, 3)  # message type 3
+    # LmChallengeResponse (unused — empty)
+    struct.pack_into("<HHI", hdr, 12, 0, 0, 0)
+    # NtChallengeResponse
+    struct.pack_into("<HHI", hdr, 20, len(nt_response), len(nt_response), nt_off)
+    # DomainName
+    struct.pack_into("<HHI", hdr, 28, len(d), len(d), dom_off)
+    # UserName
+    struct.pack_into("<HHI", hdr, 36, len(u), len(u), user_off)
+    # Workstation (unused)
+    struct.pack_into("<HHI", hdr, 44, 0, 0, 0)
+    # EncryptedRandomSessionKey (unused)
+    struct.pack_into("<HHI", hdr, 52, 0, 0, 0)
+    # NegotiateFlags
+    struct.pack_into("<I", hdr, 60, flags)
+
+    return bytes(hdr) + nt_response + d + u  # payload order matches the offsets above
+
+
+def test_parse_type3_ntlmv2(ntlmssp):
+    """NTLMv2 NTChallengeResponse is variable-length (>= 28 bytes in
+    practice). Parser flags this as secret_kind=ntlmssp_v2."""
+    nt_response = b"\xab" * 16 + b"\x01\x01\x00\x00" + b"\x00" * 28  # 16 + 4 + 28 = 48 bytes
+    blob = _build_type3(
+        username="alice", domain="ACME", nt_response=nt_response,
+    )
+    cred = ntlmssp.parse_type3(blob)
+    assert cred is not None
+    assert cred["username"] == "alice"
+    assert cred["domain"] == "ACME"
+    assert cred["principal"] == "ACME\\alice"
+    assert cred["secret_kind"] == "ntlmssp_v2"
+    assert base64.b64decode(cred["secret_b64"]) == nt_response  # lossless round-trip
+
+def test_parse_type3_ntlmv1(ntlmssp):
+    """NTLMv1 NTChallengeResponse is exactly 24 bytes."""
+    nt_response = b"\xcd" * 24
+    blob = _build_type3(
+        username="bob", domain="WORKGROUP", nt_response=nt_response,
+    )
+    cred = ntlmssp.parse_type3(blob)
+    assert cred["secret_kind"] == "ntlmssp_v1"  # length == 24 selects v1
+    assert cred["principal"] == "WORKGROUP\\bob"
+
+
+def test_parse_type3_no_domain(ntlmssp):  # empty domain → principal is the bare username
+    nt_response = b"\xff" * 24
+    blob = _build_type3(
+        username="lonely", domain="", nt_response=nt_response,
+    )
+    cred = ntlmssp.parse_type3(blob)
+    assert cred["domain"] == ""
+    assert cred["principal"] == "lonely"  # no "DOMAIN\\" prefix when domain is empty
+
+
+def test_parse_type3_oem_strings(ntlmssp):
+    """Older clients without NEGOTIATE_UNICODE send ASCII strings."""
+    nt_response = b"\x11" * 24
+    blob = _build_type3(
+        username="ascii_user",
+        domain="WIN2000",
+        nt_response=nt_response,
+        unicode=False,  # builder clears NEGOTIATE_UNICODE and encodes as ASCII
+    )
+    cred = ntlmssp.parse_type3(blob)
+    assert cred["username"] == "ascii_user"
+    assert cred["domain"] == "WIN2000"
+
+
+def test_parse_type3_rejects_non_signature(ntlmssp):
+    assert ntlmssp.parse_type3(b"NotNtlmssp") is None  # wrong magic (and too short)
+    assert ntlmssp.parse_type3(b"") is None  # empty input
+    # Right magic but wrong message type:
+    blob = bytearray(64)
+    blob[0:8] = b"NTLMSSP\x00"
+    struct.pack_into("<I", blob, 8, 1)  # Type 1, not 3
+    assert ntlmssp.parse_type3(bytes(blob)) is None
+
+
+def test_parse_type3_rejects_anonymous(ntlmssp):
+    """Empty NT response (anonymous bind) → no credential to record."""
+    blob = _build_type3(username="", domain="", nt_response=b"")  # header only, all fields empty
+    assert ntlmssp.parse_type3(blob) is None
+
+
+def test_find_ntlmssp_inside_outer_blob(ntlmssp):
+    """SPNEGO-wrapped Type 3 — caller can locate the signature first
+    and slice from there. Tests the find_ntlmssp helper."""
+    nt_response = b"\xee" * 32
+    inner = _build_type3(
+        username="x", domain="y", nt_response=nt_response,
+    )
+    outer = b"\x60\x82\x01\x00" + b"\x00" * 16 + inner + b"\xff" * 8  # 20-byte prefix, message, 8 trailing bytes
+    off = ntlmssp.find_ntlmssp(outer)
+    assert off >= 0  # -1 would mean the signature was not found
+    cred = ntlmssp.parse_type3(outer[off:])  # trailing bytes after the message are tolerated
+    assert cred["username"] == "x"