refactor(ingester): drop legacy cred adapter — DEBT-039 closed

Phase 3/3 of DEBT-039. Now that all eight cred-emitting services
(SSH and Telnet natively; FTP, POP3, IMAP, SMTP, Redis, and LDAP via
the Phase 2 six-service migration) emit the universal
`secret_b64`-bearing SD shape, the ingester's legacy fork has no
live emitters to handle. Deletes:

- `_ingest_credential_legacy()` — synthesized native fields from
  username+password
- The `elif _fields.get("username") and _fields.get("password")`
  branch in `_extract_bounty`
- `_printable_filter()` — only the legacy adapter called it; the
  native branch trusts the emitter (encode_secret() in Python or
  sd_escape() in C) to have already sanitized
- The legacy-adapter test cases in tests/web/test_ingester.py;
  their coverage moved to tests/services/test_cred_emitters.py
  per-service in Phase 2

The cred path is now single-shape end-to-end. A pre-migration log
row carrying only username+password silently produces no Credential
write — by design, since no current emitter writes that shape and
keeping a code path alive for theoretical legacy data risks masking
emitter regressions. Pre-v1: any historical Bounty cred rows from
before commit 2f47f67 stay untouched.

DEBT-039 marked resolved with summary of the three commits and the
silent-loss bug fix for Redis + LDAP that fell out of execution.
This commit is contained in:
2026-04-25 06:04:09 -04:00
parent abb4dd9fc0
commit e696c2beb3
3 changed files with 20 additions and 145 deletions

View File

@@ -207,18 +207,6 @@ _PRINCIPAL_MAX = 256
_SECRET_B64_MAX = 2048
def _printable_filter(s: str) -> str:
"""Replace bytes outside [0x20, 0x7f) with '?', matching auth-helper.c.
Operates on the str's UTF-8 encoded bytes so we don't accidentally
let a `\\u202e` Unicode override slip through display layers.
"""
out: list[int] = []
for b in s.encode("utf-8", errors="replace"):
out.append(b if 0x20 <= b < 0x7f else ord("?"))
return bytes(out).decode("ascii")
def _truncate_with_warn(s: Optional[str], cap: int, label: str) -> Optional[str]:
if s is None:
return None
@@ -274,51 +262,6 @@ async def _ingest_credential_native(
})
async def _ingest_credential_legacy(
    repo: BaseRepository,
    log_data: dict[str, Any],
    fields: dict[str, Any],
) -> None:
    """Adapt a legacy username + password SD-block into a Credential row.

    ``secret_b64`` and the sha256 digest are both derived from the
    password's UTF-8 bytes, i.e. before any printable sanitization is
    applied. ``secret_printable`` is the printable-filtered rendering of
    the same password. FTP / POP3 / IMAP / SMTP go through this branch
    until DEBT-039 lands.

    Returns without writing anything when ``password`` is absent or is
    not a ``str``.
    """
    password = fields.get("password")
    if not isinstance(password, str):
        return
    username = fields.get("username")

    secret_bytes = password.encode("utf-8", errors="replace")
    digest = hashlib.sha256(secret_bytes).hexdigest()
    encoded = base64.b64encode(secret_bytes).decode("ascii")
    display = _printable_filter(password)

    # Mirror the universal keys into a copy of the emitter's fields so
    # the stored JSON blob has the standardized shape as well, letting
    # downstream readers handle every credential row the same way.
    # setdefault leaves any value the emitter already supplied intact.
    blob = dict(fields)
    for key, value in (
        ("principal", username),
        ("secret_printable", display),
        ("secret_b64", encoded),
    ):
        blob.setdefault(key, value)

    row = {
        "attacker_ip": log_data.get("attacker_ip"),
        "decky_name": log_data.get("decky"),
        "service": log_data.get("service"),
        "principal": _truncate_with_warn(username, _PRINCIPAL_MAX, "principal"),
        "secret_sha256": digest,
        "secret_b64": _truncate_with_warn(encoded, _SECRET_B64_MAX, "secret_b64"),
        "secret_printable": _truncate_with_warn(
            display, _SECRET_PRINTABLE_MAX, "secret_printable"
        ),
        "outcome": fields.get("outcome"),
        "fields": blob,
    }
    await repo.upsert_credential(row)
@_traced("ingester.extract_bounty")
async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> None:
"""Detect and extract valuable artifacts (bounties) from log entries."""
@@ -326,21 +269,14 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
if not isinstance(_fields, dict):
return
# 1. Credentials — fork on emitter shape.
#
# New shape (SSH/Telnet auth-helper, future emitters): SD-block
# carries `secret_b64` directly. Universal across services.
#
# Legacy shape (FTP/POP3/IMAP/SMTP today): SD-block has `username`
# + `password`. Adapter synthesizes `secret_b64` + `secret_sha256`
# on the fly so those services land in the same Credential table
# without requiring a per-template emitter rewrite. Tracked as
# DEBT-039 — eventually those services emit the new shape natively
# and this branch dies.
# 1. Credentials — every cred-emitting service writes the universal
# SD shape (`secret_b64` present). The legacy `username`+`password`
# adapter that bridged FTP/POP3/IMAP/SMTP through DEBT-039 was
# removed once those services migrated; emitters now feed the
# native branch directly. Redis (no principal) and LDAP (principal=
# dn) also land here — they were previously dropped silently.
if "secret_b64" in _fields:
await _ingest_credential_native(repo, log_data, _fields)
elif _fields.get("username") and _fields.get("password"):
await _ingest_credential_legacy(repo, log_data, _fields)
# 2. HTTP User-Agent fingerprint
_h_raw = _fields.get("headers")

View File

@@ -1,6 +1,6 @@
# DECNET — Technical Debt Register
> Last updated: 2026-04-25 — DEBT-039 opened (legacy cred emitters), Credential storage model landed.
> Last updated: 2026-04-25 — DEBT-039 resolved (six service emitters on standardized shape, legacy ingester adapter deleted).
> Severity: 🔴 Critical · 🟠 High · 🟡 Medium · 🟢 Low
---
@@ -386,21 +386,11 @@ The SSH cred-capture path that closes the "real OpenSSH doesn't log attempted pa
**Status:** Open — document-only ticket tracking the architectural trade-offs of the v1 implementation. None of these are blocking; they're the things to know if the helper ever needs upgrading.
### DEBT-039 — Migrate FTP/POP3/IMAP/SMTP emitters to standardized credential shape
**Files:** `decnet/templates/ftp/server.py`, `decnet/templates/pop3/server.py`, `decnet/templates/imap/server.py`, `decnet/templates/smtp/server.py`, `decnet/web/ingester.py` (legacy adapter at `_ingest_credential_legacy`).
### ~~DEBT-039 — Migrate FTP/POP3/IMAP/SMTP emitters to standardized credential shape~~ ✅ RESOLVED
The new `Credential` storage model (commit landing alongside this entry) writes one universal shape: `principal` + `secret_sha256` + `secret_b64` + `secret_printable`. SSH and Telnet auth-helper emit those keys natively. The four legacy services — FTP, POP3, IMAP, SMTP — still emit the old `username=` + `password=` shape, and the ingester carries a one-shot adapter (`_ingest_credential_legacy`) that synthesizes the universal keys on the fly.
Closed by commits `aebb9f8` (encode_secret() helper), `abb4dd9` (six-service migration), and the legacy-adapter removal commit. Scope expanded during execution to include Redis (`auth, password=` — was silently dropped) and LDAP (`bind, dn=, password=` — was silently dropped) — both now emit the universal shape and feed the native ingester branch. The legacy adapter `_ingest_credential_legacy` and its `username`+`password` fork are deleted; only the native branch remains. Also added: the SMTP MAIL FROM event now exposes a parsed `domain=` field alongside the original `value=` for future "what domains attackers spoof from" analytics — Log row only, no Credential write.
The adapter works correctly but couples ingester logic to an emitter shape we'd rather see go away. Per-service migration:
1. **FTP** (`templates/ftp/server.py:103`) — change `_log("auth_attempt", username=..., password=...)` to also emit `principal`, `secret_printable`, `secret_b64`. Remove the legacy adapter dependency for `service="ftp"` once verified.
2. **POP3** (`templates/pop3/server.py`) — same pattern.
3. **IMAP** (`templates/imap/server.py`) — same pattern.
4. **SMTP** (`templates/smtp/server.py`) — opportunity to use the new `domain` field as the principal (rather than `username` for an MTA), since SMTP AUTH PLAIN/LOGIN's authentication identity is conceptually a domain user, not a system user.
Once all four migrate, delete `_ingest_credential_legacy` from `decnet/web/ingester.py` and drop the legacy branch from `_extract_bounty`.
**Status:** Open — the legacy adapter is a temporary bridge. No deadline; close one service at a time as their templates are touched for unrelated reasons.
---
### DEBT-032 — Prober can't detect fingerprint rotation without mutation
**Files:** `decnet/prober/worker.py` (~lines 235, 286, 334, 392), `decnet/web/db/models.py` (new `decky_service_fingerprints` table).
@@ -482,7 +472,7 @@ The prober already computes JARM (`worker.py:286`), HASSH (`worker.py:334`), and
| DEBT-036 | 🟡 Medium | Correlation / Keystroke dynamics | open |
| DEBT-037 | 🟡 Medium | Integration / Webhooks | open (tracks MVP follow-ups) |
| DEBT-038 | 🟡 Medium | Honeypot / SSH cred capture | open (document-only) |
| DEBT-039 | 🟡 Medium | Honeypot / Cred emitters | open |
| ~~DEBT-039~~ | ✅ | Honeypot / Cred emitters | resolved |
**Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-026 (modular mailboxes), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-035 (artifacts uid/gid alignment), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only), DEBT-039 (legacy cred emitters → standardized shape).
**Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-026 (modular mailboxes), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-035 (artifacts uid/gid alignment), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only).
**Estimated remaining effort:** ~24 hours. DEBT-030 Phase B (optimistic staged-buffer editor) is a follow-up, not debt.

View File

@@ -16,29 +16,6 @@ import pytest
# ── _extract_bounty ───────────────────────────────────────────────────────────
class TestExtractBounty:
@pytest.mark.asyncio
async def test_credential_legacy_adapter(self):
    """FTP/POP3/IMAP/SMTP shape (username + password) → upsert_credential."""
    import base64
    import hashlib

    from decnet.web.ingester import _extract_bounty

    repo = MagicMock()
    repo.upsert_credential = AsyncMock()
    event: dict = {
        "decky": "decky-01",
        "service": "ftp",
        "attacker_ip": "10.0.0.5",
        "fields": {"username": "admin", "password": "hunter2"},
    }

    await _extract_bounty(repo, event)

    repo.upsert_credential.assert_awaited_once()
    written = repo.upsert_credential.call_args[0][0]
    assert written["service"] == "ftp"
    assert written["principal"] == "admin"
    assert written["secret_printable"] == "hunter2"
    # Both derived values are computed over the original utf-8 bytes.
    expected_b64 = base64.b64encode(b"hunter2").decode()
    expected_sha256 = hashlib.sha256(b"hunter2").hexdigest()
    assert written["secret_b64"] == expected_b64
    assert written["secret_sha256"] == expected_sha256
@pytest.mark.asyncio
async def test_credential_native_shape(self):
"""SSH/Telnet auth-helper shape (secret_b64) → upsert_credential."""
@@ -79,32 +56,6 @@ class TestExtractBounty:
await _extract_bounty(mock_repo, log_data)
mock_repo.upsert_credential.assert_not_awaited()
@pytest.mark.asyncio
async def test_credential_legacy_sanitizes_nonprintable(self):
    """Control bytes in a legacy password must not survive into
    secret_printable, while secret_b64 and secret_sha256 still reflect
    the ORIGINAL bytes."""
    import base64
    import hashlib

    from decnet.web.ingester import _extract_bounty

    repo = MagicMock()
    repo.upsert_credential = AsyncMock()
    # Password carrying an ANSI escape sequence and an embedded NUL.
    hostile_pw = "\x1b[31mbad\x00trail"
    event: dict = {
        "decky": "decky-01",
        "service": "ftp",
        "attacker_ip": "10.0.0.5",
        "fields": {"username": "user", "password": hostile_pw},
    }

    await _extract_bounty(repo, event)

    written = repo.upsert_credential.call_args[0][0]
    # Neither the ESC byte nor the NUL may appear in the display form.
    for forbidden in ("\x1b", "\x00"):
        assert forbidden not in written["secret_printable"]
    # The untouched bytes are still recoverable from b64 and hashed as-is.
    original_bytes = hostile_pw.encode("utf-8")
    assert base64.b64decode(written["secret_b64"]) == original_bytes
    assert written["secret_sha256"] == hashlib.sha256(original_bytes).hexdigest()
@pytest.mark.asyncio
async def test_no_fields_skips(self):
from decnet.web.ingester import _extract_bounty
@@ -122,21 +73,19 @@ class TestExtractBounty:
mock_repo.upsert_credential.assert_not_awaited()
@pytest.mark.asyncio
async def test_missing_password_skips(self):
async def test_no_secret_b64_no_credential(self):
"""The native branch keys off `secret_b64`. Fields lacking it
produce no Credential row — even if username/password keys
from the pre-migration era are present, they're now ignored."""
from decnet.web.ingester import _extract_bounty
mock_repo = MagicMock()
mock_repo.upsert_credential = AsyncMock()
await _extract_bounty(mock_repo, {"fields": {"username": "admin"}})
# Pre-migration shape — adapter is gone; this is a no-op path.
await _extract_bounty(mock_repo, {
"fields": {"username": "admin", "password": "stale"},
})
mock_repo.upsert_credential.assert_not_awaited()
@pytest.mark.asyncio
async def test_missing_username_skips(self):
    """A password with no username must not produce any repository write.

    Credential rows are written through ``upsert_credential`` (see the
    other credential tests in this class), so asserting only on
    ``add_bounty`` would pass even if a credential were written. Both
    write paths are checked here.
    """
    from decnet.web.ingester import _extract_bounty

    mock_repo = MagicMock()
    mock_repo.add_bounty = AsyncMock()
    mock_repo.upsert_credential = AsyncMock()

    await _extract_bounty(mock_repo, {"fields": {"password": "pass"}})

    mock_repo.upsert_credential.assert_not_awaited()
    mock_repo.add_bounty.assert_not_awaited()
# ── log_ingestion_worker ──────────────────────────────────────────────────────