diff --git a/decnet/web/ingester.py b/decnet/web/ingester.py index cc4de7f3..5ba37656 100644 --- a/decnet/web/ingester.py +++ b/decnet/web/ingester.py @@ -207,18 +207,6 @@ _PRINCIPAL_MAX = 256 _SECRET_B64_MAX = 2048 -def _printable_filter(s: str) -> str: - """Replace bytes outside [0x20, 0x7f) with '?', matching auth-helper.c. - - Operates on the str's UTF-8 encoded bytes so we don't accidentally - let a `\\u202e` Unicode override slip through display layers. - """ - out: list[int] = [] - for b in s.encode("utf-8", errors="replace"): - out.append(b if 0x20 <= b < 0x7f else ord("?")) - return bytes(out).decode("ascii") - - def _truncate_with_warn(s: Optional[str], cap: int, label: str) -> Optional[str]: if s is None: return None @@ -274,51 +262,6 @@ async def _ingest_credential_native( }) -async def _ingest_credential_legacy( - repo: BaseRepository, - log_data: dict[str, Any], - fields: dict[str, Any], -) -> None: - """Legacy-shape credential: SD-block has username + password. - - Synthesizes secret_b64 (from utf8-encoded password bytes), the - sha256 hash (over those same bytes — lossless before any printable - sanitization), and a printable-filtered secret_printable. FTP / - POP3 / IMAP / SMTP go through this branch until DEBT-039 lands. - """ - user = fields.get("username") - pw = fields.get("password") - if not isinstance(pw, str): - return - - raw = pw.encode("utf-8", errors="replace") - sha256_hex = hashlib.sha256(raw).hexdigest() - b64 = base64.b64encode(raw).decode("ascii") - printable = _printable_filter(pw) - - # Synthesize the universal keys into a copy of fields so the JSON - # blob carries the standardized shape too — lets downstream readers - # treat every credential row identically regardless of emitter. - synthesized_fields = dict(fields) - synthesized_fields.setdefault("principal", user) - synthesized_fields.setdefault("secret_printable", printable) - synthesized_fields.setdefault("secret_b64", b64) - - await repo.upsert_credential({ - "attacker_ip": log_data.get("attacker_ip"), - "decky_name": log_data.get("decky"), - "service": log_data.get("service"), - "principal": _truncate_with_warn(user, _PRINCIPAL_MAX, "principal"), - "secret_sha256": sha256_hex, - "secret_b64": _truncate_with_warn(b64, _SECRET_B64_MAX, "secret_b64"), - "secret_printable": _truncate_with_warn( - printable, _SECRET_PRINTABLE_MAX, "secret_printable" - ), - "outcome": fields.get("outcome"), - "fields": synthesized_fields, - }) - - @_traced("ingester.extract_bounty") async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> None: """Detect and extract valuable artifacts (bounties) from log entries.""" @@ -326,21 +269,14 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non if not isinstance(_fields, dict): return - # 1. Credentials — fork on emitter shape. - # - # New shape (SSH/Telnet auth-helper, future emitters): SD-block - # carries `secret_b64` directly. Universal across services. - # - # Legacy shape (FTP/POP3/IMAP/SMTP today): SD-block has `username` - # + `password`. Adapter synthesizes `secret_b64` + `secret_sha256` - # on the fly so those services land in the same Credential table - # without requiring a per-template emitter rewrite. Tracked as - # DEBT-039 — eventually those services emit the new shape natively - # and this branch dies. + # 1. Credentials — every cred-emitting service writes the universal + # SD shape (`secret_b64` present). The legacy `username`+`password` + # adapter that bridged FTP/POP3/IMAP/SMTP through DEBT-039 was + # removed once those services migrated; emitters now feed the + # native branch directly. Redis (no principal) and LDAP (principal= + # dn) also land here — they were previously dropped silently. if "secret_b64" in _fields: await _ingest_credential_native(repo, log_data, _fields) - elif _fields.get("username") and _fields.get("password"): - await _ingest_credential_legacy(repo, log_data, _fields) # 2. HTTP User-Agent fingerprint _h_raw = _fields.get("headers") diff --git a/development/DEBT.md b/development/DEBT.md index c1396cd0..08f52225 100644 --- a/development/DEBT.md +++ b/development/DEBT.md @@ -1,6 +1,6 @@ # DECNET — Technical Debt Register -> Last updated: 2026-04-25 — DEBT-039 opened (legacy cred emitters), Credential storage model landed. +> Last updated: 2026-04-25 — DEBT-039 resolved (six service emitters on standardized shape, legacy ingester adapter deleted). > Severity: 🔴 Critical · 🟠 High · 🟡 Medium · 🟢 Low --- @@ -386,21 +386,11 @@ The SSH cred-capture path that closes the "real OpenSSH doesn't log attempted pa **Status:** Open — document-only ticket tracking the architectural trade-offs of the v1 implementation. None of these are blocking; they're the things to know if the helper ever needs upgrading. -### DEBT-039 — Migrate FTP/POP3/IMAP/SMTP emitters to standardized credential shape -**Files:** `decnet/templates/ftp/server.py`, `decnet/templates/pop3/server.py`, `decnet/templates/imap/server.py`, `decnet/templates/smtp/server.py`, `decnet/web/ingester.py` (legacy adapter at `_ingest_credential_legacy`). +### ~~DEBT-039 — Migrate FTP/POP3/IMAP/SMTP emitters to standardized credential shape~~ ✅ RESOLVED -The new `Credential` storage model (commit landing alongside this entry) writes one universal shape: `principal` + `secret_sha256` + `secret_b64` + `secret_printable`. SSH and Telnet auth-helper emit those keys natively. The four legacy services — FTP, POP3, IMAP, SMTP — still emit the old `username=` + `password=` shape, and the ingester carries a one-shot adapter (`_ingest_credential_legacy`) that synthesizes the universal keys on the fly. +Closed by commits `aebb9f8` (encode_secret() helper), `abb4dd9` (six-service migration), and the legacy-adapter removal commit. Scope expanded during execution to include Redis (`auth, password=` — was silently dropped) and LDAP (`bind, dn=, password=` — was silently dropped) — both now emit the universal shape and feed the native ingester branch. The legacy adapter `_ingest_credential_legacy` and its `username`+`password` fork are deleted; only the native branch remains. Also added: the SMTP MAIL FROM event now exposes a parsed `domain=` field alongside the original `value=` for future "what domains attackers spoof from" analytics — Log row only, no Credential write. -The adapter works correctly but couples ingester logic to an emitter shape we'd rather see go away. Per-service migration: - -1. **FTP** (`templates/ftp/server.py:103`) — change `_log("auth_attempt", username=..., password=...)` to also emit `principal`, `secret_printable`, `secret_b64`. Remove the legacy adapter dependency for `service="ftp"` once verified. -2. **POP3** (`templates/pop3/server.py`) — same pattern. -3. **IMAP** (`templates/imap/server.py`) — same pattern. -4. **SMTP** (`templates/smtp/server.py`) — opportunity to use the new `domain` field as the principal (rather than `username` for an MTA), since SMTP AUTH PLAIN/LOGIN's authentication identity is conceptually a domain user, not a system user. - -Once all four migrate, delete `_ingest_credential_legacy` from `decnet/web/ingester.py` and drop the legacy branch from `_extract_bounty`. - -**Status:** Open — the legacy adapter is a temporary bridge. No deadline; close one service at a time as their templates are touched for unrelated reasons. +--- ### DEBT-032 — Prober can't detect fingerprint rotation without mutation **Files:** `decnet/prober/worker.py` (~lines 235, 286, 334, 392), `decnet/web/db/models.py` (new `decky_service_fingerprints` table). @@ -482,7 +472,7 @@ The prober already computes JARM (`worker.py:286`), HASSH (`worker.py:334`), and | DEBT-036 | 🟡 Medium | Correlation / Keystroke dynamics | open | | DEBT-037 | 🟡 Medium | Integration / Webhooks | open (tracks MVP follow-ups) | | DEBT-038 | 🟡 Medium | Honeypot / SSH cred capture | open (document-only) | -| DEBT-039 | 🟡 Medium | Honeypot / Cred emitters | open | +| ~~DEBT-039~~ | ✅ | Honeypot / Cred emitters | resolved | -**Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-026 (modular mailboxes), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-035 (artifacts uid/gid alignment), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only), DEBT-039 (legacy cred emitters → standardized shape). +**Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-026 (modular mailboxes), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-035 (artifacts uid/gid alignment), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only). **Estimated remaining effort:** ~24 hours. DEBT-030 Phase B (optimistic staged-buffer editor) is a follow-up, not debt. diff --git a/tests/web/test_ingester.py b/tests/web/test_ingester.py index 0ad82028..ad231524 100644 --- a/tests/web/test_ingester.py +++ b/tests/web/test_ingester.py @@ -16,29 +16,6 @@ import pytest # ── _extract_bounty ─────────────────────────────────────────────────────────── class TestExtractBounty: - @pytest.mark.asyncio - async def test_credential_legacy_adapter(self): - """FTP/POP3/IMAP/SMTP shape (username + password) → upsert_credential.""" - from decnet.web.ingester import _extract_bounty - mock_repo = MagicMock() - mock_repo.upsert_credential = AsyncMock() - log_data: dict = { - "decky": "decky-01", - "service": "ftp", - "attacker_ip": "10.0.0.5", - "fields": {"username": "admin", "password": "hunter2"}, - } - await _extract_bounty(mock_repo, log_data) - mock_repo.upsert_credential.assert_awaited_once() - cred = mock_repo.upsert_credential.call_args[0][0] - assert cred["service"] == "ftp" - assert cred["principal"] == "admin" - assert cred["secret_printable"] == "hunter2" - # b64 + sha256 computed over the original utf-8 bytes. - import base64, hashlib - assert cred["secret_b64"] == base64.b64encode(b"hunter2").decode() - assert cred["secret_sha256"] == hashlib.sha256(b"hunter2").hexdigest() - @pytest.mark.asyncio async def test_credential_native_shape(self): """SSH/Telnet auth-helper shape (secret_b64) → upsert_credential.""" @@ -79,32 +56,6 @@ class TestExtractBounty: await _extract_bounty(mock_repo, log_data) mock_repo.upsert_credential.assert_not_awaited() - @pytest.mark.asyncio - async def test_credential_legacy_sanitizes_nonprintable(self): - """Non-printable bytes in legacy password collapse to '?' in - secret_printable; b64 + sha256 reflect the ORIGINAL bytes.""" - from decnet.web.ingester import _extract_bounty - import base64, hashlib - mock_repo = MagicMock() - mock_repo.upsert_credential = AsyncMock() - # ANSI escape + NUL byte in the password. - bad_pw = "\x1b[31mbad\x00trail" - log_data: dict = { - "decky": "decky-01", - "service": "ftp", - "attacker_ip": "10.0.0.5", - "fields": {"username": "user", "password": bad_pw}, - } - await _extract_bounty(mock_repo, log_data) - cred = mock_repo.upsert_credential.call_args[0][0] - # No 0x1b, no NUL — collapsed to '?'. - assert "\x1b" not in cred["secret_printable"] - assert "\x00" not in cred["secret_printable"] - # Original bytes survive in b64 + sha256. - raw = bad_pw.encode("utf-8") - assert base64.b64decode(cred["secret_b64"]) == raw - assert cred["secret_sha256"] == hashlib.sha256(raw).hexdigest() - @pytest.mark.asyncio async def test_no_fields_skips(self): from decnet.web.ingester import _extract_bounty @@ -122,21 +73,19 @@ class TestExtractBounty: mock_repo.upsert_credential.assert_not_awaited() @pytest.mark.asyncio - async def test_missing_password_skips(self): + async def test_no_secret_b64_no_credential(self): + """The native branch keys off `secret_b64`. Fields lacking it + produce no Credential row — even if username/password keys + from the pre-migration era are present, they're now ignored.""" from decnet.web.ingester import _extract_bounty mock_repo = MagicMock() mock_repo.upsert_credential = AsyncMock() - await _extract_bounty(mock_repo, {"fields": {"username": "admin"}}) + # Pre-migration shape — adapter is gone; this is a no-op path. + await _extract_bounty(mock_repo, { + "fields": {"username": "admin", "password": "stale"}, + }) mock_repo.upsert_credential.assert_not_awaited() - @pytest.mark.asyncio - async def test_missing_username_skips(self): - from decnet.web.ingester import _extract_bounty - mock_repo = MagicMock() - mock_repo.add_bounty = AsyncMock() - await _extract_bounty(mock_repo, {"fields": {"password": "pass"}}) - mock_repo.add_bounty.assert_not_awaited() - # ── log_ingestion_worker ──────────────────────────────────────────────────────