refactor(ingester): drop legacy cred adapter — DEBT-039 closed

Phase 3/3 of DEBT-039. Now that all six cred-emitting services
(SSH, Telnet, FTP, POP3, IMAP, SMTP, Redis, LDAP) emit the universal
`secret_b64`-bearing SD shape, the ingester's legacy fork has no
live emitters to handle. Deletes:

- `_ingest_credential_legacy()` — synthesized native fields from
  username+password
- The `elif _fields.get("username") and _fields.get("password")`
  branch in `_extract_bounty`
- `_printable_filter()` — only the legacy adapter called it; the
  native branch trusts the emitter (encode_secret() in Python or
  sd_escape() in C) to have already sanitized
- The legacy-adapter test cases in tests/web/test_ingester.py;
  their coverage moved to tests/services/test_cred_emitters.py
  per-service in Phase 2

The cred path is now single-shape end-to-end. A pre-migration log
row carrying only username+password silently produces no Credential
write — by design, since no current emitter writes that shape and
keeping a code path alive for theoretical legacy data risks masking
emitter regressions. Pre-v1: any historical Bounty cred rows from
before commit 2f47f67 stay untouched.

DEBT-039 marked resolved with summary of the three commits and the
silent-loss bug fix for Redis + LDAP that fell out of execution.
This commit is contained in:
2026-04-25 06:04:09 -04:00
parent abb4dd9fc0
commit e696c2beb3
3 changed files with 20 additions and 145 deletions

View File

@@ -207,18 +207,6 @@ _PRINCIPAL_MAX = 256
_SECRET_B64_MAX = 2048
def _printable_filter(s: str) -> str:
"""Replace bytes outside [0x20, 0x7f) with '?', matching auth-helper.c.
Operates on the str's UTF-8 encoded bytes so we don't accidentally
let a `\\u202e` Unicode override slip through display layers.
"""
out: list[int] = []
for b in s.encode("utf-8", errors="replace"):
out.append(b if 0x20 <= b < 0x7f else ord("?"))
return bytes(out).decode("ascii")
def _truncate_with_warn(s: Optional[str], cap: int, label: str) -> Optional[str]:
if s is None:
return None
@@ -274,51 +262,6 @@ async def _ingest_credential_native(
})
async def _ingest_credential_legacy(
repo: BaseRepository,
log_data: dict[str, Any],
fields: dict[str, Any],
) -> None:
"""Legacy-shape credential: SD-block has username + password.
Synthesizes secret_b64 (from utf8-encoded password bytes), the
sha256 hash (over those same bytes — lossless before any printable
sanitization), and a printable-filtered secret_printable. FTP /
POP3 / IMAP / SMTP go through this branch until DEBT-039 lands.
"""
user = fields.get("username")
pw = fields.get("password")
if not isinstance(pw, str):
return
raw = pw.encode("utf-8", errors="replace")
sha256_hex = hashlib.sha256(raw).hexdigest()
b64 = base64.b64encode(raw).decode("ascii")
printable = _printable_filter(pw)
# Synthesize the universal keys into a copy of fields so the JSON
# blob carries the standardized shape too — lets downstream readers
# treat every credential row identically regardless of emitter.
synthesized_fields = dict(fields)
synthesized_fields.setdefault("principal", user)
synthesized_fields.setdefault("secret_printable", printable)
synthesized_fields.setdefault("secret_b64", b64)
await repo.upsert_credential({
"attacker_ip": log_data.get("attacker_ip"),
"decky_name": log_data.get("decky"),
"service": log_data.get("service"),
"principal": _truncate_with_warn(user, _PRINCIPAL_MAX, "principal"),
"secret_sha256": sha256_hex,
"secret_b64": _truncate_with_warn(b64, _SECRET_B64_MAX, "secret_b64"),
"secret_printable": _truncate_with_warn(
printable, _SECRET_PRINTABLE_MAX, "secret_printable"
),
"outcome": fields.get("outcome"),
"fields": synthesized_fields,
})
@_traced("ingester.extract_bounty")
async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> None:
"""Detect and extract valuable artifacts (bounties) from log entries."""
@@ -326,21 +269,14 @@ async def _extract_bounty(repo: BaseRepository, log_data: dict[str, Any]) -> Non
if not isinstance(_fields, dict):
return
# 1. Credentials — fork on emitter shape.
#
# New shape (SSH/Telnet auth-helper, future emitters): SD-block
# carries `secret_b64` directly. Universal across services.
#
# Legacy shape (FTP/POP3/IMAP/SMTP today): SD-block has `username`
# + `password`. Adapter synthesizes `secret_b64` + `secret_sha256`
# on the fly so those services land in the same Credential table
# without requiring a per-template emitter rewrite. Tracked as
# DEBT-039 — eventually those services emit the new shape natively
# and this branch dies.
# 1. Credentials — every cred-emitting service writes the universal
# SD shape (`secret_b64` present). The legacy `username`+`password`
# adapter that bridged FTP/POP3/IMAP/SMTP through DEBT-039 was
# removed once those services migrated; emitters now feed the
# native branch directly. Redis (no principal) and LDAP (principal=
# dn) also land here — they were previously dropped silently.
if "secret_b64" in _fields:
await _ingest_credential_native(repo, log_data, _fields)
elif _fields.get("username") and _fields.get("password"):
await _ingest_credential_legacy(repo, log_data, _fields)
# 2. HTTP User-Agent fingerprint
_h_raw = _fields.get("headers")