feat(mail): operator-tunable IMAP/POP3 email seed (DEBT-026)

IMAP_EMAIL_SEED / POP3_EMAIL_SEED accept a directory (rglob *.eml +
*.json) or a single .json/.eml. Loaded entries are CONCATENATED onto
the hardcoded _BAIT_EMAILS: realism-engine emailgen output and
operator-curated seeds now add to that baseline instead of replacing
it. JSON dicts require from_addr / to_addr / subject / body; bare
bodies are wrapped into RFC 5322 on load. compose_fragment reads
service_cfg["email_seed"], bind-mounts the host path read-only at
/var/spool/decnet-emails/seed, and points the *_EMAIL_SEED env var at
that mount.
This commit is contained in:
2026-05-03 02:47:06 -04:00
parent e0b07651fd
commit b88d67794d
8 changed files with 444 additions and 133 deletions

View File

@@ -4,11 +4,18 @@ from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "imap" TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "imap"
_SEED_CONTAINER_PATH = "/var/spool/decnet-emails/seed"
class IMAPService(BaseService): class IMAPService(BaseService):
name = "imap" name = "imap"
ports = [143, 993] ports = [143, 993]
default_image = "build" default_image = "build"
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads # Optional config:
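# email_seed: host path to a directory of .eml/.json files OR a
# single .json/.eml. Mounted read-only into the
# container; entries concatenate with the hardcoded
# bait list (additive to realism-engine output).
# NOTE(review): the mount target is always the suffix-less
# /var/spool/decnet-emails/seed, while the template loader selects
# single files by their .json/.eml suffix — confirm that a
# single-file seed is actually loaded inside the container.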
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
fragment: dict = { fragment: dict = {
@@ -19,6 +26,14 @@ class IMAPService(BaseService):
} }
if log_target: if log_target:
fragment["environment"]["LOG_TARGET"] = log_target fragment["environment"]["LOG_TARGET"] = log_target
if service_cfg:
seed = service_cfg.get("email_seed")
if seed:
host_path = str(Path(str(seed)).expanduser().resolve())
fragment["environment"]["IMAP_EMAIL_SEED"] = _SEED_CONTAINER_PATH
fragment.setdefault("volumes", []).append(
f"{host_path}:{_SEED_CONTAINER_PATH}:ro"
)
return fragment return fragment
def dockerfile_context(self) -> Path | None: def dockerfile_context(self) -> Path | None:

View File

@@ -4,11 +4,17 @@ from decnet.services.base import BaseService
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pop3" TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pop3"
_SEED_CONTAINER_PATH = "/var/spool/decnet-emails/seed"
class POP3Service(BaseService): class POP3Service(BaseService):
name = "pop3" name = "pop3"
ports = [110, 995] ports = [110, 995]
default_image = "build" default_image = "build"
# config_schema: no user-tunable fields yet — TODO add when compose_fragment grows cfg reads # Optional config:
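# email_seed: host path to a directory of .eml/.json files OR a
# single .json/.eml. Mounted read-only; entries
# concatenate with the hardcoded bait list.
# NOTE(review): the mount target is always the suffix-less
# /var/spool/decnet-emails/seed, while the template loader selects
# single files by their .json/.eml suffix — confirm that a
# single-file seed is actually loaded inside the container.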
def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict: def compose_fragment(self, decky_name: str, log_target: str | None = None, service_cfg: dict | None = None) -> dict:
fragment: dict = { fragment: dict = {
@@ -19,6 +25,14 @@ class POP3Service(BaseService):
} }
if log_target: if log_target:
fragment["environment"]["LOG_TARGET"] = log_target fragment["environment"]["LOG_TARGET"] = log_target
if service_cfg:
seed = service_cfg.get("email_seed")
if seed:
host_path = str(Path(str(seed)).expanduser().resolve())
fragment["environment"]["POP3_EMAIL_SEED"] = _SEED_CONTAINER_PATH
fragment.setdefault("volumes", []).append(
f"{host_path}:{_SEED_CONTAINER_PATH}:ro"
)
return fragment return fragment
def dockerfile_context(self) -> Path | None: def dockerfile_context(self) -> Path | None:

View File

@@ -13,7 +13,9 @@ Banner advertises Dovecot so nmap fingerprints correctly.
import asyncio import asyncio
import email import email
import email.policy import email.policy
import json
import os import os
import sys
import time import time
from email.utils import getaddresses from email.utils import getaddresses
from pathlib import Path from pathlib import Path
@@ -37,14 +39,14 @@ VALID_USERS: dict[str, str] = {
u: p for part in _RAW_USERS.split(",") if ":" in part for u, p in [part.split(":", 1)] u: p for part in _RAW_USERS.split(",") if ":" in part for u, p in [part.split(":", 1)]
} }
# Path to a directory of ``*.eml`` files that the orchestrator emailgen # Operator/realism-engine email seed source. Two shapes accepted:
# worker drops into the container (``/var/spool/decnet-emails/`` by # 1. Directory: walked recursively for ``*.eml`` (RFC 822 on disk —
# convention). When set AND the directory contains parseable EMLs, # what the realism-engine emailgen worker drops in) and ``*.json``
# they replace the hardcoded ``_BAIT_EMAILS`` fallback below — meaning # (operator-curated lists of dicts, see _load_seed_json).
# every mail an attacker reads is the LLM-generated, persona-driven, # 2. Single ``*.json`` file (a list of dicts with the same shape) or a single ``*.eml`` file.
# language-aware version, not the static credential-stuffed bait list. # Loaded entries are CONCATENATED with ``_BAIT_EMAILS`` — never replace.
# Empty / missing / unparseable: the fallback list still serves so a # The hardcoded list keeps a fresh deployment non-silent and serves as
# fresh deployment is never silent. # the deterministic baseline the persona output stacks on top of.
_EMAIL_SEED_PATH = os.environ.get("IMAP_EMAIL_SEED", "") _EMAIL_SEED_PATH = os.environ.get("IMAP_EMAIL_SEED", "")
# Re-scan the seed directory at most this often. Cheap: walking a few # Re-scan the seed directory at most this often. Cheap: walking a few
# dozen .eml files is sub-millisecond, but caching keeps an attacker's # dozen .eml files is sub-millisecond, but caching keeps an attacker's
@@ -256,20 +258,18 @@ _MAILBOXES = ["INBOX", "Sent", "Drafts", "Archive"]
# ── Spool-backed email loader ───────────────────────────────────────────────── # ── Spool-backed email loader ─────────────────────────────────────────────────
# When IMAP_EMAIL_SEED points at a directory of .eml files the # When IMAP_EMAIL_SEED points at a directory (or a single .json/.eml file) the
# orchestrator emailgen worker has dropped into the container, parse # realism-engine emailgen worker / operator has populated, parse it on
# them on demand and serve them as the INBOX. Cached between requests # demand and CONCATENATE the result with the hardcoded ``_BAIT_EMAILS``.
# with a short TTL + mtime check so a hot mailbox doesn't pay the parse # Cached with a short TTL + mtime check so a hot mailbox doesn't pay the
# cost on every IMAP command. # parse cost on every IMAP command.
#
# Failure modes (missing dir, unparseable EMLs, empty dir) all return
# the hardcoded fallback rather than 0 messages — a silent INBOX is a
# stronger tell than a slightly-stale one.
_seed_cache: list[dict] | None = None _seed_cache: list[dict] | None = None
_seed_cache_dir_mtime: float = 0.0 _seed_cache_path_mtime: float = 0.0
_seed_cache_loaded_at: float = 0.0 _seed_cache_loaded_at: float = 0.0
_SEED_JSON_REQUIRED = ("from_addr", "to_addr", "subject", "body")
def _split_addr(value: str) -> tuple[str, str]: def _split_addr(value: str) -> tuple[str, str]:
"""Return (display_name, email) from a header value, falling back to """Return (display_name, email) from a header value, falling back to
@@ -284,11 +284,12 @@ def _split_addr(value: str) -> tuple[str, str]:
return (name or "").strip(), (addr or value).strip() return (name or "").strip(), (addr or value).strip()
def _eml_to_dict(path: Path, uid: int) -> dict | None: def _eml_to_dict(path: Path) -> dict | None:
"""Parse one .eml into the dict shape the rest of this server uses. """Parse one .eml into the dict shape the rest of this server uses.
Returns None when the file isn't parseable; callers skip + continue Returns None when the file isn't parseable; callers skip + continue
so one corrupt EML does not kill the whole INBOX listing. so one corrupt EML does not kill the whole INBOX listing. ``uid``
is assigned by the caller after concatenation.
""" """
try: try:
raw = path.read_bytes() raw = path.read_bytes()
@@ -300,71 +301,155 @@ def _eml_to_dict(path: Path, uid: int) -> dict | None:
subject = (msg.get("Subject") or "").strip() subject = (msg.get("Subject") or "").strip()
date = msg.get("Date") or "" date = msg.get("Date") or ""
return { return {
"uid": uid, "uid": 0,
"flags": [], # never \Seen for spool emails — fresh delivery "flags": [], # never \Seen for spool emails — fresh delivery
"from_name": from_name or from_addr.split("@", 1)[0] if from_addr else "Unknown", "from_name": from_name or (from_addr.split("@", 1)[0] if from_addr else "Unknown"),
"from_addr": from_addr or "unknown@localhost", "from_addr": from_addr or "unknown@localhost",
"to_addr": to_addr or "unknown@localhost", "to_addr": to_addr or "unknown@localhost",
"subject": subject or "(no subject)", "subject": subject or "(no subject)",
"date": date, "date": date,
# The body field carries the full RFC 822 message — headers + body.
# That mirrors how the hardcoded _BAIT_EMAILS entries are shaped.
"body": raw.decode("utf-8", errors="replace"), "body": raw.decode("utf-8", errors="replace"),
} }
def _scan_seed_dir(path: Path) -> list[dict]: def _seed_dict_to_entry(entry: dict) -> dict | None:
"""Walk *path* recursively, parse every ``*.eml``, sort by mtime.""" """Validate and normalize a JSON-supplied dict into the bait shape.
eml_paths: list[Path] = []
Required keys: from_addr, to_addr, subject, body. Optional: date,
from_name, flags. Bad rows return None (caller skips + logs).
"""
if not isinstance(entry, dict):
return None
for key in _SEED_JSON_REQUIRED:
if not isinstance(entry.get(key), str) or not entry[key]:
return None
from_addr = entry["from_addr"]
from_name = str(entry.get("from_name") or from_addr.split("@", 1)[0])
date = str(entry.get("date") or "")
flags = entry.get("flags") or []
if not isinstance(flags, list):
flags = []
body = entry["body"]
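# If body contains no blank line it is treated as a bare string (no
# headers) and wrapped into RFC 822 so IMAP BODY[]/RFC822 fetches
# return a complete message — matches the hardcoded _BAIT_EMAILS shape.
# NOTE(review): a header-less body with several paragraphs also
# contains a blank line and is therefore passed through unwrapped.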
if "\r\n\r\n" not in body and "\n\n" not in body:
headers = (
f"Date: {date}\r\n"
f"From: {from_name} <{from_addr}>\r\n"
f"To: {entry['to_addr']}\r\n"
f"Subject: {entry['subject']}\r\n"
"\r\n"
)
body = headers + body
return {
"uid": 0,
"flags": list(flags),
"from_name": from_name,
"from_addr": from_addr,
"to_addr": entry["to_addr"],
"subject": entry["subject"],
"date": date,
"body": body,
}
def _load_seed_json(path: Path) -> list[dict]:
"""Load a JSON list of dicts into entries. Bad rows logged + skipped."""
try: try:
for p in path.rglob("*.eml"): raw = path.read_text(encoding="utf-8")
if p.is_file(): data = json.loads(raw)
eml_paths.append(p) except (OSError, ValueError) as exc:
print(f"imap: seed json {path} unreadable: {exc}", file=sys.stderr)
return []
if not isinstance(data, list):
print(f"imap: seed json {path} must be a list", file=sys.stderr)
return []
out: list[dict] = []
for i, entry in enumerate(data):
normalized = _seed_dict_to_entry(entry)
if normalized is None:
print(f"imap: seed json {path}[{i}] missing required keys", file=sys.stderr)
continue
out.append(normalized)
return out
def _scan_seed(path: Path) -> list[dict]:
"""Resolve *path* into seed entries.
- Directory: rglob ``*.eml`` (mtime-sorted) + every ``*.json`` (each
a list of dicts).
- File ending in ``.json``: that JSON list.
- File ending in ``.eml``: that single EML.
"""
out: list[dict] = []
try:
if path.is_dir():
eml_paths = sorted(
(p for p in path.rglob("*.eml") if p.is_file()),
key=lambda p: p.stat().st_mtime,
)
for p in eml_paths:
d = _eml_to_dict(p)
if d is not None:
out.append(d)
for jp in sorted(p for p in path.rglob("*.json") if p.is_file()):
out.extend(_load_seed_json(jp))
elif path.suffix.lower() == ".json" and path.is_file():
out.extend(_load_seed_json(path))
elif path.suffix.lower() == ".eml" and path.is_file():
d = _eml_to_dict(path)
if d is not None:
out.append(d)
except OSError: except OSError:
return [] return []
eml_paths.sort(key=lambda p: p.stat().st_mtime)
out: list[dict] = []
for i, p in enumerate(eml_paths, start=1):
d = _eml_to_dict(p, uid=i)
if d is not None:
out.append(d)
return out return out
def _get_emails() -> list[dict]: def _get_emails() -> list[dict]:
"""Return the active mailbox list. """Return the active mailbox list: ``_BAIT_EMAILS`` concatenated
with seed entries (directory of .eml/.json or a single .json/.eml).
Resolution order: UIDs are renumbered sequentially across the combined list so the
1. ``IMAP_EMAIL_SEED`` set + dir exists + at least one parseable EML hardcoded baits keep their original UIDs (1..10) and seeded entries
→ that list (rescan-throttled). pick up from len(_BAIT_EMAILS)+1.
2. Else → the hardcoded ``_BAIT_EMAILS`` fallback.
""" """
global _seed_cache, _seed_cache_dir_mtime, _seed_cache_loaded_at global _seed_cache, _seed_cache_path_mtime, _seed_cache_loaded_at
if not _EMAIL_SEED_PATH: if not _EMAIL_SEED_PATH:
return _BAIT_EMAILS return _BAIT_EMAILS
seed_dir = Path(_EMAIL_SEED_PATH)
seed_path = Path(_EMAIL_SEED_PATH)
try: try:
dir_stat = seed_dir.stat() path_stat = seed_path.stat()
except OSError: except OSError:
return _BAIT_EMAILS return _BAIT_EMAILS
now = time.monotonic() now = time.monotonic()
fresh_enough = ( fresh_enough = (
_seed_cache is not None _seed_cache is not None
and (now - _seed_cache_loaded_at) < _SEED_RESCAN_INTERVAL and (now - _seed_cache_loaded_at) < _SEED_RESCAN_INTERVAL
and dir_stat.st_mtime == _seed_cache_dir_mtime and path_stat.st_mtime == _seed_cache_path_mtime
) )
if fresh_enough: if fresh_enough:
return _seed_cache or _BAIT_EMAILS seed = _seed_cache or []
scanned = _scan_seed_dir(seed_dir) else:
if not scanned: seed = _scan_seed(seed_path)
# Don't poison the cache with an empty list; a single early _seed_cache = seed
# FETCH before emailgen has run would otherwise stick the _seed_cache_path_mtime = path_stat.st_mtime
# mailbox at 0 for _SEED_RESCAN_INTERVAL seconds. _seed_cache_loaded_at = now
if not seed:
return _BAIT_EMAILS return _BAIT_EMAILS
_seed_cache = scanned
_seed_cache_dir_mtime = dir_stat.st_mtime combined: list[dict] = list(_BAIT_EMAILS)
_seed_cache_loaded_at = now base_uid = len(_BAIT_EMAILS)
return scanned for i, entry in enumerate(seed, start=1):
renumbered = dict(entry)
renumbered["uid"] = base_uid + i
combined.append(renumbered)
return combined
# ── Logging ─────────────────────────────────────────────────────────────────── # ── Logging ───────────────────────────────────────────────────────────────────

View File

@@ -10,7 +10,9 @@ Credentials via IMAP_USERS env var (shared with IMAP service).
""" """
import asyncio import asyncio
import json
import os import os
import sys
import time import time
from pathlib import Path from pathlib import Path
from typing import cast from typing import cast
@@ -33,11 +35,13 @@ VALID_USERS: dict[str, str] = {
u: p for part in _RAW_USERS.split(",") if ":" in part for u, p in [part.split(":", 1)] u: p for part in _RAW_USERS.split(",") if ":" in part for u, p in [part.split(":", 1)]
} }
# Path to a directory of ``*.eml`` files dropped by the orchestrator # Operator/realism-engine email seed source. Two shapes accepted:
# emailgen worker (``/var/spool/decnet-emails/`` by convention). When # 1. Directory: walked recursively for ``*.eml`` (RFC 822 on disk —
# set and populated, those EMLs replace the hardcoded fallback list # what the realism-engine emailgen worker drops in) and ``*.json``
# below — same semantics as the IMAP template. Empty / missing falls # (operator-curated lists of dicts; each dict formatted into RFC
# back so a fresh deployment is never silent. # 5322 on load).
# 2. Single ``*.json`` or ``*.eml`` file.
# Loaded entries are CONCATENATED with ``_BAIT_EMAILS`` — never replace.
_EMAIL_SEED_PATH = os.environ.get("POP3_EMAIL_SEED", "") _EMAIL_SEED_PATH = os.environ.get("POP3_EMAIL_SEED", "")
_SEED_RESCAN_INTERVAL = float(os.environ.get("POP3_EMAIL_SEED_RESCAN", "5")) _SEED_RESCAN_INTERVAL = float(os.environ.get("POP3_EMAIL_SEED_RESCAN", "5"))
@@ -172,60 +176,128 @@ _BAIT_EMAILS: list[str] = [
# ── Spool-backed email loader ───────────────────────────────────────────────── # ── Spool-backed email loader ─────────────────────────────────────────────────
# POP3 stores each message as a single str (full RFC 822 text); when the # POP3 stores each message as a single str (full RFC 822 text). Seeded
# emailgen spool is configured, we read every *.eml in it and serve the # entries CONCATENATE onto ``_BAIT_EMAILS`` (never replace). Both .eml
# raw bytes as the corpus. Same caching strategy as the IMAP template. # and .json sources are accepted — JSON dicts are formatted into RFC
# 5322 on load. Caching strategy matches the IMAP template.
_seed_cache: list[str] | None = None _seed_cache: list[str] | None = None
_seed_cache_dir_mtime: float = 0.0 _seed_cache_path_mtime: float = 0.0
_seed_cache_loaded_at: float = 0.0 _seed_cache_loaded_at: float = 0.0
_SEED_JSON_REQUIRED = ("from_addr", "to_addr", "subject", "body")
def _scan_seed_dir(path: Path) -> list[str]:
"""Walk *path* recursively and return each .eml's raw text content, def _seed_dict_to_rfc822(entry: dict) -> str | None:
sorted by mtime so older threads get lower indices.""" """Format a JSON-supplied dict into a full RFC 5322 message string.
eml_paths: list[Path] = []
Required keys: from_addr, to_addr, subject, body. Optional: date,
from_name. Returns None for malformed entries (caller skips + logs).
"""
if not isinstance(entry, dict):
return None
for key in _SEED_JSON_REQUIRED:
if not isinstance(entry.get(key), str) or not entry[key]:
return None
from_addr = entry["from_addr"]
from_name = str(entry.get("from_name") or from_addr.split("@", 1)[0])
date = str(entry.get("date") or "")
body = entry["body"]
if "\r\n\r\n" in body or "\n\n" in body:
return body # already a full RFC 822 message
return (
f"Date: {date}\r\n"
f"From: {from_name} <{from_addr}>\r\n"
f"To: {entry['to_addr']}\r\n"
f"Subject: {entry['subject']}\r\n"
"\r\n"
f"{body}"
)
def _load_seed_json(path: Path) -> list[str]:
"""Load a JSON list of dicts → list of RFC 822 strings."""
try: try:
for p in path.rglob("*.eml"): raw = path.read_text(encoding="utf-8")
if p.is_file(): data = json.loads(raw)
eml_paths.append(p) except (OSError, ValueError) as exc:
print(f"pop3: seed json {path} unreadable: {exc}", file=sys.stderr)
return []
if not isinstance(data, list):
print(f"pop3: seed json {path} must be a list", file=sys.stderr)
return []
out: list[str] = []
for i, entry in enumerate(data):
formatted = _seed_dict_to_rfc822(entry)
if formatted is None:
print(f"pop3: seed json {path}[{i}] missing required keys", file=sys.stderr)
continue
out.append(formatted)
return out
def _scan_seed(path: Path) -> list[str]:
"""Resolve *path* into RFC 822 strings (.eml direct, .json formatted)."""
out: list[str] = []
try:
if path.is_dir():
eml_paths = sorted(
(p for p in path.rglob("*.eml") if p.is_file()),
key=lambda p: p.stat().st_mtime,
)
for p in eml_paths:
try:
out.append(p.read_text(encoding="utf-8", errors="replace"))
except OSError:
continue
for jp in sorted(p for p in path.rglob("*.json") if p.is_file()):
out.extend(_load_seed_json(jp))
elif path.suffix.lower() == ".json" and path.is_file():
out.extend(_load_seed_json(path))
elif path.suffix.lower() == ".eml" and path.is_file():
try:
out.append(path.read_text(encoding="utf-8", errors="replace"))
except OSError:
pass
except OSError: except OSError:
return [] return []
eml_paths.sort(key=lambda p: p.stat().st_mtime)
out: list[str] = []
for p in eml_paths:
try:
out.append(p.read_text(encoding="utf-8", errors="replace"))
except OSError:
continue
return out return out
def _get_emails() -> list[str]: def _get_emails() -> list[str]:
"""Return the active corpus. Same fallback rules as IMAP template.""" """Return ``_BAIT_EMAILS`` concatenated with seed entries.
global _seed_cache, _seed_cache_dir_mtime, _seed_cache_loaded_at
Empty / missing seed → just ``_BAIT_EMAILS``. Hardcoded baits keep
indices 1..10; seeded messages start at 11.
"""
global _seed_cache, _seed_cache_path_mtime, _seed_cache_loaded_at
if not _EMAIL_SEED_PATH: if not _EMAIL_SEED_PATH:
return _BAIT_EMAILS return _BAIT_EMAILS
seed_dir = Path(_EMAIL_SEED_PATH)
seed_path = Path(_EMAIL_SEED_PATH)
try: try:
dir_stat = seed_dir.stat() path_stat = seed_path.stat()
except OSError: except OSError:
return _BAIT_EMAILS return _BAIT_EMAILS
now = time.monotonic() now = time.monotonic()
fresh_enough = ( fresh_enough = (
_seed_cache is not None _seed_cache is not None
and (now - _seed_cache_loaded_at) < _SEED_RESCAN_INTERVAL and (now - _seed_cache_loaded_at) < _SEED_RESCAN_INTERVAL
and dir_stat.st_mtime == _seed_cache_dir_mtime and path_stat.st_mtime == _seed_cache_path_mtime
) )
if fresh_enough: if fresh_enough:
return _seed_cache or _BAIT_EMAILS seed = _seed_cache or []
scanned = _scan_seed_dir(seed_dir) else:
if not scanned: seed = _scan_seed(seed_path)
_seed_cache = seed
_seed_cache_path_mtime = path_stat.st_mtime
_seed_cache_loaded_at = now
if not seed:
return _BAIT_EMAILS return _BAIT_EMAILS
_seed_cache = scanned return list(_BAIT_EMAILS) + seed
_seed_cache_dir_mtime = dir_stat.st_mtime
_seed_cache_loaded_at = now
return scanned
# ── Logging ─────────────────────────────────────────────────────────────────── # ── Logging ───────────────────────────────────────────────────────────────────

View File

@@ -110,14 +110,9 @@ All route decorators now declare `responses={401: {"description": "Not authentic
~~**File:** `decnet/web/sqlite_repository.py` (~400 lines)~~ ~~**File:** `decnet/web/sqlite_repository.py` (~400 lines)~~
Fully refactored to `decnet/web/db/` modular layout: `models.py` (SQLModel schema), `repository.py` (abstract base), `sqlite/repository.py` (SQLite implementation), `sqlite/database.py` (engine/session factory). Commit `de84cc6`. Fully refactored to `decnet/web/db/` modular layout: `models.py` (SQLModel schema), `repository.py` (abstract base), `sqlite/repository.py` (SQLite implementation), `sqlite/database.py` (engine/session factory). Commit `de84cc6`.
### DEBT-026 — IMAP/POP3 bait emails not configurable via service config ### ~~DEBT-026 — IMAP/POP3 bait emails not configurable via service config~~ ✅ RESOLVED
**Files:** `templates/imap/server.py`, `templates/pop3/server.py`, `decnet/services/imap.py`, `decnet/services/pop3.py` **Files:** `templates/imap/server.py`, `templates/pop3/server.py`, `decnet/services/imap.py`, `decnet/services/pop3.py`
Bait emails are hardcoded. A stub env var `IMAP_EMAIL_SEED` is read but currently ignored. Full implementation requires: Resolved 2026-05-03. `IMAP_EMAIL_SEED` / `POP3_EMAIL_SEED` now accept either a directory (rglob `*.eml` and `*.json`) or a single `.json` / `.eml` file. JSON entries are dicts with required keys `from_addr`, `to_addr`, `subject`, `body` (optional `from_name`, `date`, `flags`); bare-body entries are wrapped into RFC 5322 on load. Loaded entries CONCATENATE with `_BAIT_EMAILS` (additive to the realism-engine emailgen output — the hardcoded baits are no longer replaced). `compose_fragment()` reads `service_cfg["email_seed"]` and bind-mounts the host path read-only at `/var/spool/decnet-emails/seed`.
1. `IMAP_EMAIL_SEED` points to a JSON file with a list of `{from_, to, subject, date, body}` dicts.
2. `templates/imap/server.py` loads and merges/replaces `_BAIT_EMAILS` from that file at startup.
3. `decnet/services/imap.py` `compose_fragment()` reads `service_cfg["email_seed"]` and injects `IMAP_EMAIL_SEED` + a bind-mount for the seed file into the compose fragment.
4. Same pattern for POP3 (`POP3_EMAIL_SEED`).
**Status:** Stub in place — full wiring deferred to next session.
--- ---
@@ -713,7 +708,7 @@ user who needs it.
| DEBT-023 | 🟢 Low | Infra | deferred (needs docker pull) | | DEBT-023 | 🟢 Low | Infra | deferred (needs docker pull) |
| ~~DEBT-024~~ | ✅ | Infra | resolved | | ~~DEBT-024~~ | ✅ | Infra | resolved |
| ~~DEBT-025~~ | ✅ | Build | resolved | | ~~DEBT-025~~ | ✅ | Build | resolved |
| DEBT-026 | 🟡 Medium | Features | deferred (out of scope) | | ~~DEBT-026~~ | ✅ | Features | resolved 2026-05-03 |
| DEBT-027 | 🟡 Medium | Features | deferred (out of scope) | | DEBT-027 | 🟡 Medium | Features | deferred (out of scope) |
| DEBT-028 | 🟡 Medium | Testing | deferred (needs DinD CI) | | DEBT-028 | 🟡 Medium | Testing | deferred (needs DinD CI) |
| DEBT-029 | 🟡 Medium | Architecture / Bus | ✅ resolved | | DEBT-029 | 🟡 Medium | Architecture / Bus | ✅ resolved |
@@ -737,5 +732,5 @@ user who needs it.
| DEBT-048 | 🟡 Medium | TTP / Intel provider mapping review (recurring) | open / recurring | | DEBT-048 | 🟡 Medium | TTP / Intel provider mapping review (recurring) | open / recurring |
| DEBT-049 | 🟡 Medium | TTP / Sigma adapter (post-v1) | open | | DEBT-049 | 🟡 Medium | TTP / Sigma adapter (post-v1) | open |
**Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-026 (modular mailboxes), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only), DEBT-042 (orchestrator failure-count window), DEBT-043 (frontend test framework), DEBT-045 (EmailLifter heavyweight — partial paid; carved-out follow-ups remain), DEBT-046 (mal-hash feed), DEBT-048 (TTP intel provider mapping review — recurring quarterly), DEBT-049 (TTP Sigma adapter — post-v1). **Remaining open:** DEBT-011 (Alembic), DEBT-023 (image pinning), DEBT-027 (Dynamic bait store), DEBT-028 (deploy endpoint tests), DEBT-032 (fingerprint rotation detection), DEBT-033 (transcript shard rotation), DEBT-036 (session-profile ingester), DEBT-037 (webhook delivery hardening), DEBT-038 (SSH PAM cred-capture limitations — document-only), DEBT-042 (orchestrator failure-count window), DEBT-043 (frontend test framework), DEBT-045 (EmailLifter heavyweight — partial paid; carved-out follow-ups remain), DEBT-046 (mal-hash feed), DEBT-048 (TTP intel provider mapping review — recurring quarterly), DEBT-049 (TTP Sigma adapter — post-v1).
**Estimated remaining effort:** ~21 hours plus the new EmailLifter / TTP follow-ups. DEBT-030 Phase B (optimistic staged-buffer editor) is a follow-up, not debt. **Estimated remaining effort:** ~21 hours plus the new EmailLifter / TTP follow-ups. DEBT-030 Phase B (optimistic staged-buffer editor) is a follow-up, not debt.

View File

@@ -1,12 +1,15 @@
"""Spool-backed email loading for the IMAP template. """Seed-backed email loading for the IMAP template.
Verifies that when ``IMAP_EMAIL_SEED`` points at a directory of .eml Verifies that when ``IMAP_EMAIL_SEED`` points at a directory of .eml /
files, the IMAP server serves those (replacing the hardcoded .json (or a single .json / .eml), the IMAP server CONCATENATES those
``_BAIT_EMAILS`` fallback). Empty / missing dir falls back gracefully. entries onto the hardcoded ``_BAIT_EMAILS`` baseline. Empty / missing
input falls back to the baseline alone — the realism-engine output and
operator-curated seeds are additive, never replacing.
""" """
from __future__ import annotations from __future__ import annotations
import importlib.util import importlib.util
import json
import sys import sys
from pathlib import Path from pathlib import Path
from types import ModuleType from types import ModuleType
@@ -14,6 +17,8 @@ from unittest.mock import MagicMock, patch
import pytest import pytest
_HARDCODED = 10 # length of templates/imap/server.py::_BAIT_EMAILS
_EML_TEMPLATE = ( _EML_TEMPLATE = (
"From: {from_name} <{from_addr}>\r\n" "From: {from_name} <{from_addr}>\r\n"
@@ -78,41 +83,47 @@ def _seed(tmp_path: Path, n: int = 3) -> Path:
def test_falls_back_to_hardcoded_when_seed_unset(tmp_path): def test_falls_back_to_hardcoded_when_seed_unset(tmp_path):
mod = _load_imap({}) mod = _load_imap({})
emails = mod._get_emails() emails = mod._get_emails()
# The shipped fallback ships exactly 10 entries. # The shipped baseline is exactly 10 entries.
assert len(emails) == 10 assert len(emails) == _HARDCODED
assert emails[0]["from_addr"] == "devops@company.internal" assert emails[0]["from_addr"] == "devops@company.internal"
def test_falls_back_when_seed_dir_missing(tmp_path): def test_falls_back_when_seed_dir_missing(tmp_path):
mod = _load_imap({"IMAP_EMAIL_SEED": str(tmp_path / "does-not-exist")}) mod = _load_imap({"IMAP_EMAIL_SEED": str(tmp_path / "does-not-exist")})
emails = mod._get_emails() emails = mod._get_emails()
assert len(emails) == 10 # fallback assert len(emails) == _HARDCODED # baseline only
def test_falls_back_when_seed_dir_empty(tmp_path): def test_falls_back_when_seed_dir_empty(tmp_path):
(tmp_path / "spool").mkdir() (tmp_path / "spool").mkdir()
mod = _load_imap({"IMAP_EMAIL_SEED": str(tmp_path / "spool")}) mod = _load_imap({"IMAP_EMAIL_SEED": str(tmp_path / "spool")})
assert len(mod._get_emails()) == 10 # fallback (no .eml files) assert len(mod._get_emails()) == _HARDCODED # baseline only
def test_loads_eml_files_from_spool(tmp_path): def test_seed_concatenates_with_hardcoded(tmp_path):
spool = _seed(tmp_path, n=3) spool = _seed(tmp_path, n=3)
mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)}) mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)})
emails = mod._get_emails() emails = mod._get_emails()
assert len(emails) == 3 # Hardcoded 10 + 3 spooled = 13.
senders = {e["from_addr"] for e in emails} assert len(emails) == _HARDCODED + 3
# Hardcoded baseline keeps original UIDs 1..10.
assert emails[0]["uid"] == 1
assert emails[0]["from_addr"] == "devops@company.internal"
assert emails[9]["uid"] == 10
# Seeded entries pick up at UID 11.
assert {e["uid"] for e in emails[10:]} == {11, 12, 13}
senders = {e["from_addr"] for e in emails[10:]}
assert senders == {"sender0@corp.com", "sender1@corp.com", "sender2@corp.com"} assert senders == {"sender0@corp.com", "sender1@corp.com", "sender2@corp.com"}
# UIDs are 1-based and unique.
assert {e["uid"] for e in emails} == {1, 2, 3}
def test_loaded_eml_carries_full_rfc822_body(tmp_path): def test_loaded_eml_carries_full_rfc822_body(tmp_path):
spool = _seed(tmp_path, n=1) spool = _seed(tmp_path, n=1)
mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)}) mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)})
emails = mod._get_emails() emails = mod._get_emails()
assert "From:" in emails[0]["body"] seeded = emails[_HARDCODED]
assert "Subject: Topic 0" in emails[0]["body"] assert "From:" in seeded["body"]
assert "Body of message 0." in emails[0]["body"] assert "Subject: Topic 0" in seeded["body"]
assert "Body of message 0." in seeded["body"]
def test_corrupt_eml_skipped_not_fatal(tmp_path): def test_corrupt_eml_skipped_not_fatal(tmp_path):
@@ -127,11 +138,56 @@ def test_corrupt_eml_skipped_not_fatal(tmp_path):
(spool / "broken.eml").mkdir() (spool / "broken.eml").mkdir()
mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)}) mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)})
emails = mod._get_emails() emails = mod._get_emails()
assert len(emails) == 1 assert len(emails) == _HARDCODED + 1
assert emails[0]["from_addr"] == "good@corp.com" assert emails[-1]["from_addr"] == "good@corp.com"
def test_select_inbox_reflects_spool_count(tmp_path): def test_json_seed_file_loaded(tmp_path):
seed = tmp_path / "seed.json"
seed.write_text(json.dumps([
{
"from_addr": "ceo@corp.com",
"from_name": "CEO",
"to_addr": "admin@corp.com",
"subject": "Q4 numbers",
"date": "Mon, 27 Apr 2026 09:00:00 +0000",
"body": "Please review attached.",
},
{
# Missing 'subject' — must be skipped, not crash.
"from_addr": "ghost@corp.com",
"to_addr": "admin@corp.com",
"body": "no subject",
},
]))
mod = _load_imap({"IMAP_EMAIL_SEED": str(seed)})
emails = mod._get_emails()
assert len(emails) == _HARDCODED + 1 # one valid, one dropped
seeded = emails[-1]
assert seeded["uid"] == _HARDCODED + 1
assert seeded["from_addr"] == "ceo@corp.com"
# JSON entry without RFC 822 headers gets wrapped into a full message.
assert "From: CEO <ceo@corp.com>" in seeded["body"]
assert "Subject: Q4 numbers" in seeded["body"]
def test_dir_with_eml_and_json_concatenated(tmp_path):
    """A seed directory mixing .eml and .json files contributes every entry."""
    seed_dir = _seed(tmp_path, n=2)
    extra_entry = {
        "from_addr": "ops@corp.com",
        "to_addr": "admin@corp.com",
        "subject": "extra",
        "body": "hi",
    }
    (seed_dir / "extra.json").write_text(json.dumps([extra_entry]))
    loaded = _load_imap({"IMAP_EMAIL_SEED": str(seed_dir)})._get_emails()
    # Hardcoded + 2 .eml + 1 .json
    assert len(loaded) == _HARDCODED + 3
def test_select_inbox_reflects_concatenated_count(tmp_path):
spool = _seed(tmp_path, n=4) spool = _seed(tmp_path, n=4)
mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)}) mod = _load_imap({"IMAP_EMAIL_SEED": str(spool)})
proto = mod.IMAPProtocol() proto = mod.IMAPProtocol()
@@ -144,5 +200,6 @@ def test_select_inbox_reflects_spool_count(tmp_path):
written.clear() written.clear()
proto.data_received(b"B0 SELECT INBOX\r\n") proto.data_received(b"B0 SELECT INBOX\r\n")
out = b"".join(written) out = b"".join(written)
assert b"* 4 EXISTS" in out expected_total = _HARDCODED + 4
assert b"[UIDNEXT 5]" in out assert f"* {expected_total} EXISTS".encode() in out
assert f"[UIDNEXT {expected_total + 1}]".encode() in out

View File

@@ -1,12 +1,19 @@
"""Spool-backed email loading for the POP3 template.""" """Seed-backed email loading for the POP3 template.
Concat semantics: hardcoded ``_BAIT_EMAILS`` + .eml/.json from the seed
path. Mirrors the IMAP test file.
"""
from __future__ import annotations from __future__ import annotations
import importlib.util import importlib.util
import json
import sys import sys
from pathlib import Path from pathlib import Path
from types import ModuleType from types import ModuleType
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
_HARDCODED = 10 # length of templates/pop3/server.py::_BAIT_EMAILS
_EML_TEMPLATE = ( _EML_TEMPLATE = (
"From: Sender <sender@corp.com>\r\n" "From: Sender <sender@corp.com>\r\n"
@@ -61,25 +68,54 @@ def _seed(tmp_path: Path, n: int) -> Path:
def test_falls_back_when_seed_unset(tmp_path):
    """With no POP3_EMAIL_SEED set, only the hardcoded baseline is served."""
    mod = _load_pop3({})
    assert len(mod._get_emails()) == _HARDCODED  # baseline only
def test_falls_back_when_seed_dir_missing(tmp_path):
    """A seed path that does not exist leaves just the hardcoded baseline."""
    mod = _load_pop3({"POP3_EMAIL_SEED": str(tmp_path / "nope")})
    assert len(mod._get_emails()) == _HARDCODED
def test_seed_concatenates_with_hardcoded(tmp_path):
    """Seeded .eml files are appended after the hardcoded POP3 messages."""
    spool = _seed(tmp_path, n=3)
    mod = _load_pop3({"POP3_EMAIL_SEED": str(spool)})
    emails = mod._get_emails()
    # Hardcoded baseline + 3 spooled .eml.
    assert len(emails) == _HARDCODED + 3
    # Hardcoded entries unchanged at the head.
    assert "AWS credentials rotation" in emails[0]
    # Seeded entries at the tail; each entry is checked as a raw message
    # string, so the seeded ones must start with their first header.
    assert any("Topic 0" in e for e in emails[_HARDCODED:])
    assert all(e.startswith("From:") for e in emails[_HARDCODED:])
def test_json_seed_file_loaded(tmp_path):
    """A single JSON seed file contributes its valid entries after the baseline.

    The file holds one complete entry and one entry lacking ``subject``; only
    the complete one is expected to show up in the loaded list.
    """
    seed = tmp_path / "seed.json"
    seed.write_text(json.dumps([
        {
            "from_addr": "ceo@corp.com",
            "from_name": "CEO",
            "to_addr": "admin@corp.com",
            "subject": "Q4 numbers",
            "date": "Mon, 27 Apr 2026 09:00:00 +0000",
            "body": "Please review attached.",
        },
        {
            # Missing 'subject' — skipped, not fatal.
            "from_addr": "ghost@corp.com",
            "to_addr": "admin@corp.com",
            "body": "no subject",
        },
    ]))
    mod = _load_pop3({"POP3_EMAIL_SEED": str(seed)})
    emails = mod._get_emails()
    assert len(emails) == _HARDCODED + 1
    # The loaded entry is checked as message text with generated headers.
    seeded = emails[-1]
    assert "Subject: Q4 numbers" in seeded
    assert "From: CEO <ceo@corp.com>" in seeded
def test_stat_reflects_concatenated_count(tmp_path):
spool = _seed(tmp_path, n=2) spool = _seed(tmp_path, n=2)
mod = _load_pop3({"POP3_EMAIL_SEED": str(spool)}) mod = _load_pop3({"POP3_EMAIL_SEED": str(spool)})
proto = mod.POP3Protocol() proto = mod.POP3Protocol()
@@ -93,4 +129,5 @@ def test_stat_reflects_spool_size(tmp_path):
written.clear() written.clear()
proto.data_received(b"STAT\r\n") proto.data_received(b"STAT\r\n")
out = b"".join(written) out = b"".join(written)
assert out.startswith(b"+OK 2 ") expected = _HARDCODED + 2
assert out.startswith(f"+OK {expected} ".encode())

View File

@@ -361,3 +361,39 @@ def test_telnet_no_cowrie_env_vars():
"""Ensure no Cowrie env vars bleed into the real telnet service.""" """Ensure no Cowrie env vars bleed into the real telnet service."""
env = _fragment("telnet").get("environment", {}) env = _fragment("telnet").get("environment", {})
assert not any(k.startswith("COWRIE_") for k in env) assert not any(k.startswith("COWRIE_") for k in env)
# IMAP / POP3 email_seed -----------------------------------------------------
def test_imap_no_email_seed_by_default():
    """Without ``email_seed`` the IMAP fragment has no seed env var or volumes."""
    frag = _fragment("imap")
    env = frag.get("environment", {})
    assert "IMAP_EMAIL_SEED" not in env
    assert "volumes" not in frag
def test_imap_email_seed_wires_env_and_volume(tmp_path):
    """``email_seed`` sets IMAP_EMAIL_SEED and adds one read-only bind mount."""
    seed_dir = tmp_path / "seed"
    seed_dir.mkdir()
    fragment = _fragment("imap", service_cfg={"email_seed": str(seed_dir)})
    # The env var carries the in-container path, never the host path.
    assert fragment["environment"]["IMAP_EMAIL_SEED"] == "/var/spool/decnet-emails/seed"
    volumes = fragment.get("volumes") or []
    assert len(volumes) == 1
    assert volumes[0].endswith(":/var/spool/decnet-emails/seed:ro")
    # The service runs the host path through expanduser().resolve(), so the
    # expectation is resolved too; otherwise a symlinked temp directory
    # would make this prefix check fail spuriously.
    assert volumes[0].startswith(str(seed_dir.resolve()))
def test_pop3_no_email_seed_by_default():
    """Without ``email_seed`` the POP3 fragment has no seed env var or volumes."""
    frag = _fragment("pop3")
    env = frag.get("environment", {})
    assert "POP3_EMAIL_SEED" not in env
    assert "volumes" not in frag
def test_pop3_email_seed_wires_env_and_volume(tmp_path):
    """``email_seed`` sets POP3_EMAIL_SEED and adds one read-only bind mount.

    Uses a single seed file rather than a directory as the host path.
    """
    seed_file = tmp_path / "seed.json"
    seed_file.write_text("[]")
    fragment = _fragment("pop3", service_cfg={"email_seed": str(seed_file)})
    # The env var carries the in-container path, never the host path.
    assert fragment["environment"]["POP3_EMAIL_SEED"] == "/var/spool/decnet-emails/seed"
    volumes = fragment.get("volumes") or []
    assert len(volumes) == 1
    assert volumes[0].endswith(":/var/spool/decnet-emails/seed:ro")
    # NOTE(review): presumably the POP3 service resolves the host path the
    # same way the IMAP service does — confirm. Comparing against the
    # resolved path keeps this check stable if the temp dir is a symlink.
    assert volumes[0].startswith(str(seed_file.resolve()))