merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,23 @@
ARG BASE_IMAGE=debian:bookworm-slim
FROM ${BASE_IMAGE}

# Install the interpreter and libcap2-bin (provides setcap) in ONE layer so
# `apt-get update` runs once and the package lists are removed in that same
# layer instead of being fetched twice.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        libcap2-bin \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged service account. The python3 binaries get
# cap_net_bind_service so the non-root user can still bind ports < 1024.
# setcap failures are deliberately tolerated (best effort).
# Done before the COPYs so editing the service code does not invalidate
# this layer.
RUN useradd -r -s /bin/false -d /opt logrelay \
    && (find /usr/bin/ -maxdepth 1 -name 'python3*' -type f -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true)

COPY syslog_bridge.py /opt/syslog_bridge.py
COPY server.py /opt/server.py
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

EXPOSE 143 993

# Liveness only: succeeds while PID 1 exists and can be signalled.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD kill -0 1 || exit 1

USER logrelay
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Container entrypoint: start the IMAP service.
# Abort immediately if any command fails.
set -e
# exec replaces this shell with the interpreter, so python3 keeps the
# shell's PID and receives signals directly.
exec python3 /opt/server.py

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Per-instance stealth seeding for honeypot service templates.
The whole decoy fleet looks identical to a scanner unless each decky
diverges on the boring details: cluster UUIDs, auth salts, uptime, minor
version strings, etc. This module derives a stable per-instance seed
from NODE_NAME (+ optional INSTANCE_ID) and exposes helpers that return
deterministic-per-decky-but-different-across-the-fleet values.
Connection-time jitter is intentionally NOT seeded — two hits to the same
decky should not replay the same latency curve.
"""
from __future__ import annotations
import asyncio
import hashlib
import os
import random
import time
import uuid
from typing import Sequence, TypeVar
T = TypeVar("T")
# Identity used for every per-instance derivation: NODE_NAME wins, then the
# HOSTNAME environment variable, then the literal "decky".
_HOSTNAME = (
    os.environ.get("NODE_NAME")
    or os.environ.get("HOSTNAME")
    or "decky"
)
# Optional extra discriminator. In this module it is only mixed into the RNG
# seed below; the hash-based helpers key on _HOSTNAME alone.
_INSTANCE_ID = os.environ.get("INSTANCE_ID", "")
_SEED_MATERIAL = f"{_HOSTNAME}:{_INSTANCE_ID}".encode()
# First 8 bytes of SHA-256(seed material), read as a big-endian integer.
_SEED_INT = int.from_bytes(hashlib.sha256(_SEED_MATERIAL).digest()[:8], "big")
#: Deterministic RNG seeded per decky — use for *persistent* choices
#: (versions, UUIDs, stored credentials). Never use for timing.
rng = random.Random(_SEED_INT)
#: Process boot time — real uptime elapsed since container start.
_PROCESS_START = time.time()
#: Deterministic per-instance fake "has been up for this long at boot"
#: offset, so every decky pretends to have a different history.
# This is the first draw from ``rng`` (between 1 hour and 45 days, in
# seconds); drawing anything from ``rng`` above this line would change it.
_BOOT_OFFSET = rng.randint(3600, 45 * 86400)
def hostname() -> str:
    """Return the name this instance identifies as (the module's ``_HOSTNAME``)."""
    return _HOSTNAME
def uptime_seconds() -> int:
    """Return the fake uptime in whole seconds, unique per instance.

    This is the per-instance ``_BOOT_OFFSET`` plus the real time elapsed
    since this process started. Elapsed time is measured with the wall
    clock (``time.time()``), so the value only increases as long as the
    system clock is not stepped backwards.
    """
    return int(_BOOT_OFFSET + (time.time() - _PROCESS_START))
def boot_epoch() -> int:
    """Fake wall-clock boot time for this instance (seconds since epoch).

    Computed directly from the two module constants so the answer is the
    same on every call. The previous form subtracted the *truncated*
    ``uptime_seconds()`` from a float ``time.time()``, which made the
    reported boot time flip between two adjacent seconds depending on the
    sub-second phase of the call — something a real host never does.
    """
    return int(_PROCESS_START - _BOOT_OFFSET)
def instance_uuid(namespace: str = "") -> str:
    """Return a deterministic UUID string for this instance and *namespace*.

    The value is a name-based UUID (``uuid.uuid5``) computed under the nil
    namespace from ``"<hostname>:<namespace>"``.

    NOTE(review): ``uuid5`` yields a *version 5* UUID, so the result does
    not carry the version nibble of a random (version 4) UUID.
    """
    nil_namespace = uuid.UUID(int=0)
    name = f"{_HOSTNAME}:{namespace}"
    return str(uuid.uuid5(nil_namespace, name))
def instance_hex(nbytes: int, namespace: str = "") -> str:
    """Return a deterministic hex token encoding *nbytes* bytes.

    The byte stream starts as SHA-256 of ``"<hostname>:<namespace>"`` and is
    extended, 32 bytes at a time, with the SHA-256 of everything generated
    so far until it is long enough.
    """
    seed = f"{_HOSTNAME}:{namespace}".encode()
    stream = hashlib.sha256(seed).digest()
    while nbytes > len(stream):
        # Each round hashes the whole stream produced so far.
        stream = stream + hashlib.sha256(stream).digest()
    token = stream[:nbytes]
    return token.hex()
def pick(choices: Sequence[T]) -> T:
    """Deterministic choice from a sequence.

    Draws from the module-level seeded ``rng``; every draw advances that
    shared generator, so the result depends on how many values were drawn
    from it earlier in this process.
    """
    return rng.choice(list(choices))
def pick_weighted(choices: Sequence[tuple[T, float]]) -> T:
    """Deterministic weighted choice over ``[(item, weight), ...]``.

    One uniform draw in ``[0, sum(weights)]`` is taken from the seeded
    ``rng``; the first item whose cumulative weight reaches the draw wins.
    """
    weight_sum = sum(weight for _, weight in choices)
    threshold = rng.uniform(0, weight_sum)
    cumulative = 0.0
    for candidate, weight in choices:
        cumulative += weight
        if threshold <= cumulative:
            return candidate
    # Floating-point rounding can leave the draw just above the final
    # cumulative sum; fall back to the last item.
    return choices[-1][0]
def random_bytes(n: int, namespace: str = "") -> bytes:
    """Return *n* deterministic per-instance bytes.

    The stream is the concatenation of
    ``SHA-256("<hostname>:<namespace>:<counter>")`` for counter = 0, 1, 2, …
    truncated to *n* bytes.
    """
    blocks: list[bytes] = []
    produced = 0
    counter = 0
    while produced < n:
        block = hashlib.sha256(f"{_HOSTNAME}:{namespace}:{counter}".encode()).digest()
        blocks.append(block)
        produced += len(block)
        counter += 1
    return b"".join(blocks)[:n]
def fresh_bytes(n: int) -> bytes:
    """Non-deterministic random bytes — for per-connection nonces/salts.

    Returns *n* bytes from ``os.urandom``; unlike the other helpers in this
    module the result is not derived from the instance seed.
    """
    return os.urandom(n)
async def jitter(min_ms: int = 5, max_ms: int = 120) -> None:
    """Sleep asynchronously for a random delay between the two bounds (ms).

    The delay comes from the module-global, unseeded ``random`` functions on
    purpose: timing must differ between connections to the same decky, and
    a seeded delay would be predictable.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    await asyncio.sleep(delay_ms / 1000.0)
def jitter_sync(min_ms: int = 5, max_ms: int = 120) -> None:
    """Blocking counterpart of ``jitter`` for servers that do not use asyncio.

    Sleeps the calling thread for a random delay between the bounds (ms),
    drawn from the unseeded ``random`` functions.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    time.sleep(delay_ms / 1000.0)

View File

@@ -0,0 +1,679 @@
#!/usr/bin/env python3
"""
IMAP server (port 143).
Full IMAP4rev1 state machine with bait mailbox.
States: NOT_AUTHENTICATED → AUTHENTICATED → SELECTED
Credentials via IMAP_USERS env var ("user:pass,user2:pass2").
10 bait emails in INBOX containing AWS keys, DB passwords, tokens etc.
Banner advertises Dovecot so nmap fingerprints correctly.
"""
import asyncio
import email
import email.policy
import os
import time
from email.utils import getaddresses
from pathlib import Path
from syslog_bridge import (
SEVERITY_WARNING,
encode_secret,
forward_syslog,
syslog_line,
write_syslog_file,
)
# ── Configuration (all read once from the environment at import time) ────────
# Hostname written into every syslog line and into ENVELOPE message-ids.
NODE_NAME = os.environ.get("NODE_NAME", "mailserver")
SERVICE_NAME = "imap"
LOG_TARGET = os.environ.get("LOG_TARGET", "")
# A non-numeric PORT raises ValueError here, i.e. at import time.
PORT = int(os.environ.get("PORT", "143"))
IMAP_BANNER = os.environ.get("IMAP_BANNER", "* OK Dovecot ready.\r\n")
_RAW_USERS = os.environ.get("IMAP_USERS", "admin:admin123,root:toor,mail:mail,user:user")
# "user:pass,user2:pass2" → {user: pass}. Entries without a ':' are skipped;
# only the first ':' splits, so a password may itself contain ':'.
# Whitespace around entries is kept as-is.
VALID_USERS: dict[str, str] = {
    u: p for part in _RAW_USERS.split(",") if ":" in part for u, p in [part.split(":", 1)]
}
# Path to a directory of ``*.eml`` files that the orchestrator emailgen
# worker drops into the container (``/var/spool/decnet-emails/`` by
# convention). When set AND the directory contains parseable EMLs,
# they replace the hardcoded ``_BAIT_EMAILS`` fallback below — meaning
# every mail an attacker reads is the LLM-generated, persona-driven,
# language-aware version, not the static credential-stuffed bait list.
# Empty / missing / unparseable: the fallback list still serves so a
# fresh deployment is never silent.
_EMAIL_SEED_PATH = os.environ.get("IMAP_EMAIL_SEED", "")
# Re-scan the seed directory at most this often. Cheap: walking a few
# dozen .eml files is sub-millisecond, but caching keeps an attacker's
# rapid LIST/FETCH burst from re-parsing the same files on every
# command. Mtime check still triggers a re-load on real changes.
# Value is in seconds (it is compared against time.monotonic() deltas).
_SEED_RESCAN_INTERVAL = float(os.environ.get("IMAP_EMAIL_SEED_RESCAN", "5"))
# ── Bait emails ───────────────────────────────────────────────────────────────
# All 10 live in INBOX. UID == sequence number.
# Each entry carries envelope metadata (uid, flags, from/to, subject, date)
# plus "body", which holds the complete message: headers, blank line, text,
# all with CRLF line endings. The metadata duplicates the header lines
# inside "body", so keep both in sync when editing an entry.
_BAIT_EMAILS: list[dict] = [
    {
        "uid": 1, "flags": [r"\Seen"],
        "from_name": "DevOps Team", "from_addr": "devops@company.internal",
        "to_addr": "admin@company.internal",
        "subject": "AWS credentials rotation",
        "date": "Mon, 06 Nov 2023 09:12:33 +0000",
        "body": (
            "Date: Mon, 06 Nov 2023 09:12:33 +0000\r\n"
            "From: DevOps Team <devops@company.internal>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: AWS credentials rotation\r\n"
            "Message-ID: <1@company.internal>\r\n"
            "\r\n"
            "Team,\r\n\r\n"
            "New AWS credentials have been issued. Old keys deactivated.\r\n\r\n"
            "Access Key ID: AKIAIOSFODNN7EXAMPLE\r\n"
            "Secret Access Key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\r\n\r\n"
            "Update ~/.aws/credentials immediately.\r\n\r\n-- DevOps\r\n"
        ),
    },
    {
        "uid": 2, "flags": [r"\Seen"],
        "from_name": "Monitoring", "from_addr": "monitoring@company.internal",
        "to_addr": "admin@company.internal",
        "subject": "DB password changed",
        "date": "Tue, 07 Nov 2023 14:05:11 +0000",
        "body": (
            "Date: Tue, 07 Nov 2023 14:05:11 +0000\r\n"
            "From: Monitoring <monitoring@company.internal>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: DB password changed\r\n"
            "Message-ID: <2@company.internal>\r\n"
            "\r\n"
            "Production database password was rotated.\r\n\r\n"
            "Connection string: mysql://admin:Sup3rS3cr3t!@10.0.1.5:3306/production\r\n\r\n"
            "Update all app configs.\r\n"
        ),
    },
    {
        "uid": 3, "flags": [],
        "from_name": "GitHub", "from_addr": "noreply@github.com",
        "to_addr": "admin@company.internal",
        "subject": "Your personal access token",
        "date": "Wed, 08 Nov 2023 08:30:00 +0000",
        "body": (
            "Date: Wed, 08 Nov 2023 08:30:00 +0000\r\n"
            "From: GitHub <noreply@github.com>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: Your personal access token\r\n"
            "Message-ID: <3@company.internal>\r\n"
            "\r\n"
            "Hi admin,\r\n\r\n"
            "A new personal access token was created for your account.\r\n\r\n"
            "Token: ghp_16C7e42F292c6912E7710c838347Ae178B4a\r\n\r\n"
            "If this wasn't you, revoke it immediately at github.com/settings/tokens.\r\n"
        ),
    },
    {
        "uid": 4, "flags": [r"\Seen"],
        "from_name": "IT Admin", "from_addr": "admin@company.internal",
        "to_addr": "team@company.internal",
        "subject": "VPN config attached",
        "date": "Thu, 09 Nov 2023 11:22:47 +0000",
        "body": (
            "Date: Thu, 09 Nov 2023 11:22:47 +0000\r\n"
            "From: IT Admin <admin@company.internal>\r\n"
            "To: team@company.internal\r\n"
            "Subject: VPN config attached\r\n"
            "Message-ID: <4@company.internal>\r\n"
            "\r\n"
            "VPN access details for new starters:\r\n\r\n"
            " Host: vpn.company.internal:1194\r\n"
            " Protocol: UDP\r\n"
            " Username: vpnadmin\r\n"
            " Password: VpnP@ss2024\r\n\r\n"
            "Config file sent separately via secure channel.\r\n"
        ),
    },
    {
        "uid": 5, "flags": [],
        "from_name": "SysAdmin", "from_addr": "sysadmin@company.internal",
        "to_addr": "admin@company.internal",
        "subject": "Root password",
        "date": "Fri, 10 Nov 2023 16:45:00 +0000",
        "body": (
            "Date: Fri, 10 Nov 2023 16:45:00 +0000\r\n"
            "From: SysAdmin <sysadmin@company.internal>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: Root password\r\n"
            "Message-ID: <5@company.internal>\r\n"
            "\r\n"
            "New root password for prod servers:\r\n\r\n"
            " r00tM3T00!\r\n\r\n"
            "Change after first login. Do NOT forward this email.\r\n"
        ),
    },
    {
        "uid": 6, "flags": [r"\Seen"],
        "from_name": "Backup System", "from_addr": "backup@company.internal",
        "to_addr": "admin@company.internal",
        "subject": "Backup job failed",
        "date": "Sat, 11 Nov 2023 03:12:04 +0000",
        "body": (
            "Date: Sat, 11 Nov 2023 03:12:04 +0000\r\n"
            "From: Backup System <backup@company.internal>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: Backup job failed\r\n"
            "Message-ID: <6@company.internal>\r\n"
            "\r\n"
            "Nightly backup to 192.168.1.50:/mnt/nas FAILED at 03:11 UTC.\r\n\r\n"
            "Error: Authentication failed. Credentials in /etc/backup.conf may be stale.\r\n\r\n"
            "Last successful backup: 2023-11-10 03:11 UTC\r\n"
        ),
    },
    {
        "uid": 7, "flags": [r"\Seen"],
        "from_name": "Security Alerts", "from_addr": "alerts@company.internal",
        "to_addr": "admin@company.internal",
        "subject": "SSH brute-force alert",
        "date": "Sun, 12 Nov 2023 07:04:31 +0000",
        "body": (
            "Date: Sun, 12 Nov 2023 07:04:31 +0000\r\n"
            "From: Security Alerts <alerts@company.internal>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: SSH brute-force alert\r\n"
            "Message-ID: <7@company.internal>\r\n"
            "\r\n"
            "47 failed SSH login attempts detected against prod-web-01.\r\n\r\n"
            "Source IPs: 185.220.101.34, 185.220.101.47, 185.220.101.52\r\n"
            "Target user: root\r\n"
            # NOTE(review): the time range below seems to have lost its
            # separator ("06:58 07:04") — confirm the intended text.
            "Period: 2023-11-12 06:58 07:04 UTC\r\n\r\n"
            "All attempts blocked by fail2ban. No successful logins.\r\n"
        ),
    },
    {
        "uid": 8, "flags": [r"\Seen"],
        "from_name": "External Vendor", "from_addr": "vendor@external.com",
        "to_addr": "admin@company.internal",
        "subject": "RE: API integration",
        "date": "Mon, 13 Nov 2023 10:11:55 +0000",
        "body": (
            "Date: Mon, 13 Nov 2023 10:11:55 +0000\r\n"
            "From: External Vendor <vendor@external.com>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: RE: API integration\r\n"
            "Message-ID: <8@company.internal>\r\n"
            "\r\n"
            "Hi,\r\n\r\n"
            "Here is the live API key for the integration:\r\n\r\n"
            " sk_live_9mK3xF2aP7qR1bN8cT4dW6vE0yU5hJ\r\n\r\n"
            "Keep this confidential. Let me know if you need the webhook secret.\r\n\r\n"
            "Best regards,\r\nVendor Support\r\n"
        ),
    },
    {
        "uid": 9, "flags": [],
        "from_name": "Help Desk", "from_addr": "helpdesk@company.internal",
        "to_addr": "admin@company.internal",
        "subject": "Password reset request",
        "date": "Tue, 14 Nov 2023 13:48:22 +0000",
        "body": (
            "Date: Tue, 14 Nov 2023 13:48:22 +0000\r\n"
            "From: Help Desk <helpdesk@company.internal>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: Password reset request\r\n"
            "Message-ID: <9@company.internal>\r\n"
            "\r\n"
            "Hi,\r\n\r\n"
            "Could you reset my MFA? Current password is Winter2024! so you can verify it's me.\r\n\r\n"
            "Thanks\r\n"
        ),
    },
    {
        "uid": 10, "flags": [r"\Seen"],
        "from_name": "AWS Billing", "from_addr": "noreply@aws.amazon.com",
        "to_addr": "admin@company.internal",
        "subject": "Your AWS bill is ready",
        "date": "Wed, 15 Nov 2023 00:01:00 +0000",
        "body": (
            "Date: Wed, 15 Nov 2023 00:01:00 +0000\r\n"
            "From: AWS Billing <noreply@aws.amazon.com>\r\n"
            "To: admin@company.internal\r\n"
            "Subject: Your AWS bill is ready\r\n"
            "Message-ID: <10@company.internal>\r\n"
            "\r\n"
            "Your AWS bill for October 2023 is $847.23.\r\n\r\n"
            "Top services:\r\n"
            " EC2 (us-east-1): $412.10\r\n"
            " RDS (us-east-1): $198.50\r\n"
            " S3: $87.43\r\n"
            " EC2 (eu-west-2): $149.20\r\n\r\n"
            "Account ID: 123456789012\r\n"
        ),
    },
]
# Mailbox names returned by LIST/LSUB. Only INBOX ever reports messages.
_MAILBOXES = ["INBOX", "Sent", "Drafts", "Archive"]
# ── Spool-backed email loader ─────────────────────────────────────────────────
# When IMAP_EMAIL_SEED points at a directory of .eml files the
# orchestrator emailgen worker has dropped into the container, parse
# them on demand and serve them as the INBOX. Cached between requests
# with a short TTL + mtime check so a hot mailbox doesn't pay the parse
# cost on every IMAP command.
#
# Failure modes (missing dir, unparseable EMLs, empty dir) all return
# the hardcoded fallback rather than 0 messages — a silent INBOX is a
# stronger tell than a slightly-stale one.
# Last successful non-empty scan result; None until the first one.
_seed_cache: list[dict] | None = None
# st_mtime of the seed directory at the time of that scan.
_seed_cache_dir_mtime: float = 0.0
# time.monotonic() reading taken when that scan was started.
_seed_cache_loaded_at: float = 0.0
def _split_addr(value: str) -> tuple[str, str]:
"""Return (display_name, email) from a header value, falling back to
the raw string when the parse fails. Worker side; we don't need
real RFC 5322 — just enough to populate the IMAP envelope tuple."""
if not value:
return "", ""
pairs = getaddresses([value])
if not pairs:
return "", value
name, addr = pairs[0]
return (name or "").strip(), (addr or value).strip()
def _eml_to_dict(path: Path, uid: int) -> dict | None:
    """Parse one .eml into the dict shape the rest of this server uses.

    Args:
        path: The ``.eml`` file to read.
        uid: UID to store in the resulting dict.

    Returns:
        A dict with the same keys as the ``_BAIT_EMAILS`` entries, or None
        when the file cannot be read or parsed; callers skip + continue so
        one corrupt EML does not kill the whole INBOX listing.
    """
    try:
        raw = path.read_bytes()
        msg = email.message_from_bytes(raw, policy=email.policy.compat32)
    except Exception:  # noqa: BLE001
        return None

    def header(name: str) -> str:
        # Under compat32 a header containing raw 8-bit bytes comes back as
        # an email.header.Header object, not a str, so coerce with str().
        # Folded headers keep their CR/LF; collapsing all whitespace runs
        # gives a single-line value that is safe inside an IMAP quoted
        # string.
        return " ".join(str(msg.get(name) or "").split())

    from_name, from_addr = _split_addr(header("From"))
    _to_name, to_addr = _split_addr(header("To"))
    if not from_name:
        # No display name: fall back to the local part of the address.
        # (Written out explicitly — the one-line form
        # ``a or b if c else d`` parses as ``(a or b) if c else d``.)
        from_name = from_addr.split("@", 1)[0] if from_addr else "Unknown"

    # The fallback bait entries use CRLF throughout and the FETCH code splits
    # headers from text on "\r\n\r\n", so normalise LF-only files to CRLF.
    body = raw.decode("utf-8", errors="replace")
    body = body.replace("\r\n", "\n").replace("\n", "\r\n")

    return {
        "uid": uid,
        "flags": [],  # never \Seen for spool emails — fresh delivery
        "from_name": from_name,
        "from_addr": from_addr or "unknown@localhost",
        "to_addr": to_addr or "unknown@localhost",
        "subject": header("Subject") or "(no subject)",
        "date": header("Date"),
        # The body field carries the full RFC 822 message — headers + body.
        # That mirrors how the hardcoded _BAIT_EMAILS entries are shaped.
        "body": body,
    }
def _scan_seed_dir(path: Path) -> list[dict]:
"""Walk *path* recursively, parse every ``*.eml``, sort by mtime."""
eml_paths: list[Path] = []
try:
for p in path.rglob("*.eml"):
if p.is_file():
eml_paths.append(p)
except OSError:
return []
eml_paths.sort(key=lambda p: p.stat().st_mtime)
out: list[dict] = []
for i, p in enumerate(eml_paths, start=1):
d = _eml_to_dict(p, uid=i)
if d is not None:
out.append(d)
return out
def _get_emails() -> list[dict]:
    """Return the list of messages currently served as INBOX.

    The spool directory named by ``IMAP_EMAIL_SEED`` is used when it is
    set, exists, and yields at least one parseable EML; in every other
    case the hardcoded ``_BAIT_EMAILS`` list is returned. A cached scan is
    reused while it is younger than ``_SEED_RESCAN_INTERVAL`` and the
    directory mtime has not changed.
    """
    global _seed_cache, _seed_cache_dir_mtime, _seed_cache_loaded_at

    if not _EMAIL_SEED_PATH:
        return _BAIT_EMAILS

    seed_dir = Path(_EMAIL_SEED_PATH)
    try:
        current_mtime = seed_dir.stat().st_mtime
    except OSError:
        return _BAIT_EMAILS

    now = time.monotonic()
    if _seed_cache is not None:
        within_ttl = (now - _seed_cache_loaded_at) < _SEED_RESCAN_INTERVAL
        if within_ttl and current_mtime == _seed_cache_dir_mtime:
            return _seed_cache or _BAIT_EMAILS

    fresh = _scan_seed_dir(seed_dir)
    if fresh:
        _seed_cache = fresh
        _seed_cache_dir_mtime = current_mtime
        _seed_cache_loaded_at = now
        return fresh
    # An empty scan is never cached: a FETCH that arrives before emailgen
    # has written anything must not pin the mailbox to the empty result.
    return _BAIT_EMAILS
# ── Logging ───────────────────────────────────────────────────────────────────
def _log(event_type: str, severity: int = 6, **kwargs) -> None:
    """Format one syslog line for this service and emit it.

    Args:
        event_type: Passed to ``syslog_line`` as the event type.
        severity: Syslog severity; the default 6 equals ``SEVERITY_INFO``
            in syslog_bridge.
        **kwargs: Extra fields forwarded unchanged to ``syslog_line``.
    """
    line = syslog_line(SERVICE_NAME, NODE_NAME, event_type, severity, **kwargs)
    write_syslog_file(line)
    forward_syslog(line, LOG_TARGET)
# ── FETCH helpers ─────────────────────────────────────────────────────────────
def _parse_seq_range(range_str: str, total: int) -> list[int]:
"""Parse IMAP sequence set ('1', '1:3', '1:*', '*') → list of 1-based indices."""
result = []
for part in range_str.split(","):
part = part.strip()
if ":" in part:
lo_s, hi_s = part.split(":", 1)
lo = total if lo_s == "*" else int(lo_s)
hi = total if hi_s == "*" else int(hi_s)
result.extend(range(min(lo, hi), max(lo, hi) + 1))
elif part == "*":
result.append(total)
else:
result.append(int(part))
return [n for n in result if 1 <= n <= total]
def _parse_fetch_items(items_str: str) -> list[str]:
"""Parse '(FLAGS ENVELOPE)' or 'BODY[]' → list of item name strings."""
s = items_str.strip()
if s.startswith("(") and s.endswith(")"):
s = s[1:-1]
tokens, i = [], 0
while i < len(s):
if s[i] == " ":
i += 1
continue
j, depth = i, 0
while j < len(s):
if s[j] == "[":
depth += 1
elif s[j] == "]":
depth -= 1
elif s[j] == " " and depth == 0:
break
j += 1
tokens.append(s[i:j].upper())
i = j
return tokens
def _envelope(msg: dict) -> str:
    """Build a minimal RFC 3501 ENVELOPE structure for *msg*.

    Field order is date, subject, from, sender, reply-to, to, cc, bcc,
    in-reply-to, message-id. Sender and reply-to repeat the From address;
    cc, bcc and in-reply-to are always NIL. The message-id is synthesised
    from the message UID and ``NODE_NAME``.

    Args:
        msg: A message dict with ``date``, ``subject``, ``from_name``,
            ``from_addr``, ``to_addr`` and ``uid``.

    Returns:
        The parenthesised envelope as text.
    """

    def quoted(value) -> str:
        # An IMAP quoted string may not contain CR or LF, and both the
        # backslash and the double quote must be backslash-escaped.
        # The backslash is escaped first so the escapes added for quotes
        # are not themselves doubled.
        text = str(value).replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
        text = text.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{text}"'

    def address(name: str, mailbox_addr: str) -> str:
        # Address structure: (personal-name route mailbox host).
        user, _, host = mailbox_addr.partition("@")
        return f"({quoted(name)} NIL {quoted(user)} {quoted(host)})"

    sender = address(msg["from_name"], msg["from_addr"])
    recipient = address("", msg["to_addr"])
    message_id = f"<{msg['uid']}@{NODE_NAME}>"
    return (
        f'({quoted(msg["date"])} {quoted(msg["subject"])} '
        f"({sender}) ({sender}) ({sender}) "
        f"({recipient}) NIL NIL NIL {quoted(message_id)})"
    )
def _build_fetch_response(seq: int, msg: dict, items: list[str]) -> bytes:
"""Build the bytes for a single '* N FETCH (...)' response."""
non_literal: list[str] = []
literal_name: str | None = None
literal_raw: bytes | None = None
for item in items:
norm = item.upper()
if norm == "FLAGS":
flags = " ".join(msg["flags"]) if msg["flags"] else ""
non_literal.append(f"FLAGS ({flags})")
elif norm == "ENVELOPE":
non_literal.append(f"ENVELOPE {_envelope(msg)}")
elif norm == "RFC822.SIZE":
non_literal.append(f"RFC822.SIZE {len(msg['body'].encode())}")
elif norm in ("UID",):
non_literal.append(f"UID {msg['uid']}")
elif norm in ("BODY[]", "RFC822", "BODY[TEXT]", "BODY.PEEK[]"):
literal_name = "BODY[]"
literal_raw = msg["body"].encode()
elif norm in ("BODY[HEADER]", "BODY.PEEK[HEADER]"):
header_part = msg["body"].split("\r\n\r\n", 1)[0] + "\r\n\r\n"
literal_name = "BODY[HEADER]"
literal_raw = header_part.encode()
# unknown items silently ignored
if literal_raw is not None:
prefix_str = (" ".join(non_literal) + " ") if non_literal else ""
header = f"* {seq} FETCH ({prefix_str}{literal_name} {{{len(literal_raw)}}}\r\n".encode()
return header + literal_raw + b")\r\n"
else:
return f"* {seq} FETCH ({' '.join(non_literal)})\r\n".encode()
# ── Protocol ──────────────────────────────────────────────────────────────────
class IMAPProtocol(asyncio.Protocol):
def __init__(self):
self._transport = None
self._peer = ("?", 0)
self._buf = b""
self._state = "NOT_AUTHENTICATED"
self._selected = None # mailbox name currently selected
def connection_made(self, transport):
self._transport = transport
self._peer = transport.get_extra_info("peername", ("?", 0))
_log("connect", src=self._peer[0], src_port=self._peer[1])
banner = IMAP_BANNER if IMAP_BANNER.endswith("\r\n") else IMAP_BANNER + "\r\n"
transport.write(banner.encode())
def data_received(self, data):
self._buf += data
while b"\n" in self._buf:
line, self._buf = self._buf.split(b"\n", 1)
self._handle_line(line.decode(errors="replace").strip())
def connection_lost(self, exc):
_log("disconnect", src=self._peer[0] if self._peer else "?")
# ── Command dispatch ──────────────────────────────────────────────────────
def _handle_line(self, line: str) -> None:
parts = line.split(None, 2)
if not parts:
return
tag = parts[0]
cmd = parts[1].upper() if len(parts) > 1 else ""
args = parts[2] if len(parts) > 2 else ""
_log("command", src=self._peer[0], cmd=cmd, state=self._state)
# Commands valid in any state
if cmd == "CAPABILITY":
self._w(b"* CAPABILITY IMAP4rev1 LITERAL+ SASL-IR LOGIN-REFERRALS"
b" ID ENABLE IDLE AUTH=PLAIN AUTH=LOGIN\r\n")
self._w(f"{tag} OK CAPABILITY completed\r\n")
elif cmd == "NOOP":
self._w(f"{tag} OK\r\n")
elif cmd == "LOGOUT":
self._w(b"* BYE Logging out\r\n")
self._w(f"{tag} OK LOGOUT completed\r\n")
self._transport.close()
# NOT_AUTHENTICATED only
elif cmd == "LOGIN":
self._cmd_login(tag, args)
# AUTHENTICATED or SELECTED
elif cmd in ("LIST", "LSUB"):
self._cmd_list(tag, cmd)
elif cmd == "STATUS":
self._cmd_status(tag, args)
elif cmd in ("SELECT", "EXAMINE"):
self._cmd_select(tag, cmd, args)
# SELECTED only
elif cmd == "FETCH":
self._cmd_fetch(tag, args, use_uid=False)
elif cmd == "SEARCH":
self._cmd_search(tag)
elif cmd == "CLOSE":
self._cmd_close(tag)
# UID prefix — dispatch sub-command
elif cmd == "UID":
sub_parts = args.split(None, 1)
sub_cmd = sub_parts[0].upper() if sub_parts else ""
sub_args = sub_parts[1] if len(sub_parts) > 1 else ""
if sub_cmd == "FETCH":
self._cmd_fetch(tag, sub_args, use_uid=True)
elif sub_cmd == "SEARCH":
self._cmd_search(tag, uid_mode=True)
else:
self._w(f"{tag} BAD Unknown UID sub-command\r\n")
else:
self._w(f"{tag} BAD Command not recognized or not supported\r\n")
# ── Command implementations ───────────────────────────────────────────────
def _cmd_login(self, tag: str, args: str) -> None:
if self._state != "NOT_AUTHENTICATED":
self._w(f"{tag} BAD Already authenticated\r\n")
return
parts = args.split(None, 1)
username = parts[0].strip('"') if parts else ""
password = parts[1].strip('"') if len(parts) > 1 else ""
_enc = encode_secret(password)
if VALID_USERS.get(username) == password:
self._state = "AUTHENTICATED"
_log("auth", src=self._peer[0], username=username, principal=username,
outcome="success", **_enc)
self._w(f"{tag} OK [CAPABILITY IMAP4rev1] Logged in\r\n")
else:
_log("auth", src=self._peer[0], username=username, principal=username,
outcome="failure", severity=SEVERITY_WARNING, **_enc)
self._w(f"{tag} NO [AUTHENTICATIONFAILED] Authentication failed.\r\n")
def _cmd_list(self, tag: str, cmd: str) -> None:
if self._state == "NOT_AUTHENTICATED":
self._w(f"{tag} BAD Not authenticated\r\n")
return
for box in _MAILBOXES:
self._w(f'* {cmd} (\\HasNoChildren) "/" "{box}"\r\n')
self._w(f"{tag} OK {cmd} completed\r\n")
def _cmd_status(self, tag: str, args: str) -> None:
if self._state == "NOT_AUTHENTICATED":
self._w(f"{tag} BAD Not authenticated\r\n")
return
parts = args.split(None, 1)
mailbox = parts[0].strip('"') if parts else "INBOX"
attr_str = parts[1].strip("()").upper() if len(parts) > 1 else "MESSAGES"
n = len(_get_emails()) if mailbox == "INBOX" else 0
counts = {"MESSAGES": n, "RECENT": 0, "UNSEEN": n} if mailbox == "INBOX" \
else {"MESSAGES": 0, "RECENT": 0, "UNSEEN": 0}
result_parts = []
for attr in attr_str.split():
if attr in counts:
result_parts.append(f"{attr} {counts[attr]}")
self._w(f"* STATUS {mailbox} ({' '.join(result_parts)})\r\n")
self._w(f"{tag} OK STATUS completed\r\n")
def _cmd_select(self, tag: str, cmd: str, args: str) -> None:
if self._state == "NOT_AUTHENTICATED":
self._w(f"{tag} BAD Not authenticated\r\n")
return
mailbox = args.strip('"')
emails = _get_emails()
total = len(emails) if mailbox == "INBOX" else 0
self._selected = mailbox
self._state = "SELECTED"
self._w(f"* {total} EXISTS\r\n")
self._w(b"* 0 RECENT\r\n")
self._w(b"* OK [UNSEEN 1] Message 1 is first unseen\r\n")
self._w(b"* OK [UIDVALIDITY 1712345678] UIDs valid\r\n")
self._w(f"* OK [UIDNEXT {total + 1}] Predicted next UID\r\n")
self._w(b"* FLAGS (\\Answered \\Flagged \\Deleted \\Seen \\Draft)\r\n")
self._w(b"* OK [PERMANENTFLAGS (\\Deleted \\Seen \\*)] Limited\r\n")
mode = "READ-ONLY" if cmd == "EXAMINE" else "READ-WRITE"
self._w(f"{tag} OK [{mode}] {cmd} completed\r\n")
def _cmd_fetch(self, tag: str, args: str, use_uid: bool) -> None:
if self._state != "SELECTED":
self._w(f"{tag} BAD Not in selected state\r\n")
return
parts = args.split(None, 1)
range_str = parts[0] if parts else "1:*"
items_str = parts[1] if len(parts) > 1 else "FLAGS"
emails = _get_emails()
total = len(emails)
indices = _parse_seq_range(range_str, total)
items = _parse_fetch_items(items_str)
# Ensure UID is included when using UID FETCH
if use_uid and "UID" not in items:
items = ["UID"] + items
for seq in indices:
if 1 <= seq <= total:
self._transport.write(_build_fetch_response(seq, emails[seq - 1], items))
self._w(f"{tag} OK FETCH completed\r\n")
def _cmd_search(self, tag: str, uid_mode: bool = False) -> None:
if self._state != "SELECTED":
self._w(f"{tag} BAD Not in selected state\r\n")
return
nums = " ".join(str(i) for i in range(1, len(_get_emails()) + 1))
self._w(f"* SEARCH {nums}\r\n")
self._w(f"{tag} OK SEARCH completed\r\n")
def _cmd_close(self, tag: str) -> None:
if self._state != "SELECTED":
self._w(f"{tag} BAD Not in selected state\r\n")
return
self._state = "AUTHENTICATED"
self._selected = None
self._w(f"{tag} OK CLOSE completed\r\n")
# ── Helpers ───────────────────────────────────────────────────────────────
def _w(self, data: str | bytes) -> None:
if isinstance(data, str):
data = data.encode()
self._transport.write(data)
async def main():
    """Log startup, bind the listening socket and serve until cancelled."""
    _log("startup", msg=f"IMAP server starting as {NODE_NAME}")
    loop = asyncio.get_running_loop()
    # Listens on all interfaces by design; a new IMAPProtocol instance is
    # created for every accepted connection.
    server = await loop.create_server(IMAPProtocol, "0.0.0.0", PORT)  # nosec B104
    async with server:
        await server.serve_forever()


# Script entry point: run the server on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,261 @@
#!/usr/bin/env python3
"""
Shared RFC 5424 syslog helper used by service containers.
Services call syslog_line() to format an RFC 5424 message, then
write_syslog_file() to emit it to stdout — the container runtime
captures it, and the host-side collector streams it into the log file.
RFC 5424 structure:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16). SD element ID uses PEN 55555.
"""
import base64
import re
from datetime import datetime, timezone
from typing import Any, Optional
# ─── Constants ────────────────────────────────────────────────────────────────
# Facility number; the PRI value is facility * 8 + severity.
_FACILITY_LOCAL0 = 16
# SD-ID of the single structured-data element emitted on every line.
_SD_ID = "relay@55555"
# Placeholder written for an absent header field.
_NILVALUE = "-"
# Syslog severity levels, most to least severe.
SEVERITY_EMERG = 0
SEVERITY_ALERT = 1
SEVERITY_CRIT = 2
SEVERITY_ERROR = 3
SEVERITY_WARNING = 4
SEVERITY_NOTICE = 5
SEVERITY_INFO = 6
SEVERITY_DEBUG = 7
# Header fields are truncated to these lengths by syslog_line().
_MAX_HOSTNAME = 255
_MAX_APPNAME = 48
_MAX_MSGID = 32
# ─── Formatter ────────────────────────────────────────────────────────────────
def _sd_escape(value: str) -> str:
"""Escape SD-PARAM-VALUE per RFC 5424 §6.3.3."""
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
def _sd_element(fields: dict[str, Any]) -> str:
    """Render *fields* as one structured-data element.

    Returns the NILVALUE when there are no fields. Values are passed
    through ``str()`` and escaped; parameter names are written as given.

    NOTE(review): a ``None`` value is therefore emitted as the text
    ``"None"`` — confirm that is what downstream parsers expect.
    """
    if not fields:
        return _NILVALUE
    rendered = []
    for key, value in fields.items():
        rendered.append(f'{key}="{_sd_escape(str(value))}"')
    params = " ".join(rendered)
    return f"[{_SD_ID} {params}]"
def syslog_line(
    service: str,
    hostname: str,
    event_type: str,
    severity: int = SEVERITY_INFO,
    timestamp: datetime | None = None,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """Format one RFC 5424 syslog line (without a trailing newline).

    Layout: ``<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID SD [MSG]``.

    Args:
        service: APP-NAME (e.g. "http", "mysql").
        hostname: HOSTNAME (node name).
        event_type: MSGID (e.g. "request", "login_attempt").
        severity: Syslog severity integer (default: INFO=6).
        timestamp: Datetime to stamp the line with; the current UTC time
            is used when omitted.
        msg: Optional free-text MSG, appended after the structured data.
        **fields: Encoded as structured-data params.

    Returns:
        The formatted line. Empty service / hostname / event_type are
        replaced by the NILVALUE and over-long ones are truncated.
    """
    when = timestamp or datetime.now(timezone.utc)
    header = [
        f"<{_FACILITY_LOCAL0 * 8 + severity}>1",
        when.isoformat(),
        (hostname or _NILVALUE)[:_MAX_HOSTNAME],
        (service or _NILVALUE)[:_MAX_APPNAME],
        _NILVALUE,  # PROCID is never reported
        (event_type or _NILVALUE)[:_MAX_MSGID],
        _sd_element(fields),
    ]
    line = " ".join(header)
    if msg:
        line += f" {msg}"
    return line
def encode_secret(secret: str) -> dict[str, str]:
    """Encode a credential secret into the two standard SD fields.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}``, ready to be
    spread into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    The secret is first encoded as UTF-8 (characters that cannot be
    encoded become ``?``). ``secret_printable`` shows each resulting byte
    in ``[0x20, 0x7f)`` as itself and every other byte as ``'?'``, so it
    is always plain printable ASCII. ``secret_b64`` is the base64 of those
    same UTF-8 bytes, so control characters and non-ASCII text can be
    recovered exactly.

    Per the original author's note, the downstream ingester keys off the
    presence of ``secret_b64``; that consumer is not part of this module.
    """
    payload = secret.encode("utf-8", errors="replace")
    shown = []
    for byte in payload:
        shown.append(chr(byte) if 0x20 <= byte < 0x7f else "?")
    encoded = base64.b64encode(payload).decode("ascii")
    return {
        "secret_printable": "".join(shown),
        "secret_b64": encoded,
    }
# Matches one Digest parameter: either key="quoted value" (groups 1, 2) or
# key=token (groups 3, 4).
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)')


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Parse an HTTP Authorization header value into Credential SD fields.

    Returns a dict with the universal cred shape ready to spread into a
    ``_log(...)`` call::

        auth = request.headers.get("Authorization")
        cred = classify_authorization(auth)
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes:

    * Basic — base64(user:pw); decoded → ``principal=user`` +
      ``secret_kind="plaintext"`` + ``encode_secret(pw)``.
    * Bearer / Token — opaque token; ``principal=None`` +
      ``secret_kind="http_bearer"`` + ``encode_secret(token)``.
    * Digest — ``principal=username`` from header +
      ``secret_kind="http_digest_md5"`` + ``encode_secret(response)``.

    Returns ``None`` for anything unrecognized (AWS4-HMAC-SHA256, NTLM,
    Negotiate, …), for a non-string or empty value, for Basic payloads
    that are not valid base64 or lack a ``:``, and for Digest headers
    without a ``response`` parameter.
    """
    if not header_value or not isinstance(header_value, str):
        return None
    parts = header_value.strip().split(None, 1)
    if len(parts) < 2:
        return None
    scheme, rest = parts[0].lower(), parts[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # binascii.Error, raised for malformed base64, is a subclass
            # of ValueError.
            return None
        decoded = raw.decode("utf-8", errors="replace")
        if ":" not in decoded:
            return None
        # Split on the first ':' only — the password may contain more.
        user, _, pw = decoded.partition(":")
        return {
            "principal": user,
            "secret_kind": "plaintext",
            **encode_secret(pw),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for m in _DIGEST_PARAM_RE.finditer(rest):
            k = m.group(1) or m.group(3)
            v = m.group(2) if m.group(2) is not None else m.group(4)
            if k:
                params[k.lower()] = v
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
# Form field names treated as the login identity, in priority order.
_FORM_PRINCIPAL_KEYS = (
    "username", "user", "email", "login", "userid", "account",
    "log",  # wp-login.php
    "user_login",  # WordPress alt
    "uname",  # phpMyAdmin
    "pma_username",
)
# Form field names treated as the secret, in priority order.
_FORM_SECRET_KEYS = (
    "password", "pass", "pwd", "passwd", "passwort", "mot_de_passe",
    "user_password",  # WordPress alt
    "pma_password",  # phpMyAdmin
)


def extract_form_credentials(
    body: Optional[str],
    content_type: Optional[str],
) -> Optional[dict[str, Any]]:
    """Parse an `application/x-www-form-urlencoded` body for credentials.

    Returns the universal cred SD shape, ready to spread into a
    ``_log(...)`` call, whenever a secret-shaped key is present in the
    body. ``principal`` is the first matching principal-shaped field, or
    ``None`` when the form carries a secret but no recognisable identity.
    Returns ``None`` when the body is empty, the content type is not
    form-urlencoded, or no secret-shaped key is found.

    Field-name detection is case-insensitive and covers the most common
    login-form variants (WordPress wp-login.php, phpMyAdmin, Joomla,
    etc.). Add more entries to ``_FORM_PRINCIPAL_KEYS`` /
    ``_FORM_SECRET_KEYS`` as new templates surface them.
    """
    if not body or not isinstance(content_type, str):
        return None
    if not content_type.lower().startswith("application/x-www-form-urlencoded"):
        return None

    # Imported here rather than at module scope because the template
    # emitters are import-cost-sensitive; done once per call instead of
    # once per field.
    from urllib.parse import unquote_plus

    fields: dict[str, str] = {}
    for pair in body.split("&"):
        if "=" not in pair:
            continue
        k, _, v = pair.partition("=")
        # First-wins so duplicate-key forms don't get clobbered.
        fields.setdefault(unquote_plus(k).lower(), unquote_plus(v))

    principal: Optional[str] = next(
        (fields[k] for k in _FORM_PRINCIPAL_KEYS if k in fields), None
    )
    secret: Optional[str] = next(
        (fields[k] for k in _FORM_SECRET_KEYS if k in fields), None
    )
    if secret is None:
        return None
    return {
        "principal": principal,
        "secret_kind": "plaintext",
        **encode_secret(secret),
    }
def write_syslog_file(line: str) -> None:
    """Emit a syslog line to stdout for container log capture.

    Despite the name nothing is written to a file here: the line is
    printed and flushed immediately.
    """
    print(line, flush=True)
def forward_syslog(line: str, log_target: str) -> None:
    """No-op stub. TCP forwarding is handled by rsyslog, not by service containers.

    Both arguments are accepted and ignored, so existing callers can keep
    passing their configured log target.
    """
    pass