From 39dafaf3847485ebf255a2ce3c33f7b327a3e536 Mon Sep 17 00:00:00 2001 From: anti Date: Sat, 18 Apr 2026 05:34:50 -0400 Subject: [PATCH] feat(ssh-stealth): hide capture artifacts via XOR+gzip entrypoint blob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /opt/emit_capture.py, /opt/syslog_bridge.py, and /usr/libexec/udev/journal-relay files were plaintext and world-readable to any attacker root-shelled into the SSH honeypot — revealing the full capture logic on a single cat. Pack all three into /entrypoint.sh as XOR+gzip+base64 blobs at build time (_build_stealth.py), then decode in-memory at container start and exec the capture loop from a bash -c string. No .py files under /opt, no journal-relay file under /usr/libexec/udev, no argv_zap name anywhere. The LD_PRELOAD shim is installed as /usr/lib/x86_64-linux-gnu/libudev-shared.so.1 — sits next to the real libudev.so.1 and blends into the multiarch layout. A 1-byte random XOR key is chosen at image build so a bare 'base64 -d | gunzip' probe on the visible entrypoint returns binary noise instead of readable Python. Docker-dependent tests live under tests/docker/ behind a new 'docker' pytest marker (excluded from the default run, same pattern as fuzz / live / bench). 
--- pyproject.toml | 3 +- templates/ssh/Dockerfile | 47 ++++---- templates/ssh/_build_stealth.py | 89 +++++++++++++++ templates/ssh/capture.sh | 55 +++++----- templates/ssh/emit_capture.py | 84 +++++++++++++++ templates/ssh/entrypoint.sh | 49 +++++++-- templates/ssh/syslog_bridge.py | 89 +++++++++++++++ tests/docker/__init__.py | 0 tests/docker/conftest.py | 35 ++++++ tests/docker/test_ssh_stealth_image.py | 128 ++++++++++++++++++++++ tests/test_ssh.py | 83 +++++++++++--- tests/test_ssh_stealth.py | 143 +++++++++++++++++++++++++ 12 files changed, 733 insertions(+), 72 deletions(-) create mode 100644 templates/ssh/_build_stealth.py create mode 100644 templates/ssh/emit_capture.py create mode 100644 templates/ssh/syslog_bridge.py create mode 100644 tests/docker/__init__.py create mode 100644 tests/docker/conftest.py create mode 100644 tests/docker/test_ssh_stealth_image.py create mode 100644 tests/test_ssh_stealth.py diff --git a/pyproject.toml b/pyproject.toml index 036aef2..b75b370 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,13 +73,14 @@ decnet = "decnet.cli:app" [tool.pytest.ini_options] asyncio_mode = "auto" asyncio_debug = "true" -addopts = "-m 'not fuzz and not live and not stress and not bench' -v -q -x -n logical --dist loadscope" +addopts = "-m 'not fuzz and not live and not stress and not bench and not docker' -v -q -x -n logical --dist loadscope" markers = [ "fuzz: hypothesis-based fuzz tests (slow, run with -m fuzz or -m '' for all)", "live: live subprocess service tests (run with -m live)", "live_docker: live Docker container tests (requires DECNET_LIVE_DOCKER=1)", "stress: locust-based stress tests (run with -m stress)", "bench: pytest-benchmark micro-benchmarks (run with -m bench)", + "docker: tests that build and run docker images (run with -m docker)", ] filterwarnings = [ "ignore::pytest.PytestUnhandledThreadExceptionWarning", diff --git a/templates/ssh/Dockerfile b/templates/ssh/Dockerfile index 6dd46a3..5e91886 100644 --- 
a/templates/ssh/Dockerfile +++ b/templates/ssh/Dockerfile @@ -20,6 +20,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ nmap \ jq \ + python3 \ && rm -rf /var/lib/apt/lists/* RUN mkdir -p /var/run/sshd /root/.ssh /var/log/journal /var/lib/systemd/coredump \ @@ -45,10 +46,15 @@ RUN printf '%s\n' \ 'user.* /proc/1/fd/1;RFC5424fmt' \ > /etc/rsyslog.d/50-journal-forward.conf -# Silence default catch-all rules so we own auth/user routing exclusively +# Silence default catch-all rules so we own auth/user routing exclusively. +# Also disable rsyslog's privilege drop: PID 1's stdout (/proc/1/fd/1) is +# owned by root, so a syslog-user rsyslogd gets EACCES and silently drops +# every auth/user line (bash CMD events + file_captured emissions). RUN sed -i \ -e 's|^\(\*\.\*;auth,authpriv\.none\)|#\1|' \ -e 's|^auth,authpriv\.\*|#auth,authpriv.*|' \ + -e 's|^\$PrivDropToUser|#$PrivDropToUser|' \ + -e 's|^\$PrivDropToGroup|#$PrivDropToGroup|' \ /etc/rsyslog.conf # Sudo: log to syslog (auth facility) AND a local file with full I/O capture @@ -77,27 +83,30 @@ RUN mkdir -p /root/projects /root/backups /var/www/html && \ printf 'DB_HOST=10.0.0.5\nDB_USER=admin\nDB_PASS=changeme123\nDB_NAME=prod_db\n' > /root/projects/.env && \ printf '[Unit]\nDescription=App Server\n[Service]\nExecStart=/usr/bin/python3 /opt/app/server.py\n' > /root/projects/app.service -COPY entrypoint.sh /entrypoint.sh -# Capture machinery is installed under plausible systemd/udev paths so casual -# `ps aux` inspection doesn't scream "honeypot". The script runs as -# `journal-relay` and inotifywait is invoked through a symlink named -# `kmsg-watch` — both names blend in with normal udev/journal daemons. -COPY capture.sh /usr/libexec/udev/journal-relay +# Stage all capture sources in a scratch dir. 
Nothing here survives the layer: +# _build_stealth.py packs syslog_bridge.py + emit_capture.py + capture.sh into +# XOR+gzip+base64 blobs embedded directly in /entrypoint.sh, and the whole +# /tmp/build tree is wiped at the end of the RUN — so the final image has no +# `.py` file under /opt and no `journal-relay` script under /usr/libexec/udev. +COPY entrypoint.sh capture.sh syslog_bridge.py emit_capture.py \ + argv_zap.c _build_stealth.py /tmp/build/ -# argv_zap.so: LD_PRELOAD shim that blanks argv[1..] after the target parses -# its args, so /proc/PID/cmdline shows only argv[0] (no watch paths / flags -# leaking from inotifywait's command line). gcc is installed only for the -# build and purged in the same layer to keep the image slim. -COPY argv_zap.c /tmp/argv_zap.c -RUN apt-get update && apt-get install -y --no-install-recommends gcc libc6-dev \ - && gcc -O2 -fPIC -shared -o /usr/lib/argv_zap.so /tmp/argv_zap.c -ldl \ +# argv_zap is compiled into a shared object disguised as a multiarch +# udev-companion library (sits next to real libudev.so.1). gcc is installed +# only for this build step and purged in the same layer. 
+RUN set -eu \ + && apt-get update \ + && apt-get install -y --no-install-recommends gcc libc6-dev \ + && mkdir -p /usr/lib/x86_64-linux-gnu /usr/libexec/udev \ + && gcc -O2 -fPIC -shared \ + -o /usr/lib/x86_64-linux-gnu/libudev-shared.so.1 \ + /tmp/build/argv_zap.c -ldl \ && apt-get purge -y gcc libc6-dev \ && apt-get autoremove -y \ - && rm -rf /var/lib/apt/lists/* /tmp/argv_zap.c - -RUN mkdir -p /usr/libexec/udev \ - && chmod +x /entrypoint.sh /usr/libexec/udev/journal-relay \ - && ln -sf /usr/bin/inotifywait /usr/libexec/udev/kmsg-watch + && rm -rf /var/lib/apt/lists/* \ + && ln -sf /usr/bin/inotifywait /usr/libexec/udev/kmsg-watch \ + && python3 /tmp/build/_build_stealth.py \ + && rm -rf /tmp/build EXPOSE 22 diff --git a/templates/ssh/_build_stealth.py b/templates/ssh/_build_stealth.py new file mode 100644 index 0000000..a3a4ceb --- /dev/null +++ b/templates/ssh/_build_stealth.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +""" +Build-time helper: merge capture Python sources, XOR+gzip+base64 pack them +and the capture.sh loop, and render the final /entrypoint.sh from its +templated form. + +Runs inside the Docker build. Reads from /tmp/build/, writes /entrypoint.sh. 
+""" + +from __future__ import annotations + +import base64 +import gzip +import random +import sys +from pathlib import Path + +BUILD = Path("/tmp/build") + + +def _merge_python() -> str: + bridge = (BUILD / "syslog_bridge.py").read_text() + emit = (BUILD / "emit_capture.py").read_text() + + def _clean(src: str) -> tuple[list[str], list[str]]: + """Return (future_imports, other_lines) with noise stripped.""" + futures: list[str] = [] + rest: list[str] = [] + for line in src.splitlines(): + ls = line.lstrip() + if ls.startswith("from __future__"): + futures.append(line) + elif ls.startswith("sys.path.insert") or ls.startswith("from syslog_bridge"): + continue + else: + rest.append(line) + return futures, rest + + b_fut, b_rest = _clean(bridge) + e_fut, e_rest = _clean(emit) + + # Deduplicate future imports and hoist to the very top. + seen: set[str] = set() + futures: list[str] = [] + for line in (*b_fut, *e_fut): + stripped = line.strip() + if stripped not in seen: + seen.add(stripped) + futures.append(line) + + header = "\n".join(futures) + body = "\n".join(b_rest) + "\n\n" + "\n".join(e_rest) + return (header + "\n" if header else "") + body + + +def _pack(text: str, key: int) -> str: + gz = gzip.compress(text.encode("utf-8")) + xored = bytes(b ^ key for b in gz) + return base64.b64encode(xored).decode("ascii") + + +def main() -> int: + key = random.SystemRandom().randint(1, 255) + + merged_py = _merge_python() + capture_sh = (BUILD / "capture.sh").read_text() + + emit_b64 = _pack(merged_py, key) + relay_b64 = _pack(capture_sh, key) + + tpl = (BUILD / "entrypoint.sh").read_text() + rendered = ( + tpl.replace("__STEALTH_KEY__", str(key)) + .replace("__EMIT_CAPTURE_B64__", emit_b64) + .replace("__JOURNAL_RELAY_B64__", relay_b64) + ) + + for marker in ("__STEALTH_KEY__", "__EMIT_CAPTURE_B64__", "__JOURNAL_RELAY_B64__"): + if marker in rendered: + print(f"build: placeholder {marker} still present after render", file=sys.stderr) + return 1 + + 
Path("/entrypoint.sh").write_text(rendered) + Path("/entrypoint.sh").chmod(0o755) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/templates/ssh/capture.sh b/templates/ssh/capture.sh index cb07fb6..21952c5 100755 --- a/templates/ssh/capture.sh +++ b/templates/ssh/capture.sh @@ -192,16 +192,27 @@ _capture_one() { local mtime mtime="$(stat -c '%y' "$src" 2>/dev/null)" - local decky="${HOSTNAME:-unknown}" + # Prefer NODE_NAME (the deployer-supplied decky identifier) over + # $HOSTNAME, which is a cosmetic fake like "SRV-DEV-36" set by + # entrypoint.sh. The UI and the artifact bind mount both key on the + # decky name, so using $HOSTNAME here makes /artifacts/{decky}/... URLs + # unresolvable. + local decky="${NODE_NAME:-${HOSTNAME:-unknown}}" + # One syslog line, no sidecar. Flat summary fields ride as top-level SD + # params (searchable pills in the UI); bulky nested structures (writer + # cmdline, concurrent_sessions, ss_snapshot) are base64-packed into a + # single meta_json_b64 SD param by emit_capture.py. jq -n \ + --arg _hostname "$decky" \ + --arg _service "ssh" \ + --arg _event_type "file_captured" \ --arg captured_at "$ts" \ --arg orig_path "$src" \ --arg stored_as "$stored_as" \ - --arg sha "$sha" \ + --arg sha256 "$sha" \ --argjson size "$size" \ --arg mtime "$mtime" \ - --arg decky "$decky" \ --arg attribution "$attribution" \ --arg writer_pid "${writer_pid:-}" \ --arg writer_comm "${writer_comm:-}" \ @@ -215,41 +226,37 @@ _capture_one() { --argjson concurrent "$who_json" \ --argjson ss_snapshot "$ss_json" \ '{ + _hostname: $_hostname, + _service: $_service, + _event_type: $_event_type, captured_at: $captured_at, orig_path: $orig_path, stored_as: $stored_as, - sha256: $sha, + sha256: $sha256, size: $size, mtime: $mtime, - decky: $decky, attribution: $attribution, - writer: { - pid: ($writer_pid | if . == "" then null else tonumber? end), - comm: $writer_comm, - cmdline: $writer_cmdline, - uid: ($writer_uid | if . 
== "" then null else tonumber? end), - loginuid: ($writer_loginuid | if . == "" then null else tonumber? end) - }, - ssh_session: { - pid: ($ssh_pid | if . == "" then null else tonumber? end), - user: (if $ssh_user == "" then null else $ssh_user end), - src_ip: (if $src_ip == "" then null else $src_ip end), - src_port: ($src_port | if . == "null" or . == "" then null else tonumber? end) - }, + writer_pid: $writer_pid, + writer_comm: $writer_comm, + writer_uid: $writer_uid, + ssh_pid: $ssh_pid, + ssh_user: $ssh_user, + src_ip: $src_ip, + src_port: (if $src_port == "null" or $src_port == "" then "" else $src_port end), + writer_cmdline: $writer_cmdline, + writer_loginuid: $writer_loginuid, concurrent_sessions: $concurrent, ss_snapshot: $ss_snapshot - }' > "$CAPTURE_DIR/$stored_as.meta.json" - - logger -p user.info -t systemd-journal \ - "file_captured orig_path=$src sha256=$sha size=$size stored_as=$stored_as src_ip=${src_ip:-unknown} ssh_user=${ssh_user:-unknown} attribution=$attribution" + }' \ + | python3 <(printf '%s' "$EMIT_CAPTURE_PY") } # Main loop. -# LD_PRELOAD argv_zap.so blanks argv[1..] after inotifywait parses its args, +# LD_PRELOAD libudev-shared.so.1 blanks argv[1..] after inotifywait parses its args, # so /proc/PID/cmdline shows only "kmsg-watch" — the watch paths and flags # never make it to `ps aux`. # shellcheck disable=SC2086 -ARGV_ZAP_COMM=kmsg-watch LD_PRELOAD=/usr/lib/argv_zap.so "$INOTIFY_BIN" -m -r -q \ +ARGV_ZAP_COMM=kmsg-watch LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1 "$INOTIFY_BIN" -m -r -q \ --event close_write --event moved_to \ --format '%w%f' \ $CAPTURE_WATCH_PATHS 2>/dev/null \ diff --git a/templates/ssh/emit_capture.py b/templates/ssh/emit_capture.py new file mode 100644 index 0000000..b2c4b8d --- /dev/null +++ b/templates/ssh/emit_capture.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +""" +Emit an RFC 5424 `file_captured` line to stdout. 
+ +Called by capture.sh after a file drop has been mirrored into the quarantine +directory. Reads a single JSON object from stdin describing the event; emits +one syslog line that the collector parses into `logs.fields`. + +The input JSON may contain arbitrary nested structures (writer cmdline, +concurrent_sessions, ss_snapshot). Bulky fields are base64-encoded into a +single `meta_json_b64` SD param — this avoids pathological characters +(`]`, `"`, `\\`) that the collector's SD-block regex cannot losslessly +round-trip when embedded directly. +""" + +from __future__ import annotations + +import base64 +import json +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from syslog_bridge import syslog_line, write_syslog_file # noqa: E402 + +# Flat fields ride as individual SD params (searchable, rendered as pills). +# Everything else is rolled into the base64 meta blob. +_FLAT_FIELDS: tuple[str, ...] = ( + "stored_as", + "sha256", + "size", + "orig_path", + "src_ip", + "src_port", + "ssh_user", + "ssh_pid", + "attribution", + "writer_pid", + "writer_comm", + "writer_uid", + "mtime", +) + + +def main() -> int: + raw = sys.stdin.read() + if not raw.strip(): + print("emit_capture: empty stdin", file=sys.stderr) + return 1 + try: + event: dict = json.loads(raw) + except json.JSONDecodeError as exc: + print(f"emit_capture: bad JSON: {exc}", file=sys.stderr) + return 1 + + hostname = str(event.pop("_hostname", None) or os.environ.get("HOSTNAME") or "-") + service = str(event.pop("_service", "ssh")) + event_type = str(event.pop("_event_type", "file_captured")) + + fields: dict[str, str] = {} + for key in _FLAT_FIELDS: + if key in event: + value = event.pop(key) + if value is None or value == "": + continue + fields[key] = str(value) + + if event: + payload = json.dumps(event, separators=(",", ":"), ensure_ascii=False, sort_keys=True) + fields["meta_json_b64"] = base64.b64encode(payload.encode("utf-8")).decode("ascii") + + line = 
syslog_line( + service=service, + hostname=hostname, + event_type=event_type, + **fields, + ) + write_syslog_file(line) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/templates/ssh/entrypoint.sh b/templates/ssh/entrypoint.sh index 8c59325..7ecd6fa 100644 --- a/templates/ssh/entrypoint.sh +++ b/templates/ssh/entrypoint.sh @@ -35,18 +35,45 @@ fi # No intermediate pipe/relay — a named FIFO would be readable AND writable # by any root-in-container process, letting an attacker either eavesdrop on # the SIEM feed or inject forged log lines. +if [ -n "${NODE_NAME:-}" ]; then + printf '$LocalHostName %s\n' "$NODE_NAME" > /etc/rsyslog.d/00-localhostname.conf +fi rsyslogd -# File-catcher: mirror attacker drops into host-mounted quarantine with attribution. -# Script lives at /usr/libexec/udev/journal-relay so `ps aux` shows a -# plausible udev helper. See Dockerfile for the rename rationale. -# LD_PRELOAD + ARGV_ZAP_COMM blank bash's argv[1..] so /proc/PID/cmdline -# shows only "journal-relay" (no script path leak) and /proc/PID/comm -# matches. -CAPTURE_DIR=/var/lib/systemd/coredump \ -LD_PRELOAD=/usr/lib/argv_zap.so \ -ARGV_ZAP_COMM=journal-relay \ - bash -c 'exec -a "journal-relay" bash /usr/libexec/udev/journal-relay' & +# ─── systemd-journal companion bootstrap ───────────────────────────────────── +# The next block primes cached udev/journal helpers from in-memory tables. +# Tables are XOR+gzip+base64 packed at image build. Keeping the data inline +# avoids a separate /usr/libexec script (which would be visible and readable) +# and means the helpers only ever live in this process's memory + an +# anonymous pipe the kernel hands to python via /dev/fd. 
+_STEALTH_KEY=__STEALTH_KEY__ +_EMIT_CAPTURE_B64='__EMIT_CAPTURE_B64__' +_JOURNAL_RELAY_B64='__JOURNAL_RELAY_B64__' -# sshd logs via syslog — no -e flag, so auth events flow through rsyslog → pipe → stdout +_decode() { + printf '%s' "$1" | base64 -d | python3 -c ' +import sys +k = '"$_STEALTH_KEY"' +d = sys.stdin.buffer.read() +sys.stdout.buffer.write(bytes(b ^ k for b in d)) +' | gunzip +} + +EMIT_CAPTURE_PY="$(_decode "$_EMIT_CAPTURE_B64")" +_JOURNAL_RELAY_SRC="$(_decode "$_JOURNAL_RELAY_B64")" +export EMIT_CAPTURE_PY +unset _EMIT_CAPTURE_B64 _JOURNAL_RELAY_B64 _STEALTH_KEY + +# Launch the file-capture loop from memory. LD_PRELOAD + ARGV_ZAP_COMM blank +# argv[1..] so /proc/PID/cmdline shows only "journal-relay". +( + export CAPTURE_DIR=/var/lib/systemd/coredump + export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1 + export ARGV_ZAP_COMM=journal-relay + exec -a journal-relay bash -c "$_JOURNAL_RELAY_SRC" +) & + +unset _JOURNAL_RELAY_SRC + +# sshd logs via syslog — no -e flag, so auth events flow through rsyslog → /proc/1/fd/1 → stdout exec /usr/sbin/sshd -D diff --git a/templates/ssh/syslog_bridge.py b/templates/ssh/syslog_bridge.py new file mode 100644 index 0000000..c0a78d0 --- /dev/null +++ b/templates/ssh/syslog_bridge.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +""" +Shared RFC 5424 syslog helper used by service containers. + +Services call syslog_line() to format an RFC 5424 message, then +write_syslog_file() to emit it to stdout — the container runtime +captures it, and the host-side collector streams it into the log file. + +RFC 5424 structure: + 1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG + +Facility: local0 (16). SD element ID uses PEN 55555. 
+""" + +from datetime import datetime, timezone +from typing import Any + +# ─── Constants ──────────────────────────────────────────────────────────────── + +_FACILITY_LOCAL0 = 16 +_SD_ID = "relay@55555" +_NILVALUE = "-" + +SEVERITY_EMERG = 0 +SEVERITY_ALERT = 1 +SEVERITY_CRIT = 2 +SEVERITY_ERROR = 3 +SEVERITY_WARNING = 4 +SEVERITY_NOTICE = 5 +SEVERITY_INFO = 6 +SEVERITY_DEBUG = 7 + +_MAX_HOSTNAME = 255 +_MAX_APPNAME = 48 +_MAX_MSGID = 32 + +# ─── Formatter ──────────────────────────────────────────────────────────────── + +def _sd_escape(value: str) -> str: + """Escape SD-PARAM-VALUE per RFC 5424 §6.3.3.""" + return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]") + + +def _sd_element(fields: dict[str, Any]) -> str: + if not fields: + return _NILVALUE + params = " ".join(f'{k}="{_sd_escape(str(v))}"' for k, v in fields.items()) + return f"[{_SD_ID} {params}]" + + +def syslog_line( + service: str, + hostname: str, + event_type: str, + severity: int = SEVERITY_INFO, + timestamp: datetime | None = None, + msg: str | None = None, + **fields: Any, +) -> str: + """ + Return a single RFC 5424-compliant syslog line (no trailing newline). + + Args: + service: APP-NAME (e.g. "http", "mysql") + hostname: HOSTNAME (node name) + event_type: MSGID (e.g. 
"request", "login_attempt") + severity: Syslog severity integer (default: INFO=6) + timestamp: UTC datetime; defaults to now + msg: Optional free-text MSG + **fields: Encoded as structured data params + """ + pri = f"<{_FACILITY_LOCAL0 * 8 + severity}>" + ts = (timestamp or datetime.now(timezone.utc)).isoformat() + host = (hostname or _NILVALUE)[:_MAX_HOSTNAME] + appname = (service or _NILVALUE)[:_MAX_APPNAME] + msgid = (event_type or _NILVALUE)[:_MAX_MSGID] + sd = _sd_element(fields) + message = f" {msg}" if msg else "" + return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}" + + +def write_syslog_file(line: str) -> None: + """Emit a syslog line to stdout for container log capture.""" + print(line, flush=True) + + +def forward_syslog(line: str, log_target: str) -> None: + """No-op stub. TCP forwarding is handled by rsyslog, not by service containers.""" + pass diff --git a/tests/docker/__init__.py b/tests/docker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/docker/conftest.py b/tests/docker/conftest.py new file mode 100644 index 0000000..169fe07 --- /dev/null +++ b/tests/docker/conftest.py @@ -0,0 +1,35 @@ +""" +Shared fixtures for tests under `tests/docker/`. + +All tests here are marked `docker` and excluded from the default run +(see pyproject.toml addopts). Enable with: `pytest -m docker`. 
+""" + +from __future__ import annotations + +import shutil +import subprocess + +import pytest + + +def _docker_available() -> bool: + if shutil.which("docker") is None: + return False + try: + subprocess.run( + ["docker", "info"], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=5, + ) + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError): + return False + return True + + +@pytest.fixture(scope="session", autouse=True) +def _require_docker(): + if not _docker_available(): + pytest.skip("docker daemon not reachable", allow_module_level=True) diff --git a/tests/docker/test_ssh_stealth_image.py b/tests/docker/test_ssh_stealth_image.py new file mode 100644 index 0000000..4446e56 --- /dev/null +++ b/tests/docker/test_ssh_stealth_image.py @@ -0,0 +1,128 @@ +""" +End-to-end stealth assertions for the built SSH honeypot image. + +These tests build the `templates/ssh/` Dockerfile and then introspect the +running container to verify that: + +- `/opt/emit_capture.py`, `/opt/syslog_bridge.py` are absent. +- `/usr/libexec/udev/journal-relay` is absent (only the `kmsg-watch` + symlink remains). +- The renamed argv-zap shim is installed at the multiarch path. +- A file drop still produces a `file_captured` RFC 5424 log line. + +Marked `docker` so they're skipped by default (see pyproject.toml). 
+""" + +from __future__ import annotations + +import subprocess +import time +import uuid + +import pytest + +from decnet.services.registry import get_service + +pytestmark = pytest.mark.docker + +IMAGE_TAG = "decnet-ssh-stealth-test" + + +def _run(cmd: list[str], check: bool = True, capture: bool = True) -> subprocess.CompletedProcess: + return subprocess.run( + cmd, + check=check, + stdout=subprocess.PIPE if capture else None, + stderr=subprocess.PIPE if capture else None, + text=True, + ) + + +@pytest.fixture(scope="module") +def ssh_stealth_image() -> str: + ctx = get_service("ssh").dockerfile_context() + _run(["docker", "build", "-t", IMAGE_TAG, str(ctx)]) + yield IMAGE_TAG + _run(["docker", "rmi", "-f", IMAGE_TAG], check=False) + + +@pytest.fixture() +def running_container(ssh_stealth_image): + name = f"ssh-stealth-{uuid.uuid4().hex[:8]}" + _run(["docker", "run", "-d", "--rm", "--name", name, ssh_stealth_image]) + # Give entrypoint time to decode + launch the capture loop. + time.sleep(3) + try: + yield name + finally: + _run(["docker", "stop", name], check=False) + + +def _exec(container: str, shell_cmd: str) -> str: + return _run(["docker", "exec", container, "sh", "-c", shell_cmd]).stdout + + +# --------------------------------------------------------------------------- +# On-disk artifact hiding +# --------------------------------------------------------------------------- + +def test_no_python_capture_sources_on_disk(running_container): + out = _exec( + running_container, + 'find / \\( -name "emit_capture*" -o -name "syslog_bridge*" \\) ' + '-not -path "/proc/*" 2>/dev/null', + ) + assert out.strip() == "", f"capture python sources leaked: {out!r}" + + +def test_no_journal_relay_file(running_container): + out = _exec(running_container, "ls /usr/libexec/udev/") + assert "journal-relay" not in out + # The kmsg-watch symlink is the only expected entry. 
+ assert "kmsg-watch" in out + + +def test_opt_is_empty(running_container): + out = _exec(running_container, "ls -A /opt") + assert out.strip() == "", f"/opt should be empty, got: {out!r}" + + +def test_preload_shim_installed_at_multiarch_path(running_container): + out = _exec(running_container, "ls /usr/lib/x86_64-linux-gnu/libudev-shared.so.1") + assert "libudev-shared.so.1" in out + + +def test_no_argv_zap_name_anywhere(running_container): + out = _exec( + running_container, + 'find / -name "argv_zap*" -not -path "/proc/*" 2>/dev/null', + ) + assert out.strip() == "", f"argv_zap name leaked: {out!r}" + + +# --------------------------------------------------------------------------- +# Runtime process disguise +# --------------------------------------------------------------------------- + +def test_process_list_shows_disguised_names(running_container): + out = _exec(running_container, "ps -eo comm") + # Must see the cover names. + assert "journal-relay" in out + assert "kmsg-watch" in out + # Must NOT see the real script / source paths in the process list. + assert "emit_capture" not in out + assert "argv_zap" not in out + + +# --------------------------------------------------------------------------- +# Functional: capture still works +# --------------------------------------------------------------------------- + +def test_file_drop_produces_capture_log(running_container): + _exec(running_container, 'echo "payload-data" > /root/loot.txt') + # Capture is async — inotify → bash → python → rsyslog → stdout. 
+ time.sleep(3) + logs = _run(["docker", "logs", running_container]).stdout + assert "file_captured" in logs, f"no capture event in logs:\n{logs}" + assert "loot.txt" in logs + assert "sha256=" in logs diff --git a/tests/test_ssh.py b/tests/test_ssh.py index 3a3dc6e..a3a67d4 100644 --- a/tests/test_ssh.py +++ b/tests/test_ssh.py @@ -65,11 +65,23 @@ def test_ssh_dockerfile_context_exists(): # --------------------------------------------------------------------------- def test_no_cowrie_vars(): + """The old Cowrie emulation is gone — no COWRIE_* env should leak in. + + NODE_NAME is intentionally present: it pins the decky identifier used + by rsyslog (HOSTNAME field) and capture.sh (_hostname for file_captured + events), so the /artifacts/{decky}/... URL lines up with the bind mount. + """ env = _fragment()["environment"] - cowrie_keys = [k for k in env if k.startswith("COWRIE_") or k == "NODE_NAME"] + cowrie_keys = [k for k in env if k.startswith("COWRIE_")] assert cowrie_keys == [], f"Unexpected Cowrie vars: {cowrie_keys}" +def test_node_name_matches_decky(): + """SSH must propagate decky_name via NODE_NAME so logs/artifacts key on it.""" + frag = _fragment() + assert frag["environment"]["NODE_NAME"] == "test-decky" + + # --------------------------------------------------------------------------- # compose_fragment structure # --------------------------------------------------------------------------- @@ -166,6 +178,14 @@ def test_dockerfile_rsyslog_targets_pid1_stdout(): assert "decnet-logs" not in df +def test_dockerfile_disables_rsyslog_privdrop(): + # rsyslogd must stay root so it can write to PID 1's stdout fd. + # Dropping to the syslog user makes every auth/user line silently fail. 
+ df = _dockerfile_text() + assert "#$PrivDropToUser" in df + assert "#$PrivDropToGroup" in df + + def test_entrypoint_starts_rsyslogd(): assert "rsyslogd" in _entrypoint_text() @@ -215,11 +235,17 @@ def test_dockerfile_installs_default_recon_tools(): assert pkg in df, f"missing {pkg} in Dockerfile" -def test_dockerfile_copies_capture_script(): +def test_dockerfile_stages_capture_script_for_inlining(): df = _dockerfile_text() - # Installed under plausible udev path to hide from casual `ps` inspection. - assert "COPY capture.sh /usr/libexec/udev/journal-relay" in df - assert "chmod +x" in df and "journal-relay" in df + # capture.sh is no longer COPY'd to a runtime path; it's staged under + # /tmp/build and folded into /entrypoint.sh as an XOR+gzip+base64 blob + # by _build_stealth.py, then the staging dir is wiped in the same layer. + assert "capture.sh" in df + assert "/tmp/build/" in df + assert "_build_stealth.py" in df + assert "rm -rf /tmp/build" in df + # The old visible install path must be gone. + assert "/usr/libexec/udev/journal-relay" not in df def test_dockerfile_masks_inotifywait_as_kmsg_watch(): @@ -289,18 +315,36 @@ def test_capture_script_snapshots_ss_and_utmp(): assert "who " in body or "who --" in body -def test_capture_script_writes_meta_json(): +def test_capture_script_no_longer_writes_sidecar(): body = _capture_text() - assert ".meta.json" in body - for key in ("attribution", "ssh_session", "writer", "sha256"): - assert key in body, f"meta key {key} missing from capture.sh" + # The old .meta.json sidecar was replaced by a single syslog event that + # carries the same metadata — see emit_capture.py. 
+ assert ".meta.json" not in body -def test_capture_script_emits_syslog_with_attribution(): +def test_capture_script_pipes_to_emit_capture(): body = _capture_text() - assert "logger" in body + # capture.sh builds the event JSON with jq and pipes to python3 reading + # from an fd that carries the in-memory emit_capture source; no on-disk + # emit_capture.py exists in the running container anymore. + assert "EMIT_CAPTURE_PY" in body + assert "python3" in body + assert "/opt/emit_capture.py" not in body assert "file_captured" in body - assert "src_ip" in body + for key in ("attribution", "sha256", "src_ip", "ssh_user", "writer_cmdline"): + assert key in body, f"capture field {key} missing from capture.sh" + + +def test_ssh_dockerfile_ships_capture_emitter(): + df = _dockerfile_text() + # Python sources are staged for the build-time inlining step, not COPY'd + # to /opt (which would leave them world-readable for any attacker shell). + assert "syslog_bridge.py" in df + assert "emit_capture.py" in df + assert "/opt/emit_capture.py" not in df + assert "/opt/syslog_bridge.py" not in df + # python3 is needed to run the emitter; --no-install-recommends keeps the image small. + assert "python3" in df def test_capture_script_enforces_size_cap(): @@ -343,7 +387,10 @@ def test_argv_zap_source_shipped(): def test_dockerfile_compiles_argv_zap(): df = _dockerfile_text() assert "argv_zap.c" in df - assert "argv_zap.so" in df + # The installed .so is disguised as a multiarch udev-companion library + # (sits next to real libudev.so.1). The old argv_zap.so name was a tell. + assert "/usr/lib/x86_64-linux-gnu/libudev-shared.so.1" in df + assert "argv_zap.so" not in df # gcc must be installed AND purged in the same layer (image-size hygiene). 
assert "gcc" in df assert "apt-get purge" in df @@ -351,7 +398,8 @@ def test_dockerfile_compiles_argv_zap(): def test_capture_script_preloads_argv_zap(): body = _capture_text() - assert "LD_PRELOAD=/usr/lib/argv_zap.so" in body + assert "LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1" in body + assert "argv_zap.so" not in body def test_capture_script_sets_argv_zap_comm(): @@ -369,10 +417,11 @@ def test_argv_zap_reads_comm_from_env(): def test_entrypoint_watcher_bash_uses_argv_zap(): ep = _entrypoint_text() - # The bash that runs journal-relay must be LD_PRELOADed so its - # argv[1] (the script path) doesn't leak via /proc/PID/cmdline. - assert "LD_PRELOAD=/usr/lib/argv_zap.so" in ep + # The bash that runs the capture loop must be LD_PRELOADed so the + # (large) bash -c argument doesn't leak via /proc/PID/cmdline. + assert "LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1" in ep assert "ARGV_ZAP_COMM=journal-relay" in ep + assert "argv_zap.so" not in ep def test_capture_script_header_is_sanitized(): diff --git a/tests/test_ssh_stealth.py b/tests/test_ssh_stealth.py new file mode 100644 index 0000000..a9aab63 --- /dev/null +++ b/tests/test_ssh_stealth.py @@ -0,0 +1,143 @@ +""" +Stealth-hardening assertions for the SSH honeypot template. + +The three capture artifacts — syslog_bridge.py, emit_capture.py, capture.sh — +used to land as plaintext files in the container (world-readable by the +attacker, who is root in-container). They are now packed into /entrypoint.sh +as XOR+gzip+base64 blobs at image-build time by _build_stealth.py. + +These tests pin the stealth contract at the source-template level so +regressions surface without needing a docker build. 
+""" + +from __future__ import annotations + +import base64 +import gzip +import importlib.util +import sys +from pathlib import Path + +from decnet.services.registry import get_service + + +def _ctx() -> Path: + return get_service("ssh").dockerfile_context() + + +def _load_build_stealth(): + path = _ctx() / "_build_stealth.py" + spec = importlib.util.spec_from_file_location("_build_stealth", path) + mod = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = mod + spec.loader.exec_module(mod) + return mod + + +# --------------------------------------------------------------------------- +# Build helper exists and is wired into the Dockerfile +# --------------------------------------------------------------------------- + +def test_build_stealth_helper_shipped(): + helper = _ctx() / "_build_stealth.py" + assert helper.exists(), "_build_stealth.py missing from SSH template" + body = helper.read_text() + assert "__STEALTH_KEY__" in body + assert "__EMIT_CAPTURE_B64__" in body + assert "__JOURNAL_RELAY_B64__" in body + + +def test_dockerfile_invokes_build_stealth(): + df = (_ctx() / "Dockerfile").read_text() + assert "_build_stealth.py" in df + assert "python3 /tmp/build/_build_stealth.py" in df + + +# --------------------------------------------------------------------------- +# Entrypoint template shape +# --------------------------------------------------------------------------- + +def test_entrypoint_is_template_with_placeholders(): + ep = (_ctx() / "entrypoint.sh").read_text() + # Pre-build template — placeholders must be present; the Docker build + # stage substitutes them. + assert "__STEALTH_KEY__" in ep + assert "__EMIT_CAPTURE_B64__" in ep + assert "__JOURNAL_RELAY_B64__" in ep + + +def test_entrypoint_decodes_via_xor(): + ep = (_ctx() / "entrypoint.sh").read_text() + # XOR-then-gunzip layering: base64 -> xor -> gunzip + assert "base64 -d" in ep + assert "gunzip" in ep + # The decoded vars drive the capture loop. 
+ assert "EMIT_CAPTURE_PY" in ep + assert "export EMIT_CAPTURE_PY" in ep + + +def test_entrypoint_no_plaintext_python_path(): + ep = (_ctx() / "entrypoint.sh").read_text() + assert "/opt/emit_capture.py" not in ep + assert "/opt/syslog_bridge.py" not in ep + assert "/usr/libexec/udev/journal-relay" not in ep + + +# --------------------------------------------------------------------------- +# End-to-end: pack + round-trip +# --------------------------------------------------------------------------- + +def test_build_stealth_merge_and_pack_roundtrip(tmp_path, monkeypatch): + """Merge the real sources, pack them, and decode — assert semantic equality.""" + mod = _load_build_stealth() + + build = tmp_path / "build" + build.mkdir() + ctx = _ctx() + for name in ("syslog_bridge.py", "emit_capture.py", "capture.sh", "entrypoint.sh"): + (build / name).write_text((ctx / name).read_text()) + + monkeypatch.setattr(mod, "BUILD", build) + out_dir = tmp_path / "out" + out_dir.mkdir() + + # Redirect the write target so we don't touch /entrypoint.sh. + import pathlib + real_path = pathlib.Path + def fake_path(arg, *a, **kw): + if arg == "/entrypoint.sh": + return real_path(out_dir) / "entrypoint.sh" + return real_path(arg, *a, **kw) + monkeypatch.setattr(mod, "Path", fake_path) + + rc = mod.main() + assert rc == 0 + + rendered = (out_dir / "entrypoint.sh").read_text() + for marker in ("__STEALTH_KEY__", "__EMIT_CAPTURE_B64__", "__JOURNAL_RELAY_B64__"): + assert marker not in rendered, f"{marker} left in rendered entrypoint" + + # Extract key + blobs and decode. 
+ import re + key = int(re.search(r"_STEALTH_KEY=(\d+)", rendered).group(1)) + emit_b64 = re.search(r"_EMIT_CAPTURE_B64='([^']+)'", rendered).group(1) + relay_b64 = re.search(r"_JOURNAL_RELAY_B64='([^']+)'", rendered).group(1) + + def unpack(s: str) -> str: + xored = base64.b64decode(s) + gz = bytes(b ^ key for b in xored) + return gzip.decompress(gz).decode("utf-8") + + emit_src = unpack(emit_b64) + relay_src = unpack(relay_b64) + + # Merged python must contain both module bodies, with the import hack stripped. + assert "def syslog_line(" in emit_src + assert "def main() -> int:" in emit_src + assert "from syslog_bridge import" not in emit_src + assert "sys.path.insert" not in emit_src + + # Capture loop must reference the in-memory python var, not the old path. + assert "EMIT_CAPTURE_PY" in relay_src + assert "/opt/emit_capture.py" not in relay_src + assert "inotifywait" in relay_src or "INOTIFY_BIN" in relay_src