feat(ssh-stealth): hide capture artifacts via XOR+gzip entrypoint blob

The /opt/emit_capture.py, /opt/syslog_bridge.py, and
/usr/libexec/udev/journal-relay files were plaintext and world-readable
to any attacker root-shelled into the SSH honeypot — revealing the full
capture logic on a single cat.

Pack all three into /entrypoint.sh as XOR+gzip+base64 blobs at build
time (_build_stealth.py), then decode in-memory at container start and
exec the capture loop from a bash -c string. No .py files under /opt,
no journal-relay file under /usr/libexec/udev, no argv_zap name
anywhere. The LD_PRELOAD shim is installed as
/usr/lib/x86_64-linux-gnu/libudev-shared.so.1 — sits next to the real
libudev.so.1 and blends into the multiarch layout.

A 1-byte random XOR key is chosen at image build so a bare
'base64 -d | gunzip' probe on the visible entrypoint returns binary
noise instead of readable Python.

Docker-dependent tests live under tests/docker/ behind a new 'docker'
pytest marker (excluded from the default run, same pattern as fuzz /
live / bench).
This commit is contained in:
2026-04-18 05:34:50 -04:00
parent b0e00a6cc4
commit 39dafaf384
12 changed files with 733 additions and 72 deletions

View File

@@ -73,13 +73,14 @@ decnet = "decnet.cli:app"
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_debug = "true"
addopts = "-m 'not fuzz and not live and not stress and not bench' -v -q -x -n logical --dist loadscope"
addopts = "-m 'not fuzz and not live and not stress and not bench and not docker' -v -q -x -n logical --dist loadscope"
markers = [
"fuzz: hypothesis-based fuzz tests (slow, run with -m fuzz or -m '' for all)",
"live: live subprocess service tests (run with -m live)",
"live_docker: live Docker container tests (requires DECNET_LIVE_DOCKER=1)",
"stress: locust-based stress tests (run with -m stress)",
"bench: pytest-benchmark micro-benchmarks (run with -m bench)",
"docker: tests that build and run docker images (run with -m docker)",
]
filterwarnings = [
"ignore::pytest.PytestUnhandledThreadExceptionWarning",

View File

@@ -20,6 +20,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
nmap \
jq \
python3 \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir -p /var/run/sshd /root/.ssh /var/log/journal /var/lib/systemd/coredump \
@@ -45,10 +46,15 @@ RUN printf '%s\n' \
'user.* /proc/1/fd/1;RFC5424fmt' \
> /etc/rsyslog.d/50-journal-forward.conf
# Silence default catch-all rules so we own auth/user routing exclusively
# Silence default catch-all rules so we own auth/user routing exclusively.
# Also disable rsyslog's privilege drop: PID 1's stdout (/proc/1/fd/1) is
# owned by root, so a syslog-user rsyslogd gets EACCES and silently drops
# every auth/user line (bash CMD events + file_captured emissions).
RUN sed -i \
-e 's|^\(\*\.\*;auth,authpriv\.none\)|#\1|' \
-e 's|^auth,authpriv\.\*|#auth,authpriv.*|' \
-e 's|^\$PrivDropToUser|#$PrivDropToUser|' \
-e 's|^\$PrivDropToGroup|#$PrivDropToGroup|' \
/etc/rsyslog.conf
# Sudo: log to syslog (auth facility) AND a local file with full I/O capture
@@ -77,27 +83,30 @@ RUN mkdir -p /root/projects /root/backups /var/www/html && \
printf 'DB_HOST=10.0.0.5\nDB_USER=admin\nDB_PASS=changeme123\nDB_NAME=prod_db\n' > /root/projects/.env && \
printf '[Unit]\nDescription=App Server\n[Service]\nExecStart=/usr/bin/python3 /opt/app/server.py\n' > /root/projects/app.service
COPY entrypoint.sh /entrypoint.sh
# Capture machinery is installed under plausible systemd/udev paths so casual
# `ps aux` inspection doesn't scream "honeypot". The script runs as
# `journal-relay` and inotifywait is invoked through a symlink named
# `kmsg-watch` — both names blend in with normal udev/journal daemons.
COPY capture.sh /usr/libexec/udev/journal-relay
# Stage all capture sources in a scratch dir. Nothing here survives the layer:
# _build_stealth.py packs syslog_bridge.py + emit_capture.py + capture.sh into
# XOR+gzip+base64 blobs embedded directly in /entrypoint.sh, and the whole
# /tmp/build tree is wiped at the end of the RUN — so the final image has no
# `.py` file under /opt and no `journal-relay` script under /usr/libexec/udev.
COPY entrypoint.sh capture.sh syslog_bridge.py emit_capture.py \
argv_zap.c _build_stealth.py /tmp/build/
# argv_zap.so: LD_PRELOAD shim that blanks argv[1..] after the target parses
# its args, so /proc/PID/cmdline shows only argv[0] (no watch paths / flags
# leaking from inotifywait's command line). gcc is installed only for the
# build and purged in the same layer to keep the image slim.
COPY argv_zap.c /tmp/argv_zap.c
RUN apt-get update && apt-get install -y --no-install-recommends gcc libc6-dev \
&& gcc -O2 -fPIC -shared -o /usr/lib/argv_zap.so /tmp/argv_zap.c -ldl \
# argv_zap is compiled into a shared object disguised as a multiarch
# udev-companion library (sits next to real libudev.so.1). gcc is installed
# only for this build step and purged in the same layer.
RUN set -eu \
&& apt-get update \
&& apt-get install -y --no-install-recommends gcc libc6-dev \
&& mkdir -p /usr/lib/x86_64-linux-gnu /usr/libexec/udev \
&& gcc -O2 -fPIC -shared \
-o /usr/lib/x86_64-linux-gnu/libudev-shared.so.1 \
/tmp/build/argv_zap.c -ldl \
&& apt-get purge -y gcc libc6-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/* /tmp/argv_zap.c
RUN mkdir -p /usr/libexec/udev \
&& chmod +x /entrypoint.sh /usr/libexec/udev/journal-relay \
&& ln -sf /usr/bin/inotifywait /usr/libexec/udev/kmsg-watch
&& rm -rf /var/lib/apt/lists/* \
&& ln -sf /usr/bin/inotifywait /usr/libexec/udev/kmsg-watch \
&& python3 /tmp/build/_build_stealth.py \
&& rm -rf /tmp/build
EXPOSE 22

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
Build-time helper: merge capture Python sources, XOR+gzip+base64 pack them
and the capture.sh loop, and render the final /entrypoint.sh from its
templated form.
Runs inside the Docker build. Reads from /tmp/build/, writes /entrypoint.sh.
"""
from __future__ import annotations
import base64
import gzip
import random
import sys
from pathlib import Path
BUILD = Path("/tmp/build")
def _merge_python() -> str:
    """Concatenate syslog_bridge.py and emit_capture.py into one module body.

    The cross-file import shim (sys.path.insert / `from syslog_bridge ...`)
    is dropped — both bodies live in the same merged module — and any
    `from __future__` imports are deduplicated and hoisted to the top,
    since they are only legal as the first statements of a module.
    """
    def _split(source: str) -> tuple[list[str], list[str]]:
        # Separate __future__ imports from everything else; skip shim lines.
        future_lines: list[str] = []
        body_lines: list[str] = []
        for raw in source.splitlines():
            stripped = raw.lstrip()
            if stripped.startswith("from __future__"):
                future_lines.append(raw)
            elif stripped.startswith(("sys.path.insert", "from syslog_bridge")):
                continue
            else:
                body_lines.append(raw)
        return future_lines, body_lines

    bridge_fut, bridge_body = _split((BUILD / "syslog_bridge.py").read_text())
    emit_fut, emit_body = _split((BUILD / "emit_capture.py").read_text())

    # Keep the first occurrence of each future import, preserving order
    # (dict insertion order), keyed on the stripped text so indentation
    # variants don't duplicate.
    deduped: dict[str, str] = {}
    for raw in (*bridge_fut, *emit_fut):
        deduped.setdefault(raw.strip(), raw)
    header = "\n".join(deduped.values())

    merged_body = "\n".join(bridge_body) + "\n\n" + "\n".join(emit_body)
    return (header + "\n" if header else "") + merged_body
def _pack(text: str, key: int) -> str:
gz = gzip.compress(text.encode("utf-8"))
xored = bytes(b ^ key for b in gz)
return base64.b64encode(xored).decode("ascii")
def main() -> int:
    """Render /entrypoint.sh from its template with freshly packed blobs.

    Returns 0 on success, 1 if any placeholder survives substitution — a
    fail-closed guard so a typo'd marker can't ship a half-rendered image.
    """
    # Fresh 1-byte key per image build (never 0, which would be a no-op XOR):
    # a bare `base64 -d | gunzip` probe on the entrypoint yields binary noise.
    key = random.SystemRandom().randint(1, 255)
    merged_py = _merge_python()
    capture_sh = (BUILD / "capture.sh").read_text()
    emit_b64 = _pack(merged_py, key)
    relay_b64 = _pack(capture_sh, key)
    tpl = (BUILD / "entrypoint.sh").read_text()
    rendered = (
        tpl.replace("__STEALTH_KEY__", str(key))
        .replace("__EMIT_CAPTURE_B64__", emit_b64)
        .replace("__JOURNAL_RELAY_B64__", relay_b64)
    )
    # Verify every marker was consumed before overwriting the real entrypoint.
    for marker in ("__STEALTH_KEY__", "__EMIT_CAPTURE_B64__", "__JOURNAL_RELAY_B64__"):
        if marker in rendered:
            print(f"build: placeholder {marker} still present after render", file=sys.stderr)
            return 1
    Path("/entrypoint.sh").write_text(rendered)
    # Entrypoint must stay executable after the rewrite.
    Path("/entrypoint.sh").chmod(0o755)
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -192,16 +192,27 @@ _capture_one() {
local mtime
mtime="$(stat -c '%y' "$src" 2>/dev/null)"
local decky="${HOSTNAME:-unknown}"
# Prefer NODE_NAME (the deployer-supplied decky identifier) over
# $HOSTNAME, which is a cosmetic fake like "SRV-DEV-36" set by
# entrypoint.sh. The UI and the artifact bind mount both key on the
# decky name, so using $HOSTNAME here makes /artifacts/{decky}/... URLs
# unresolvable.
local decky="${NODE_NAME:-${HOSTNAME:-unknown}}"
# One syslog line, no sidecar. Flat summary fields ride as top-level SD
# params (searchable pills in the UI); bulky nested structures (writer
# cmdline, concurrent_sessions, ss_snapshot) are base64-packed into a
# single meta_json_b64 SD param by emit_capture.py.
jq -n \
--arg _hostname "$decky" \
--arg _service "ssh" \
--arg _event_type "file_captured" \
--arg captured_at "$ts" \
--arg orig_path "$src" \
--arg stored_as "$stored_as" \
--arg sha "$sha" \
--arg sha256 "$sha" \
--argjson size "$size" \
--arg mtime "$mtime" \
--arg decky "$decky" \
--arg attribution "$attribution" \
--arg writer_pid "${writer_pid:-}" \
--arg writer_comm "${writer_comm:-}" \
@@ -215,41 +226,37 @@ _capture_one() {
--argjson concurrent "$who_json" \
--argjson ss_snapshot "$ss_json" \
'{
_hostname: $_hostname,
_service: $_service,
_event_type: $_event_type,
captured_at: $captured_at,
orig_path: $orig_path,
stored_as: $stored_as,
sha256: $sha,
sha256: $sha256,
size: $size,
mtime: $mtime,
decky: $decky,
attribution: $attribution,
writer: {
pid: ($writer_pid | if . == "" then null else tonumber? end),
comm: $writer_comm,
cmdline: $writer_cmdline,
uid: ($writer_uid | if . == "" then null else tonumber? end),
loginuid: ($writer_loginuid | if . == "" then null else tonumber? end)
},
ssh_session: {
pid: ($ssh_pid | if . == "" then null else tonumber? end),
user: (if $ssh_user == "" then null else $ssh_user end),
src_ip: (if $src_ip == "" then null else $src_ip end),
src_port: ($src_port | if . == "null" or . == "" then null else tonumber? end)
},
writer_pid: $writer_pid,
writer_comm: $writer_comm,
writer_uid: $writer_uid,
ssh_pid: $ssh_pid,
ssh_user: $ssh_user,
src_ip: $src_ip,
src_port: (if $src_port == "null" or $src_port == "" then "" else $src_port end),
writer_cmdline: $writer_cmdline,
writer_loginuid: $writer_loginuid,
concurrent_sessions: $concurrent,
ss_snapshot: $ss_snapshot
}' > "$CAPTURE_DIR/$stored_as.meta.json"
logger -p user.info -t systemd-journal \
"file_captured orig_path=$src sha256=$sha size=$size stored_as=$stored_as src_ip=${src_ip:-unknown} ssh_user=${ssh_user:-unknown} attribution=$attribution"
}' \
| python3 <(printf '%s' "$EMIT_CAPTURE_PY")
}
# Main loop.
# LD_PRELOAD argv_zap.so blanks argv[1..] after inotifywait parses its args,
# LD_PRELOAD libudev-shared.so.1 blanks argv[1..] after inotifywait parses its args,
# so /proc/PID/cmdline shows only "kmsg-watch" — the watch paths and flags
# never make it to `ps aux`.
# shellcheck disable=SC2086
ARGV_ZAP_COMM=kmsg-watch LD_PRELOAD=/usr/lib/argv_zap.so "$INOTIFY_BIN" -m -r -q \
ARGV_ZAP_COMM=kmsg-watch LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1 "$INOTIFY_BIN" -m -r -q \
--event close_write --event moved_to \
--format '%w%f' \
$CAPTURE_WATCH_PATHS 2>/dev/null \

View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""
Emit an RFC 5424 `file_captured` line to stdout.
Called by capture.sh after a file drop has been mirrored into the quarantine
directory. Reads a single JSON object from stdin describing the event; emits
one syslog line that the collector parses into `logs.fields`.
The input JSON may contain arbitrary nested structures (writer cmdline,
concurrent_sessions, ss_snapshot). Bulky fields are base64-encoded into a
single `meta_json_b64` SD param — this avoids pathological characters
(`]`, `"`, `\\`) that the collector's SD-block regex cannot losslessly
round-trip when embedded directly.
"""
from __future__ import annotations
import base64
import json
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from syslog_bridge import syslog_line, write_syslog_file # noqa: E402
# Flat fields ride as individual SD params (searchable, rendered as pills).
# Everything else is rolled into the base64 meta blob.
# Scalars promoted to individual SD params, in this emission order. A tuple
# (not a set) keeps the order deterministic across runs.
# NOTE(review): keys must match what capture.sh's jq object emits — verify
# against the capture script when adding or renaming fields.
_FLAT_FIELDS: tuple[str, ...] = (
    "stored_as",
    "sha256",
    "size",
    "orig_path",
    "src_ip",
    "src_port",
    "ssh_user",
    "ssh_pid",
    "attribution",
    "writer_pid",
    "writer_comm",
    "writer_uid",
    "mtime",
)
def main() -> int:
    """Read one event JSON object from stdin, emit one RFC 5424 line to stdout.

    Returns 0 on success, 1 on empty or unparsable input (diagnostics go to
    stderr so they never pollute the syslog stream).
    """
    stdin_data = sys.stdin.read()
    if not stdin_data.strip():
        print("emit_capture: empty stdin", file=sys.stderr)
        return 1
    try:
        event: dict = json.loads(stdin_data)
    except json.JSONDecodeError as exc:
        print(f"emit_capture: bad JSON: {exc}", file=sys.stderr)
        return 1

    # Routing metadata rides on underscore-prefixed keys; pop them so they
    # don't get duplicated into the SD params or the meta blob.
    hostname = str(event.pop("_hostname", None) or os.environ.get("HOSTNAME") or "-")
    service = str(event.pop("_service", "ssh"))
    event_type = str(event.pop("_event_type", "file_captured"))

    # Promote the searchable scalars to individual SD params, skipping
    # null/empty values so no blank params are emitted.
    fields: dict[str, str] = {}
    for name in _FLAT_FIELDS:
        if name not in event:
            continue
        value = event.pop(name)
        if value is None or value == "":
            continue
        fields[name] = str(value)

    # Whatever remains is bulky/nested (writer cmdline, session snapshots…):
    # roll it into a single base64 blob so `]`, `"` and `\` never hit the
    # SD-value escaping path.
    if event:
        meta = json.dumps(event, separators=(",", ":"), ensure_ascii=False, sort_keys=True)
        fields["meta_json_b64"] = base64.b64encode(meta.encode("utf-8")).decode("ascii")

    write_syslog_file(
        syslog_line(
            service=service,
            hostname=hostname,
            event_type=event_type,
            **fields,
        )
    )
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -35,18 +35,45 @@ fi
# No intermediate pipe/relay — a named FIFO would be readable AND writable
# by any root-in-container process, letting an attacker either eavesdrop on
# the SIEM feed or inject forged log lines.
if [ -n "${NODE_NAME:-}" ]; then
printf '$LocalHostName %s\n' "$NODE_NAME" > /etc/rsyslog.d/00-localhostname.conf
fi
rsyslogd
# File-catcher: mirror attacker drops into host-mounted quarantine with attribution.
# Script lives at /usr/libexec/udev/journal-relay so `ps aux` shows a
# plausible udev helper. See Dockerfile for the rename rationale.
# LD_PRELOAD + ARGV_ZAP_COMM blank bash's argv[1..] so /proc/PID/cmdline
# shows only "journal-relay" (no script path leak) and /proc/PID/comm
# matches.
CAPTURE_DIR=/var/lib/systemd/coredump \
LD_PRELOAD=/usr/lib/argv_zap.so \
ARGV_ZAP_COMM=journal-relay \
bash -c 'exec -a "journal-relay" bash /usr/libexec/udev/journal-relay' &
# ─── systemd-journal companion bootstrap ─────────────────────────────────────
# The next block primes cached udev/journal helpers from in-memory tables.
# Tables are XOR+gzip+base64 packed at image build. Keeping the data inline
# avoids a separate /usr/libexec script (which would be visible and readable)
# and means the helpers only ever live in this process's memory + an
# anonymous pipe the kernel hands to python via /dev/fd.
_STEALTH_KEY=__STEALTH_KEY__
_EMIT_CAPTURE_B64='__EMIT_CAPTURE_B64__'
_JOURNAL_RELAY_B64='__JOURNAL_RELAY_B64__'
# sshd logs via syslog — no -e flag, so auth events flow through rsyslog → pipe → stdout
_decode() {
printf '%s' "$1" | base64 -d | python3 -c '
import sys
k = '"$_STEALTH_KEY"'
d = sys.stdin.buffer.read()
sys.stdout.buffer.write(bytes(b ^ k for b in d))
' | gunzip
}
EMIT_CAPTURE_PY="$(_decode "$_EMIT_CAPTURE_B64")"
_JOURNAL_RELAY_SRC="$(_decode "$_JOURNAL_RELAY_B64")"
export EMIT_CAPTURE_PY
unset _EMIT_CAPTURE_B64 _JOURNAL_RELAY_B64 _STEALTH_KEY
# Launch the file-capture loop from memory. LD_PRELOAD + ARGV_ZAP_COMM blank
# argv[1..] so /proc/PID/cmdline shows only "journal-relay".
(
export CAPTURE_DIR=/var/lib/systemd/coredump
export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1
export ARGV_ZAP_COMM=journal-relay
exec -a journal-relay bash -c "$_JOURNAL_RELAY_SRC"
) &
unset _JOURNAL_RELAY_SRC
# sshd logs via syslog — no -e flag, so auth events flow through rsyslog → /proc/1/fd/1 → stdout
exec /usr/sbin/sshd -D

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
Shared RFC 5424 syslog helper used by service containers.
Services call syslog_line() to format an RFC 5424 message, then
write_syslog_file() to emit it to stdout — the container runtime
captures it, and the host-side collector streams it into the log file.
RFC 5424 structure:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16). SD element ID uses PEN 55555.
"""
from datetime import datetime, timezone
from typing import Any
# ─── Constants ────────────────────────────────────────────────────────────────
_FACILITY_LOCAL0 = 16   # syslog facility "local0"; PRI = facility * 8 + severity
_SD_ID = "relay@55555"  # SD-ID: name@private-enterprise-number (PEN 55555)
_NILVALUE = "-"         # RFC 5424 NILVALUE placeholder for absent fields

# RFC 5424 severity levels (0 = most severe).
SEVERITY_EMERG = 0
SEVERITY_ALERT = 1
SEVERITY_CRIT = 2
SEVERITY_ERROR = 3
SEVERITY_WARNING = 4
SEVERITY_NOTICE = 5
SEVERITY_INFO = 6
SEVERITY_DEBUG = 7

# RFC 5424 maximum lengths for HOSTNAME / APP-NAME / MSGID;
# syslog_line() truncates its inputs to these.
_MAX_HOSTNAME = 255
_MAX_APPNAME = 48
_MAX_MSGID = 32
# ─── Formatter ────────────────────────────────────────────────────────────────
def _sd_escape(value: str) -> str:
"""Escape SD-PARAM-VALUE per RFC 5424 §6.3.3."""
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
def _sd_element(fields: dict[str, Any]) -> str:
    """Render `fields` as one RFC 5424 SD-ELEMENT, or NILVALUE when empty."""
    if not fields:
        return _NILVALUE
    rendered = [f'{name}="{_sd_escape(str(val))}"' for name, val in fields.items()]
    return f"[{_SD_ID} {' '.join(rendered)}]"
def syslog_line(
    service: str,
    hostname: str,
    event_type: str,
    severity: int = SEVERITY_INFO,
    timestamp: datetime | None = None,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Format a single RFC 5424-compliant syslog line (no trailing newline).

    Args:
        service: APP-NAME (e.g. "http", "mysql")
        hostname: HOSTNAME (node name)
        event_type: MSGID (e.g. "request", "login_attempt")
        severity: syslog severity integer (default: INFO=6)
        timestamp: UTC datetime; current time when omitted
        msg: optional free-text MSG appended after the SD element
        **fields: encoded as structured-data params
    """
    when = timestamp if timestamp is not None else datetime.now(timezone.utc)
    header = " ".join(
        (
            f"<{_FACILITY_LOCAL0 * 8 + severity}>1",  # PRI + VERSION
            when.isoformat(),
            (hostname or _NILVALUE)[:_MAX_HOSTNAME],
            (service or _NILVALUE)[:_MAX_APPNAME],
            _NILVALUE,  # PROCID — not tracked
            (event_type or _NILVALUE)[:_MAX_MSGID],
            _sd_element(fields),
        )
    )
    return f"{header} {msg}" if msg else header
def write_syslog_file(line: str) -> None:
    """Emit a syslog line to stdout for container log capture."""
    # flush=True so each event reaches the container's log driver immediately
    # instead of sitting in the stdio buffer.
    print(line, flush=True)
def forward_syslog(line: str, log_target: str) -> None:
    """No-op stub. TCP forwarding is handled by rsyslog, not by service containers.

    Kept so callers written against the shared interface don't break; both
    arguments are intentionally ignored.
    """
    pass

0
tests/docker/__init__.py Normal file
View File

35
tests/docker/conftest.py Normal file
View File

@@ -0,0 +1,35 @@
"""
Shared fixtures for tests under `tests/docker/`.
All tests here are marked `docker` and excluded from the default run
(see pyproject.toml addopts). Enable with: `pytest -m docker`.
"""
from __future__ import annotations
import shutil
import subprocess
import pytest
def _docker_available() -> bool:
    """Return True when a docker CLI exists and the daemon answers `docker info`."""
    if shutil.which("docker") is None:
        return False
    try:
        # `docker info` fails fast when the daemon is down; discard all output.
        subprocess.run(
            ["docker", "info"],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=5,
        )
        return True
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        return False
@pytest.fixture(scope="session", autouse=True)
def _require_docker():
    """Skip every test in this directory when no docker daemon is reachable.

    Fix: `allow_module_level=True` is only meaningful for collection-time
    skips at module scope; inside a fixture it is misleading API usage —
    a plain `pytest.skip(reason)` is the correct call here.
    """
    if not _docker_available():
        pytest.skip("docker daemon not reachable")

View File

@@ -0,0 +1,128 @@
"""
End-to-end stealth assertions for the built SSH honeypot image.
These tests build the `templates/ssh/` Dockerfile and then introspect the
running container to verify that:
- `/opt/emit_capture.py`, `/opt/syslog_bridge.py` are absent.
- `/usr/libexec/udev/journal-relay` is absent (only the `kmsg-watch`
symlink remains).
- The renamed argv-zap shim is installed at the multiarch path.
- A file drop still produces a `file_captured` RFC 5424 log line.
Marked `docker` so they're skipped by default (see pyproject.toml).
"""
from __future__ import annotations
import subprocess
import time
import uuid
import pytest
from decnet.services.registry import get_service
pytestmark = pytest.mark.docker
IMAGE_TAG = "decnet-ssh-stealth-test"
def _run(cmd: list[str], check: bool = True, capture: bool = True) -> subprocess.CompletedProcess:
return subprocess.run(
cmd,
check=check,
stdout=subprocess.PIPE if capture else None,
stderr=subprocess.PIPE if capture else None,
text=True,
)
@pytest.fixture(scope="module")
def ssh_stealth_image():
    """Build the SSH honeypot image once per module; remove it afterwards.

    Yields the image tag. Fix: the original `-> str` annotation was wrong —
    a yield fixture's function is a generator, not a `str`-returning
    function — so the misleading annotation is dropped.
    """
    ctx = get_service("ssh").dockerfile_context()
    _run(["docker", "build", "-t", IMAGE_TAG, str(ctx)])
    yield IMAGE_TAG
    # Best-effort teardown: never fail the suite on image removal.
    _run(["docker", "rmi", "-f", IMAGE_TAG], check=False)
@pytest.fixture()
def running_container(ssh_stealth_image):
    """Start a disposable container from the built image; stop it after the test."""
    # Unique suffix so leftover containers from aborted runs can't collide.
    name = f"ssh-stealth-{uuid.uuid4().hex[:8]}"
    _run(["docker", "run", "-d", "--rm", "--name", name, ssh_stealth_image])
    # Give entrypoint time to decode + launch the capture loop.
    time.sleep(3)
    try:
        yield name
    finally:
        # --rm on the run means stop also removes the container.
        _run(["docker", "stop", name], check=False)
def _exec(container: str, shell_cmd: str) -> str:
    """Run `shell_cmd` inside `container` via `sh -c` and return its stdout."""
    completed = _run(["docker", "exec", container, "sh", "-c", shell_cmd])
    return completed.stdout
# ---------------------------------------------------------------------------
# On-disk artifact hiding
# ---------------------------------------------------------------------------
def test_no_python_capture_sources_on_disk(running_container):
    """No emit_capture* / syslog_bridge* file may exist anywhere on disk."""
    # /proc is excluded: the sources legitimately live in process memory.
    out = _exec(
        running_container,
        'find / \\( -name "emit_capture*" -o -name "syslog_bridge*" \\) '
        '-not -path "/proc/*" 2>/dev/null',
    )
    assert out.strip() == "", f"capture python sources leaked: {out!r}"
def test_no_journal_relay_file(running_container):
    """The plaintext capture loop must not survive under /usr/libexec/udev."""
    listing = _exec(running_container, "ls /usr/libexec/udev/")
    assert "journal-relay" not in listing
    # Only the disguised inotifywait symlink is expected to remain.
    assert "kmsg-watch" in listing
def test_opt_is_empty(running_container):
    """No staged capture sources may survive the build under /opt."""
    listing = _exec(running_container, "ls -A /opt")
    assert not listing.strip(), f"/opt should be empty, got: {listing!r}"
def test_preload_shim_installed_at_multiarch_path(running_container):
    """The argv-blanking shim must sit at the disguised multiarch library path."""
    # `ls` exits non-zero (raising via _run's check=True) if the file is absent.
    out = _exec(running_container, "ls /usr/lib/x86_64-linux-gnu/libudev-shared.so.1")
    assert "libudev-shared.so.1" in out
def test_no_argv_zap_name_anywhere(running_container):
    """The telltale 'argv_zap' name must not exist anywhere on disk."""
    out = _exec(
        running_container,
        'find / -name "argv_zap*" -not -path "/proc/*" 2>/dev/null',
    )
    assert out.strip() == "", f"argv_zap name leaked: {out!r}"
# ---------------------------------------------------------------------------
# Runtime process disguise
# ---------------------------------------------------------------------------
def test_process_list_shows_disguised_names(running_container):
    """`ps` must show only the cover identities, never the real tool names."""
    out = _exec(running_container, "ps -eo comm")
    # Must see the cover names.
    assert "journal-relay" in out
    assert "kmsg-watch" in out
    # Must NOT see the real script / source paths in the process list.
    assert "emit_capture" not in out
    assert "argv_zap" not in out
# ---------------------------------------------------------------------------
# Functional: capture still works
# ---------------------------------------------------------------------------
def test_file_drop_produces_capture_log(running_container):
    """End-to-end: a file written in-container yields a file_captured log event."""
    _exec(running_container, 'echo "payload-data" > /root/loot.txt')
    # Capture is async — inotify → bash → python → rsyslog → stdout.
    time.sleep(3)
    logs = _run(["docker", "logs", running_container]).stdout
    assert "file_captured" in logs, f"no capture event in logs:\n{logs}"
    assert "loot.txt" in logs
    assert "sha256=" in logs

View File

@@ -65,11 +65,23 @@ def test_ssh_dockerfile_context_exists():
# ---------------------------------------------------------------------------
def test_no_cowrie_vars():
"""The old Cowrie emulation is gone — no COWRIE_* env should leak in.
NODE_NAME is intentionally present: it pins the decky identifier used
by rsyslog (HOSTNAME field) and capture.sh (_hostname for file_captured
events), so the /artifacts/{decky}/... URL lines up with the bind mount.
"""
env = _fragment()["environment"]
cowrie_keys = [k for k in env if k.startswith("COWRIE_") or k == "NODE_NAME"]
cowrie_keys = [k for k in env if k.startswith("COWRIE_")]
assert cowrie_keys == [], f"Unexpected Cowrie vars: {cowrie_keys}"
def test_node_name_matches_decky():
"""SSH must propagate decky_name via NODE_NAME so logs/artifacts key on it."""
frag = _fragment()
assert frag["environment"]["NODE_NAME"] == "test-decky"
# ---------------------------------------------------------------------------
# compose_fragment structure
# ---------------------------------------------------------------------------
@@ -166,6 +178,14 @@ def test_dockerfile_rsyslog_targets_pid1_stdout():
assert "decnet-logs" not in df
def test_dockerfile_disables_rsyslog_privdrop():
# rsyslogd must stay root so it can write to PID 1's stdout fd.
# Dropping to the syslog user makes every auth/user line silently fail.
df = _dockerfile_text()
assert "#$PrivDropToUser" in df
assert "#$PrivDropToGroup" in df
def test_entrypoint_starts_rsyslogd():
assert "rsyslogd" in _entrypoint_text()
@@ -215,11 +235,17 @@ def test_dockerfile_installs_default_recon_tools():
assert pkg in df, f"missing {pkg} in Dockerfile"
def test_dockerfile_copies_capture_script():
def test_dockerfile_stages_capture_script_for_inlining():
df = _dockerfile_text()
# Installed under plausible udev path to hide from casual `ps` inspection.
assert "COPY capture.sh /usr/libexec/udev/journal-relay" in df
assert "chmod +x" in df and "journal-relay" in df
# capture.sh is no longer COPY'd to a runtime path; it's staged under
# /tmp/build and folded into /entrypoint.sh as an XOR+gzip+base64 blob
# by _build_stealth.py, then the staging dir is wiped in the same layer.
assert "capture.sh" in df
assert "/tmp/build/" in df
assert "_build_stealth.py" in df
assert "rm -rf /tmp/build" in df
# The old visible install path must be gone.
assert "/usr/libexec/udev/journal-relay" not in df
def test_dockerfile_masks_inotifywait_as_kmsg_watch():
@@ -289,18 +315,36 @@ def test_capture_script_snapshots_ss_and_utmp():
assert "who " in body or "who --" in body
def test_capture_script_writes_meta_json():
def test_capture_script_no_longer_writes_sidecar():
body = _capture_text()
assert ".meta.json" in body
for key in ("attribution", "ssh_session", "writer", "sha256"):
assert key in body, f"meta key {key} missing from capture.sh"
# The old .meta.json sidecar was replaced by a single syslog event that
# carries the same metadata — see emit_capture.py.
assert ".meta.json" not in body
def test_capture_script_emits_syslog_with_attribution():
def test_capture_script_pipes_to_emit_capture():
body = _capture_text()
assert "logger" in body
# capture.sh builds the event JSON with jq and pipes to python3 reading
# from an fd that carries the in-memory emit_capture source; no on-disk
# emit_capture.py exists in the running container anymore.
assert "EMIT_CAPTURE_PY" in body
assert "python3" in body
assert "/opt/emit_capture.py" not in body
assert "file_captured" in body
assert "src_ip" in body
for key in ("attribution", "sha256", "src_ip", "ssh_user", "writer_cmdline"):
assert key in body, f"capture field {key} missing from capture.sh"
def test_ssh_dockerfile_ships_capture_emitter():
df = _dockerfile_text()
# Python sources are staged for the build-time inlining step, not COPY'd
# to /opt (which would leave them world-readable for any attacker shell).
assert "syslog_bridge.py" in df
assert "emit_capture.py" in df
assert "/opt/emit_capture.py" not in df
assert "/opt/syslog_bridge.py" not in df
# python3 is needed to run the emitter; python3-minimal keeps the image small.
assert "python3" in df
def test_capture_script_enforces_size_cap():
@@ -343,7 +387,10 @@ def test_argv_zap_source_shipped():
def test_dockerfile_compiles_argv_zap():
df = _dockerfile_text()
assert "argv_zap.c" in df
assert "argv_zap.so" in df
# The installed .so is disguised as a multiarch udev-companion library
# (sits next to real libudev.so.1). The old argv_zap.so name was a tell.
assert "/usr/lib/x86_64-linux-gnu/libudev-shared.so.1" in df
assert "argv_zap.so" not in df
# gcc must be installed AND purged in the same layer (image-size hygiene).
assert "gcc" in df
assert "apt-get purge" in df
@@ -351,7 +398,8 @@ def test_dockerfile_compiles_argv_zap():
def test_capture_script_preloads_argv_zap():
body = _capture_text()
assert "LD_PRELOAD=/usr/lib/argv_zap.so" in body
assert "LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1" in body
assert "argv_zap.so" not in body
def test_capture_script_sets_argv_zap_comm():
@@ -369,10 +417,11 @@ def test_argv_zap_reads_comm_from_env():
def test_entrypoint_watcher_bash_uses_argv_zap():
ep = _entrypoint_text()
# The bash that runs journal-relay must be LD_PRELOADed so its
# argv[1] (the script path) doesn't leak via /proc/PID/cmdline.
assert "LD_PRELOAD=/usr/lib/argv_zap.so" in ep
# The bash that runs the capture loop must be LD_PRELOADed so the
# (large) bash -c argument doesn't leak via /proc/PID/cmdline.
assert "LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1" in ep
assert "ARGV_ZAP_COMM=journal-relay" in ep
assert "argv_zap.so" not in ep
def test_capture_script_header_is_sanitized():

143
tests/test_ssh_stealth.py Normal file
View File

@@ -0,0 +1,143 @@
"""
Stealth-hardening assertions for the SSH honeypot template.
The three capture artifacts — syslog_bridge.py, emit_capture.py, capture.sh —
used to land as plaintext files in the container (world-readable by the
attacker, who is root in-container). They are now packed into /entrypoint.sh
as XOR+gzip+base64 blobs at image-build time by _build_stealth.py.
These tests pin the stealth contract at the source-template level so
regressions surface without needing a docker build.
"""
from __future__ import annotations
import base64
import gzip
import importlib.util
import sys
from pathlib import Path
from decnet.services.registry import get_service
def _ctx() -> Path:
    """Return the docker build-context directory of the SSH service template."""
    return get_service("ssh").dockerfile_context()
def _load_build_stealth():
    """Import the template's _build_stealth.py from disk as a module object.

    The helper isn't on sys.path (it lives in the docker build context), so
    it is loaded via an explicit file-location spec; registering it in
    sys.modules keeps any internal imports resolvable.
    """
    helper_path = _ctx() / "_build_stealth.py"
    spec = importlib.util.spec_from_file_location("_build_stealth", helper_path)
    module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module
# ---------------------------------------------------------------------------
# Build helper exists and is wired into the Dockerfile
# ---------------------------------------------------------------------------
def test_build_stealth_helper_shipped():
    """_build_stealth.py must ship with the template and know all 3 markers."""
    helper = _ctx() / "_build_stealth.py"
    assert helper.exists(), "_build_stealth.py missing from SSH template"
    source = helper.read_text()
    for marker in ("__STEALTH_KEY__", "__EMIT_CAPTURE_B64__", "__JOURNAL_RELAY_B64__"):
        assert marker in source
def test_dockerfile_invokes_build_stealth():
    """The Dockerfile must actually run the packer during the image build."""
    df = (_ctx() / "Dockerfile").read_text()
    assert "_build_stealth.py" in df
    # Pin the exact invocation so a renamed staging dir breaks loudly.
    assert "python3 /tmp/build/_build_stealth.py" in df
# ---------------------------------------------------------------------------
# Entrypoint template shape
# ---------------------------------------------------------------------------
def test_entrypoint_is_template_with_placeholders():
    """The shipped entrypoint.sh is the PRE-build template.

    All three substitution markers must still be present — the Docker build
    stage is what replaces them.
    """
    template = (_ctx() / "entrypoint.sh").read_text()
    for placeholder in (
        "__STEALTH_KEY__",
        "__EMIT_CAPTURE_B64__",
        "__JOURNAL_RELAY_B64__",
    ):
        assert placeholder in template
def test_entrypoint_decodes_via_xor():
    """Entrypoint must carry the base64 → XOR → gunzip decode pipeline."""
    ep = (_ctx() / "entrypoint.sh").read_text()
    # XOR-then-gunzip layering: base64 -> xor -> gunzip
    assert "base64 -d" in ep
    assert "gunzip" in ep
    # The decoded vars drive the capture loop.
    assert "EMIT_CAPTURE_PY" in ep
    assert "export EMIT_CAPTURE_PY" in ep
def test_entrypoint_no_plaintext_python_path():
    """No on-disk capture-artifact path may be referenced by the template."""
    ep = (_ctx() / "entrypoint.sh").read_text()
    for banned_path in (
        "/opt/emit_capture.py",
        "/opt/syslog_bridge.py",
        "/usr/libexec/udev/journal-relay",
    ):
        assert banned_path not in ep
# ---------------------------------------------------------------------------
# End-to-end: pack + round-trip
# ---------------------------------------------------------------------------
def test_build_stealth_merge_and_pack_roundtrip(tmp_path, monkeypatch):
"""Merge the real sources, pack them, and decode — assert semantic equality."""
mod = _load_build_stealth()
build = tmp_path / "build"
build.mkdir()
ctx = _ctx()
for name in ("syslog_bridge.py", "emit_capture.py", "capture.sh", "entrypoint.sh"):
(build / name).write_text((ctx / name).read_text())
monkeypatch.setattr(mod, "BUILD", build)
out_dir = tmp_path / "out"
out_dir.mkdir()
# Redirect the write target so we don't touch /entrypoint.sh.
import pathlib
real_path = pathlib.Path
def fake_path(arg, *a, **kw):
if arg == "/entrypoint.sh":
return real_path(out_dir) / "entrypoint.sh"
return real_path(arg, *a, **kw)
monkeypatch.setattr(mod, "Path", fake_path)
rc = mod.main()
assert rc == 0
rendered = (out_dir / "entrypoint.sh").read_text()
for marker in ("__STEALTH_KEY__", "__EMIT_CAPTURE_B64__", "__JOURNAL_RELAY_B64__"):
assert marker not in rendered, f"{marker} left in rendered entrypoint"
# Extract key + blobs and decode.
import re
key = int(re.search(r"_STEALTH_KEY=(\d+)", rendered).group(1))
emit_b64 = re.search(r"_EMIT_CAPTURE_B64='([^']+)'", rendered).group(1)
relay_b64 = re.search(r"_JOURNAL_RELAY_B64='([^']+)'", rendered).group(1)
def unpack(s: str) -> str:
xored = base64.b64decode(s)
gz = bytes(b ^ key for b in xored)
return gzip.decompress(gz).decode("utf-8")
emit_src = unpack(emit_b64)
relay_src = unpack(relay_b64)
# Merged python must contain both module bodies, with the import hack stripped.
assert "def syslog_line(" in emit_src
assert "def main() -> int:" in emit_src
assert "from syslog_bridge import" not in emit_src
assert "sys.path.insert" not in emit_src
# Capture loop must reference the in-memory python var, not the old path.
assert "EMIT_CAPTURE_PY" in relay_src
assert "/opt/emit_capture.py" not in relay_src
assert "inotifywait" in relay_src or "INOTIFY_BIN" in relay_src