fix(packaging): move templates/ into decnet/ package so they ship with pip install

The docker build contexts and syslog_bridge.py lived at repo root, which
meant setuptools (include = ["decnet*"]) never shipped them. Agents
installed via `pip install $RELEASE_DIR` got site-packages/decnet/** but no
templates/, so every deploy blew up in deployer._sync_logging_helper with
FileNotFoundError on templates/syslog_bridge.py.

Move templates/ -> decnet/templates/ and declare it as setuptools
package-data. Path resolutions in services/*.py and engine/deployer.py drop
one .parent since templates now lives beside the code. Test fixtures,
bandit exclude path, and coverage omit glob updated to match.
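For reference, a minimal sketch of the path-resolution change (assuming deployer.py sits at decnet/engine/deployer.py; the TEMPLATES_DIR name is illustrative, not the actual code):

from pathlib import Path

# Before: templates/ sat at the repo root, three levels up from this module, so
# resolving it from an installed decnet package walked outside site-packages:
#   TEMPLATES_DIR = Path(__file__).resolve().parent.parent.parent / "templates"

# After: templates/ ships inside the package, one .parent fewer:
TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "templates"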
2026-04-19 19:30:04 -04:00
parent 2bef3edb72
commit 6708f26e6b
158 changed files with 38 additions and 33 deletions


@@ -0,0 +1,116 @@
ARG BASE_IMAGE=debian:bookworm-slim
FROM ${BASE_IMAGE}
RUN apt-get update && apt-get install -y --no-install-recommends \
openssh-server \
sudo \
rsyslog \
curl \
wget \
vim \
nano \
net-tools \
procps \
htop \
git \
inotify-tools \
psmisc \
iproute2 \
iputils-ping \
ca-certificates \
nmap \
jq \
python3 \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir -p /var/run/sshd /root/.ssh /var/log/journal /var/lib/systemd/coredump \
&& chmod 700 /var/lib/systemd/coredump
# sshd_config: allow root + password auth; VERBOSE so session lines carry
# client IP + session PID (needed for file-capture attribution).
RUN sed -i \
-e 's|^#\?PermitRootLogin.*|PermitRootLogin yes|' \
-e 's|^#\?PasswordAuthentication.*|PasswordAuthentication yes|' \
-e 's|^#\?ChallengeResponseAuthentication.*|ChallengeResponseAuthentication no|' \
-e 's|^#\?LogLevel.*|LogLevel VERBOSE|' \
/etc/ssh/sshd_config
# rsyslog: forward auth.* and user.* to PID 1's stdout in RFC 5424 format.
# /proc/1/fd/1 is the container-stdout fd Docker attached — writing there
# surfaces lines in `docker logs` without needing a named pipe + relay cat
# (which would be readable AND writable by any root-in-container process).
RUN printf '%s\n' \
'# auth + user events → container stdout as RFC 5424' \
'$template RFC5424fmt,"<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% %APP-NAME% %PROCID% %MSGID% %STRUCTURED-DATA% %msg%\n"' \
'auth,authpriv.* /proc/1/fd/1;RFC5424fmt' \
'user.* /proc/1/fd/1;RFC5424fmt' \
> /etc/rsyslog.d/50-journal-forward.conf
# Silence default catch-all rules so we own auth/user routing exclusively.
# Also disable rsyslog's privilege drop: PID 1's stdout (/proc/1/fd/1) is
# owned by root, so a syslog-user rsyslogd gets EACCES and silently drops
# every auth/user line (bash CMD events + file_captured emissions).
RUN sed -i \
-e 's|^\(\*\.\*;auth,authpriv\.none\)|#\1|' \
-e 's|^auth,authpriv\.\*|#auth,authpriv.*|' \
-e 's|^\$PrivDropToUser|#$PrivDropToUser|' \
-e 's|^\$PrivDropToGroup|#$PrivDropToGroup|' \
/etc/rsyslog.conf
# Sudo: log to syslog (auth facility) AND a local file with full I/O capture
RUN echo 'Defaults logfile="/var/log/sudo.log"' >> /etc/sudoers && \
echo 'Defaults syslog=auth' >> /etc/sudoers && \
echo 'Defaults log_input,log_output' >> /etc/sudoers
# Lived-in environment: motd, shell aliases, fake project files
RUN echo "Ubuntu 22.04.3 LTS" > /etc/issue.net && \
echo "Welcome to Ubuntu 22.04.3 LTS (GNU/Linux 5.15.0-88-generic x86_64)" > /etc/motd && \
echo "" >> /etc/motd && \
echo " * Documentation: https://help.ubuntu.com" >> /etc/motd && \
echo " * Management: https://landscape.canonical.com" >> /etc/motd && \
echo " * Support: https://ubuntu.com/advantage" >> /etc/motd
RUN echo 'alias ll="ls -alF"' >> /root/.bashrc && \
echo 'alias la="ls -A"' >> /root/.bashrc && \
echo 'alias l="ls -CF"' >> /root/.bashrc && \
echo 'export HISTSIZE=1000' >> /root/.bashrc && \
echo 'export HISTFILESIZE=2000' >> /root/.bashrc && \
echo 'PROMPT_COMMAND='"'"'logger -p user.info -t bash "CMD uid=$UID user=$USER src=${SSH_CLIENT%% *} pwd=$PWD cmd=$(history 1 | sed "s/^ *[0-9]* *//")";'"'" >> /root/.bashrc
# Fake project files to look lived-in
RUN mkdir -p /root/projects /root/backups /var/www/html && \
printf '# TODO: migrate DB to new server\n# check cron jobs\n# update SSL cert\n' > /root/notes.txt && \
printf 'DB_HOST=10.0.0.5\nDB_USER=admin\nDB_PASS=changeme123\nDB_NAME=prod_db\n' > /root/projects/.env && \
printf '[Unit]\nDescription=App Server\n[Service]\nExecStart=/usr/bin/python3 /opt/app/server.py\n' > /root/projects/app.service
# Stage all capture sources in a scratch dir. Nothing here survives the layer:
# _build_stealth.py packs syslog_bridge.py + emit_capture.py + capture.sh into
# XOR+gzip+base64 blobs embedded directly in /entrypoint.sh, and the whole
# /tmp/build tree is wiped at the end of the RUN — so the final image has no
# `.py` file under /opt and no `journal-relay` script under /usr/libexec/udev.
COPY entrypoint.sh capture.sh syslog_bridge.py emit_capture.py \
argv_zap.c _build_stealth.py /tmp/build/
# argv_zap is compiled into a shared object disguised as a multiarch
# udev-companion library (sits next to real libudev.so.1). gcc is installed
# only for this build step and purged in the same layer.
RUN set -eu \
&& apt-get update \
&& apt-get install -y --no-install-recommends gcc libc6-dev \
&& mkdir -p /usr/lib/x86_64-linux-gnu /usr/libexec/udev \
&& gcc -O2 -fPIC -shared \
-o /usr/lib/x86_64-linux-gnu/libudev-shared.so.1 \
/tmp/build/argv_zap.c -ldl \
&& apt-get purge -y gcc libc6-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/* \
&& ln -sf /usr/bin/inotifywait /usr/libexec/udev/kmsg-watch \
&& python3 /tmp/build/_build_stealth.py \
&& rm -rf /tmp/build
EXPOSE 22
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD kill -0 1 || exit 1
ENTRYPOINT ["/entrypoint.sh"]


@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
Build-time helper: merge capture Python sources, XOR+gzip+base64 pack them
and the capture.sh loop, and render the final /entrypoint.sh from its
templated form.
Runs inside the Docker build. Reads from /tmp/build/, writes /entrypoint.sh.
"""
from __future__ import annotations
import base64
import gzip
import random
import sys
from pathlib import Path
BUILD = Path("/tmp/build")
def _merge_python() -> str:
    bridge = (BUILD / "syslog_bridge.py").read_text()
    emit = (BUILD / "emit_capture.py").read_text()

    def _clean(src: str) -> tuple[list[str], list[str]]:
        """Return (future_imports, other_lines) with noise stripped."""
        futures: list[str] = []
        rest: list[str] = []
        for line in src.splitlines():
            ls = line.lstrip()
            if ls.startswith("from __future__"):
                futures.append(line)
            elif ls.startswith("sys.path.insert") or ls.startswith("from syslog_bridge"):
                continue
            else:
                rest.append(line)
        return futures, rest

    b_fut, b_rest = _clean(bridge)
    e_fut, e_rest = _clean(emit)
    # Deduplicate future imports and hoist to the very top.
    seen: set[str] = set()
    futures: list[str] = []
    for line in (*b_fut, *e_fut):
        stripped = line.strip()
        if stripped not in seen:
            seen.add(stripped)
            futures.append(line)
    header = "\n".join(futures)
    body = "\n".join(b_rest) + "\n\n" + "\n".join(e_rest)
    return (header + "\n" if header else "") + body


def _pack(text: str, key: int) -> str:
    gz = gzip.compress(text.encode("utf-8"))
    xored = bytes(b ^ key for b in gz)
    return base64.b64encode(xored).decode("ascii")


def main() -> int:
    key = random.SystemRandom().randint(1, 255)
    merged_py = _merge_python()
    capture_sh = (BUILD / "capture.sh").read_text()
    emit_b64 = _pack(merged_py, key)
    relay_b64 = _pack(capture_sh, key)
    tpl = (BUILD / "entrypoint.sh").read_text()
    rendered = (
        tpl.replace("__STEALTH_KEY__", str(key))
        .replace("__EMIT_CAPTURE_B64__", emit_b64)
        .replace("__JOURNAL_RELAY_B64__", relay_b64)
    )
    for marker in ("__STEALTH_KEY__", "__EMIT_CAPTURE_B64__", "__JOURNAL_RELAY_B64__"):
        if marker in rendered:
            print(f"build: placeholder {marker} still present after render", file=sys.stderr)
            return 1
    Path("/entrypoint.sh").write_text(rendered)
    Path("/entrypoint.sh").chmod(0o755)
    return 0


if __name__ == "__main__":
    sys.exit(main())
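The inverse of _pack() is what entrypoint.sh's _decode performs with base64 -d, an inline python3 XOR, and gunzip. A quick round-trip check in pure Python; _unpack is a hypothetical helper, not part of this commit:

import base64
import gzip


def _unpack(blob: str, key: int) -> str:
    """Reverse of _pack(): base64-decode, XOR every byte with the key, gunzip."""
    xored = base64.b64decode(blob)
    gz = bytes(b ^ key for b in xored)
    return gzip.decompress(gz).decode("utf-8")


# Round-trip sanity check against _pack() above:
#   assert _unpack(_pack("hello", 0x5A), 0x5A) == "hello"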


@@ -0,0 +1,65 @@
/*
* argv_zap.so — LD_PRELOAD shim that blanks argv[1..] from /proc/PID/cmdline
* after the target binary has parsed its arguments.
*
* Rationale: exec -a can rewrite argv[0], but the remaining args (paths,
* flags) remain visible via `ps aux`. By hooking __libc_start_main we can
* copy argv into heap-backed storage, hand that to the real main, then
* zero the stack-resident argv region so the kernel's cmdline reader
* returns just argv[0].
*
* Usage:
* gcc -O2 -fPIC -shared -o argv_zap.so argv_zap.c -ldl
* ARGV_ZAP_COMM=kmsg-watch LD_PRELOAD=/path/argv_zap.so \
* exec -a "kmsg-watch" inotifywait …
*/
#define _GNU_SOURCE
#include <dlfcn.h>
#include <string.h>
#include <stdlib.h>
#include <sys/prctl.h>
typedef int (*main_t)(int, char **, char **);
typedef int (*libc_start_main_t)(main_t, int, char **,
void (*)(void), void (*)(void),
void (*)(void), void *);
static main_t real_main;
static int wrapped_main(int argc, char **argv, char **envp) {
/* Heap-copy argv so the target keeps its arguments. */
char **heap_argv = (char **)calloc(argc + 1, sizeof(char *));
if (heap_argv) {
for (int i = 0; i < argc; i++) {
heap_argv[i] = strdup(argv[i] ? argv[i] : "");
}
}
/* Zero the contiguous argv[1..] region (argv[0] stays for ps). */
if (argc > 1 && argv[1] && argv[argc - 1]) {
char *start = argv[1];
char *end = argv[argc - 1] + strlen(argv[argc - 1]);
if (end > start) memset(start, 0, (size_t)(end - start));
}
/* Optional comm rename so /proc/self/comm mirrors the argv[0] disguise.
* Read from ARGV_ZAP_COMM so different callers can pick their own name
* (kmsg-watch for inotifywait, journal-relay for the watcher bash, …).
* Unset afterwards so children don't accidentally inherit the override. */
const char *comm = getenv("ARGV_ZAP_COMM");
if (comm && *comm) {
prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
unsetenv("ARGV_ZAP_COMM");
}
return real_main(argc, heap_argv ? heap_argv : argv, envp);
}
int __libc_start_main(main_t main_fn, int argc, char **argv,
void (*init)(void), void (*fini)(void),
void (*rtld_fini)(void), void *stack_end) {
real_main = main_fn;
libc_start_main_t real = (libc_start_main_t)dlsym(RTLD_NEXT, "__libc_start_main");
return real(wrapped_main, argc, argv, init, fini, rtld_fini, stack_end);
}
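A quick way to confirm the blanking from outside the target process is to read /proc/&lt;pid&gt;/cmdline, which is NUL-separated; a hypothetical check script, not shipped in the image:

from pathlib import Path


def visible_argv(pid: int) -> list[str]:
    """Return the argv entries still visible through procfs for a PID."""
    raw = Path(f"/proc/{pid}/cmdline").read_bytes()
    # Entries are NUL-separated; the zeroed argv[1..] region collapses to empty strings.
    return [arg.decode(errors="replace") for arg in raw.split(b"\0") if arg]


# Under the shim, a watcher launched as "kmsg-watch <paths/flags...>" should
# report only ["kmsg-watch"] here, matching the ps aux behaviour described above.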

decnet/templates/ssh/capture.sh (265 lines) Executable file

@@ -0,0 +1,265 @@
#!/bin/bash
# systemd-journal relay helper: mirrors newly-written files under a
# monitored set of paths into the coredump staging directory and emits
# a structured journal line per event.
#
# `lastpipe` runs the tail of `inotify | while` in the current shell so
# the process tree stays flat (one bash, not two). Job control must be
# off for lastpipe to apply — non-interactive scripts already have it off.
shopt -s lastpipe
set +m
set -u
CAPTURE_DIR="${CAPTURE_DIR:-/var/lib/systemd/coredump}"
CAPTURE_MAX_BYTES="${CAPTURE_MAX_BYTES:-52428800}" # 50 MiB
CAPTURE_WATCH_PATHS="${CAPTURE_WATCH_PATHS:-/root /tmp /var/tmp /home /var/www /opt /dev/shm}"
# Invoke inotifywait through the udev-side symlink; fall back to the real
# binary if the symlink is missing.
INOTIFY_BIN="${INOTIFY_BIN:-/usr/libexec/udev/kmsg-watch}"
[ -x "$INOTIFY_BIN" ] || INOTIFY_BIN="$(command -v inotifywait)"
mkdir -p "$CAPTURE_DIR"
chmod 700 "$CAPTURE_DIR"
# Filenames we never capture (boot noise, self-writes).
_is_ignored_path() {
local p="$1"
case "$p" in
"$CAPTURE_DIR"/*) return 0 ;;
/var/lib/systemd/*) return 0 ;;
*/.bash_history) return 0 ;;
*/.viminfo) return 0 ;;
*/ssh_host_*_key*) return 0 ;;
esac
return 1
}
# Resolve the writer PID best-effort. Prints the PID or nothing.
_writer_pid() {
local path="$1"
local pid
    # fuser prints PIDs space-separated on stdout; keep only the first one.
    pid="$(fuser "$path" 2>/dev/null | awk '{print $1; exit}')"
if [ -n "$pid" ]; then
printf '%s' "${pid%% *}"
return
fi
# Fallback: scan /proc/*/fd for an open handle on the path.
for fd_link in /proc/[0-9]*/fd/*; do
[ -L "$fd_link" ] || continue
if [ "$(readlink -f "$fd_link" 2>/dev/null)" = "$path" ]; then
printf '%s' "$(echo "$fd_link" | awk -F/ '{print $3}')"
return
fi
done
}
# Walk PPid chain from $1 until we hit an sshd session leader.
# Prints: <sshd_pid> <user> (empty on no match).
_walk_to_sshd() {
local pid="$1"
local depth=0
while [ -n "$pid" ] && [ "$pid" != "0" ] && [ "$pid" != "1" ] && [ $depth -lt 20 ]; do
local cmd
cmd="$(tr '\0' ' ' < "/proc/$pid/cmdline" 2>/dev/null)"
# sshd session leaders look like: "sshd: root@pts/0" or "sshd: root@notty"
if echo "$cmd" | grep -qE '^sshd: [^ ]+@'; then
local user
user="$(echo "$cmd" | sed -E 's/^sshd: ([^@]+)@.*/\1/')"
printf '%s %s' "$pid" "$user"
return
fi
pid="$(awk '/^PPid:/ {print $2}' "/proc/$pid/status" 2>/dev/null)"
depth=$((depth + 1))
done
}
# Emit a JSON array of currently-established SSH peers.
# Each item: {pid, src_ip, src_port}.
_ss_sessions_json() {
ss -Htnp state established sport = :22 2>/dev/null \
| awk '
{
peer=$4; local_=$3;
# peer looks like 198.51.100.7:55342 (may be IPv6 [::1]:x)
n=split(peer, a, ":");
port=a[n];
ip=peer; sub(":" port "$", "", ip);
gsub(/[\[\]]/, "", ip);
# extract pid from users:(("sshd",pid=1234,fd=5))
pid="";
if (match($0, /pid=[0-9]+/)) {
pid=substr($0, RSTART+4, RLENGTH-4);
}
printf "{\"pid\":%s,\"src_ip\":\"%s\",\"src_port\":%s}\n",
(pid==""?"null":pid), ip, (port+0);
}' \
| jq -s '.'
}
# Emit a JSON array of logged-in users from utmp.
# Each item: {user, src_ip, login_at}.
_who_sessions_json() {
who --ips 2>/dev/null \
| awk '{ printf "{\"user\":\"%s\",\"tty\":\"%s\",\"login_at\":\"%s %s\",\"src_ip\":\"%s\"}\n", $1, $2, $3, $4, $NF }' \
| jq -s '.'
}
_capture_one() {
local src="$1"
[ -f "$src" ] || return 0
_is_ignored_path "$src" && return 0
local size
size="$(stat -c '%s' "$src" 2>/dev/null)"
[ -z "$size" ] && return 0
if [ "$size" -gt "$CAPTURE_MAX_BYTES" ]; then
logger -p user.info -t systemd-journal "file_skipped size=$size path=$src reason=oversize"
return 0
fi
# Attribution first — PID may disappear after the copy races.
local writer_pid writer_comm writer_cmdline writer_uid writer_loginuid
writer_pid="$(_writer_pid "$src")"
if [ -n "$writer_pid" ] && [ -d "/proc/$writer_pid" ]; then
writer_comm="$(cat "/proc/$writer_pid/comm" 2>/dev/null)"
writer_cmdline="$(tr '\0' ' ' < "/proc/$writer_pid/cmdline" 2>/dev/null)"
writer_uid="$(awk '/^Uid:/ {print $2}' "/proc/$writer_pid/status" 2>/dev/null)"
writer_loginuid="$(cat "/proc/$writer_pid/loginuid" 2>/dev/null)"
fi
local ssh_pid ssh_user
if [ -n "$writer_pid" ]; then
read -r ssh_pid ssh_user < <(_walk_to_sshd "$writer_pid" || true)
fi
local ss_json who_json
ss_json="$(_ss_sessions_json 2>/dev/null || echo '[]')"
who_json="$(_who_sessions_json 2>/dev/null || echo '[]')"
# Resolve src_ip via ss by matching ssh_pid.
local src_ip="" src_port="null" attribution="unknown"
if [ -n "${ssh_pid:-}" ]; then
local matched
matched="$(echo "$ss_json" | jq -c --argjson p "$ssh_pid" '.[] | select(.pid==$p)')"
if [ -n "$matched" ]; then
src_ip="$(echo "$matched" | jq -r '.src_ip')"
src_port="$(echo "$matched" | jq -r '.src_port')"
attribution="pid-chain"
fi
fi
# Fallback 1: ss-only. scp/wget/sftp close their fd before close_write
# fires, so fuser/proc-fd walks miss them. If there's exactly one live
# sshd session, attribute to it. With multiple, attribute to the first
# but tag ambiguous so analysts know to cross-check concurrent_sessions.
if [ "$attribution" = "unknown" ]; then
local ss_len
ss_len="$(echo "$ss_json" | jq 'length')"
if [ "$ss_len" -ge 1 ]; then
src_ip="$(echo "$ss_json" | jq -r '.[0].src_ip')"
src_port="$(echo "$ss_json" | jq -r '.[0].src_port')"
ssh_pid="$(echo "$ss_json" | jq -r '.[0].pid // empty')"
if [ -n "${ssh_pid:-}" ] && [ -d "/proc/$ssh_pid" ]; then
local ssh_cmd
ssh_cmd="$(tr '\0' ' ' < "/proc/$ssh_pid/cmdline" 2>/dev/null)"
ssh_user="$(echo "$ssh_cmd" | sed -nE 's/^sshd: ([^@]+)@.*/\1/p')"
fi
if [ "$ss_len" -eq 1 ]; then
attribution="ss-only"
else
attribution="ss-ambiguous"
fi
fi
fi
# Fallback 2: utmp. Weakest signal; often empty in containers.
if [ "$attribution" = "unknown" ] && [ "$(echo "$who_json" | jq 'length')" -gt 0 ]; then
src_ip="$(echo "$who_json" | jq -r '.[0].src_ip')"
attribution="utmp-only"
fi
local sha
sha="$(sha256sum "$src" 2>/dev/null | awk '{print $1}')"
[ -z "$sha" ] && return 0
local ts base stored_as
ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
base="$(basename "$src")"
stored_as="${ts}_${sha:0:12}_${base}"
cp --preserve=timestamps,ownership "$src" "$CAPTURE_DIR/$stored_as" 2>/dev/null || return 0
local mtime
mtime="$(stat -c '%y' "$src" 2>/dev/null)"
# Prefer NODE_NAME (the deployer-supplied decky identifier) over
# $HOSTNAME, which is a cosmetic fake like "SRV-DEV-36" set by
# entrypoint.sh. The UI and the artifact bind mount both key on the
# decky name, so using $HOSTNAME here makes /artifacts/{decky}/... URLs
# unresolvable.
local decky="${NODE_NAME:-${HOSTNAME:-unknown}}"
# One syslog line, no sidecar. Flat summary fields ride as top-level SD
# params (searchable pills in the UI); bulky nested structures (writer
# cmdline, concurrent_sessions, ss_snapshot) are base64-packed into a
# single meta_json_b64 SD param by emit_capture.py.
jq -n \
--arg _hostname "$decky" \
--arg _service "ssh" \
--arg _event_type "file_captured" \
--arg captured_at "$ts" \
--arg orig_path "$src" \
--arg stored_as "$stored_as" \
--arg sha256 "$sha" \
--argjson size "$size" \
--arg mtime "$mtime" \
--arg attribution "$attribution" \
--arg writer_pid "${writer_pid:-}" \
--arg writer_comm "${writer_comm:-}" \
--arg writer_cmdline "${writer_cmdline:-}" \
--arg writer_uid "${writer_uid:-}" \
--arg writer_loginuid "${writer_loginuid:-}" \
--arg ssh_pid "${ssh_pid:-}" \
--arg ssh_user "${ssh_user:-}" \
--arg src_ip "$src_ip" \
--arg src_port "$src_port" \
--argjson concurrent "$who_json" \
--argjson ss_snapshot "$ss_json" \
'{
_hostname: $_hostname,
_service: $_service,
_event_type: $_event_type,
captured_at: $captured_at,
orig_path: $orig_path,
stored_as: $stored_as,
sha256: $sha256,
size: $size,
mtime: $mtime,
attribution: $attribution,
writer_pid: $writer_pid,
writer_comm: $writer_comm,
writer_uid: $writer_uid,
ssh_pid: $ssh_pid,
ssh_user: $ssh_user,
src_ip: $src_ip,
src_port: (if $src_port == "null" or $src_port == "" then "" else $src_port end),
writer_cmdline: $writer_cmdline,
writer_loginuid: $writer_loginuid,
concurrent_sessions: $concurrent,
ss_snapshot: $ss_snapshot
}' \
| python3 <(printf '%s' "$EMIT_CAPTURE_PY")
}
# Main loop.
# LD_PRELOAD libudev-shared.so.1 blanks argv[1..] after inotifywait parses its args,
# so /proc/PID/cmdline shows only "kmsg-watch" — the watch paths and flags
# never make it to `ps aux`.
# shellcheck disable=SC2086
ARGV_ZAP_COMM=kmsg-watch LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1 "$INOTIFY_BIN" -m -r -q \
--event close_write --event moved_to \
--format '%w%f' \
$CAPTURE_WATCH_PATHS 2>/dev/null \
| while IFS= read -r path; do
_capture_one "$path" &
done
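For reference, the PPid-chain attribution in _walk_to_sshd expressed in Python; an illustrative port only, the shipped logic is the bash above:

import re
from pathlib import Path

_SSHD_SESSION = re.compile(r"^sshd: ([^@\s]+)@")


def walk_to_sshd(pid: int, max_depth: int = 20) -> tuple[int, str] | None:
    """Follow PPid links until an sshd session leader ("sshd: user@pts/N") appears."""
    for _ in range(max_depth):
        if pid in (0, 1):
            return None
        try:
            raw = Path(f"/proc/{pid}/cmdline").read_bytes()
            status = Path(f"/proc/{pid}/status").read_text()
        except OSError:
            return None
        cmdline = raw.replace(b"\0", b" ").decode(errors="replace")
        match = _SSHD_SESSION.match(cmdline)
        if match:
            return pid, match.group(1)
        ppid = re.search(r"^PPid:\s*(\d+)", status, re.MULTILINE)
        if ppid is None:
            return None
        pid = int(ppid.group(1))
    return None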


@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""
Emit an RFC 5424 `file_captured` line to stdout.
Called by capture.sh after a file drop has been mirrored into the quarantine
directory. Reads a single JSON object from stdin describing the event; emits
one syslog line that the collector parses into `logs.fields`.
The input JSON may contain arbitrary nested structures (writer cmdline,
concurrent_sessions, ss_snapshot). Bulky fields are base64-encoded into a
single `meta_json_b64` SD param — this avoids pathological characters
(`]`, `"`, `\\`) that the collector's SD-block regex cannot losslessly
round-trip when embedded directly.
"""
from __future__ import annotations
import base64
import json
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from syslog_bridge import syslog_line, write_syslog_file # noqa: E402
# Flat fields ride as individual SD params (searchable, rendered as pills).
# Everything else is rolled into the base64 meta blob.
_FLAT_FIELDS: tuple[str, ...] = (
    "stored_as",
    "sha256",
    "size",
    "orig_path",
    "src_ip",
    "src_port",
    "ssh_user",
    "ssh_pid",
    "attribution",
    "writer_pid",
    "writer_comm",
    "writer_uid",
    "mtime",
)


def main() -> int:
    raw = sys.stdin.read()
    if not raw.strip():
        print("emit_capture: empty stdin", file=sys.stderr)
        return 1
    try:
        event: dict = json.loads(raw)
    except json.JSONDecodeError as exc:
        print(f"emit_capture: bad JSON: {exc}", file=sys.stderr)
        return 1
    hostname = str(event.pop("_hostname", None) or os.environ.get("HOSTNAME") or "-")
    service = str(event.pop("_service", "ssh"))
    event_type = str(event.pop("_event_type", "file_captured"))
    fields: dict[str, str] = {}
    for key in _FLAT_FIELDS:
        if key in event:
            value = event.pop(key)
            if value is None or value == "":
                continue
            fields[key] = str(value)
    if event:
        payload = json.dumps(event, separators=(",", ":"), ensure_ascii=False, sort_keys=True)
        fields["meta_json_b64"] = base64.b64encode(payload.encode("utf-8")).decode("ascii")
    line = syslog_line(
        service=service,
        hostname=hostname,
        event_type=event_type,
        **fields,
    )
    write_syslog_file(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
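On the consuming side, the meta_json_b64 SD param unpacks back into the nested structures; a hypothetical collector-side helper, assuming the flat SD params have already been parsed into a dict:

import base64
import json


def expand_meta(fields: dict[str, str]) -> dict:
    """Merge flat SD params with the nested fields packed into meta_json_b64."""
    meta_b64 = fields.pop("meta_json_b64", None)
    event: dict = dict(fields)
    if meta_b64:
        # b64decode -> the compact JSON produced by emit_capture's json.dumps call.
        event.update(json.loads(base64.b64decode(meta_b64)))
    return event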


@@ -0,0 +1,79 @@
#!/bin/bash
set -e
# Configure root password (default: admin)
ROOT_PASSWORD="${SSH_ROOT_PASSWORD:-admin}"
echo "root:${ROOT_PASSWORD}" | chpasswd
# Optional: override hostname inside container
if [ -n "$SSH_HOSTNAME" ]; then
echo "$SSH_HOSTNAME" > /etc/hostname
hostname "$SSH_HOSTNAME"
fi
# Generate host keys if missing (first boot)
ssh-keygen -A
# Fake bash history so the box looks used
if [ ! -f /root/.bash_history ]; then
cat > /root/.bash_history <<'HIST'
apt update && apt upgrade -y
systemctl status nginx
tail -f /var/log/syslog
df -h
htop
ps aux | grep python
git pull origin main
cd /root/projects
vim notes.txt
crontab -e
ls /var/www/html
HIST
fi
# Logging pipeline: rsyslogd (RFC 5424) → /proc/1/fd/1 → Docker log capture.
# No intermediate pipe/relay — a named FIFO would be readable AND writable
# by any root-in-container process, letting an attacker either eavesdrop on
# the SIEM feed or inject forged log lines.
if [ -n "${NODE_NAME:-}" ]; then
printf '$LocalHostName %s\n' "$NODE_NAME" > /etc/rsyslog.d/00-localhostname.conf
fi
rsyslogd
# ─── systemd-journal companion bootstrap ─────────────────────────────────────
# The next block primes cached udev/journal helpers from in-memory tables.
# Tables are XOR+gzip+base64 packed at image build. Keeping the data inline
# avoids a separate /usr/libexec script (which would be visible and readable)
# and means the helpers only ever live in this process's memory + an
# anonymous pipe the kernel hands to python via /dev/fd.
_STEALTH_KEY=__STEALTH_KEY__
_EMIT_CAPTURE_B64='__EMIT_CAPTURE_B64__'
_JOURNAL_RELAY_B64='__JOURNAL_RELAY_B64__'
_decode() {
printf '%s' "$1" | base64 -d | python3 -c '
import sys
k = '"$_STEALTH_KEY"'
d = sys.stdin.buffer.read()
sys.stdout.buffer.write(bytes(b ^ k for b in d))
' | gunzip
}
EMIT_CAPTURE_PY="$(_decode "$_EMIT_CAPTURE_B64")"
_JOURNAL_RELAY_SRC="$(_decode "$_JOURNAL_RELAY_B64")"
export EMIT_CAPTURE_PY
unset _EMIT_CAPTURE_B64 _JOURNAL_RELAY_B64 _STEALTH_KEY
# Launch the file-capture loop from memory. LD_PRELOAD + ARGV_ZAP_COMM blank
# argv[1..] so /proc/PID/cmdline shows only "journal-relay".
(
export CAPTURE_DIR=/var/lib/systemd/coredump
export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libudev-shared.so.1
export ARGV_ZAP_COMM=journal-relay
exec -a journal-relay bash -c "$_JOURNAL_RELAY_SRC"
) &
unset _JOURNAL_RELAY_SRC
# sshd logs via syslog — no -e flag, so auth events flow through rsyslog → /proc/1/fd/1 → stdout
exec /usr/sbin/sshd -D


@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
Shared RFC 5424 syslog helper used by service containers.
Services call syslog_line() to format an RFC 5424 message, then
write_syslog_file() to emit it to stdout — the container runtime
captures it, and the host-side collector streams it into the log file.
RFC 5424 structure:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16). SD element ID uses PEN 55555.
"""
from datetime import datetime, timezone
from typing import Any
# ─── Constants ────────────────────────────────────────────────────────────────
_FACILITY_LOCAL0 = 16
_SD_ID = "relay@55555"
_NILVALUE = "-"
SEVERITY_EMERG = 0
SEVERITY_ALERT = 1
SEVERITY_CRIT = 2
SEVERITY_ERROR = 3
SEVERITY_WARNING = 4
SEVERITY_NOTICE = 5
SEVERITY_INFO = 6
SEVERITY_DEBUG = 7
_MAX_HOSTNAME = 255
_MAX_APPNAME = 48
_MAX_MSGID = 32
# ─── Formatter ────────────────────────────────────────────────────────────────
def _sd_escape(value: str) -> str:
    """Escape SD-PARAM-VALUE per RFC 5424 §6.3.3."""
    return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")


def _sd_element(fields: dict[str, Any]) -> str:
    if not fields:
        return _NILVALUE
    params = " ".join(f'{k}="{_sd_escape(str(v))}"' for k, v in fields.items())
    return f"[{_SD_ID} {params}]"


def syslog_line(
    service: str,
    hostname: str,
    event_type: str,
    severity: int = SEVERITY_INFO,
    timestamp: datetime | None = None,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Return a single RFC 5424-compliant syslog line (no trailing newline).

    Args:
        service: APP-NAME (e.g. "http", "mysql")
        hostname: HOSTNAME (node name)
        event_type: MSGID (e.g. "request", "login_attempt")
        severity: Syslog severity integer (default: INFO=6)
        timestamp: UTC datetime; defaults to now
        msg: Optional free-text MSG
        **fields: Encoded as structured data params
    """
    pri = f"<{_FACILITY_LOCAL0 * 8 + severity}>"
    ts = (timestamp or datetime.now(timezone.utc)).isoformat()
    host = (hostname or _NILVALUE)[:_MAX_HOSTNAME]
    appname = (service or _NILVALUE)[:_MAX_APPNAME]
    msgid = (event_type or _NILVALUE)[:_MAX_MSGID]
    sd = _sd_element(fields)
    message = f" {msg}" if msg else ""
    return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}"


def write_syslog_file(line: str) -> None:
    """Emit a syslog line to stdout for container log capture."""
    print(line, flush=True)


def forward_syslog(line: str, log_target: str) -> None:
    """No-op stub. TCP forwarding is handled by rsyslog, not by service containers."""
    pass
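As a usage illustration (hostname and field values invented), a call against the helpers above produces a line of the documented shape:

from datetime import datetime, timezone

line = syslog_line(
    service="ssh",
    hostname="decky-03",
    event_type="file_captured",
    timestamp=datetime(2026, 4, 19, 23, 30, 4, tzinfo=timezone.utc),
    sha256="ab12cd34ef56",
    orig_path="/root/projects/.env",
)
# -> <134>1 2026-04-19T23:30:04+00:00 decky-03 ssh - file_captured
#    [relay@55555 sha256="ab12cd34ef56" orig_path="/root/projects/.env"]
write_syslog_file(line)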