merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,30 @@
# Image for the HTTPS decoy service. The base image can be overridden at
# build time with --build-arg BASE_IMAGE=...
ARG BASE_IMAGE=debian:bookworm-slim
FROM ${BASE_IMAGE}
# Runtime dependencies: Python 3 + pip for the server, openssl for the
# self-signed certificate generated by entrypoint.sh.
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip openssl \
&& rm -rf /var/lib/apt/lists/*
# Allow pip to install into the distribution-managed Python environment.
ENV PIP_BREAK_SYSTEM_PACKAGES=1
RUN pip3 install --no-cache-dir flask jinja2
# Application code lives in /opt; server.py imports the two helper modules.
COPY syslog_bridge.py /opt/syslog_bridge.py
COPY instance_seed.py /opt/instance_seed.py
COPY server.py /opt/server.py
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Default location of the TLS certificate and key.
RUN mkdir -p /opt/tls
EXPOSE 443
# Create the unprivileged service account, give it ownership of /opt/tls so
# the entrypoint can write a generated certificate there, then grant the
# python3 binaries cap_net_bind_service so that account can bind port 443.
# The setcap step is best-effort: its errors are discarded and ignored.
RUN useradd -r -s /bin/false -d /opt logrelay \
&& chown -R logrelay:logrelay /opt/tls \
&& apt-get update && apt-get install -y --no-install-recommends libcap2-bin \
&& rm -rf /var/lib/apt/lists/* \
&& (find /usr/bin/ -maxdepth 1 -name 'python3*' -type f -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true)
# Liveness only: checks that PID 1 can be signalled; it does not probe the
# HTTPS listener itself.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD kill -0 1 || exit 1
# Everything from here on (including the entrypoint) runs unprivileged.
USER logrelay
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -0,0 +1,18 @@
#!/bin/bash
# Container entrypoint: make sure a TLS certificate/key pair exists, then
# replace this shell with the HTTPS emulator.
#
# Environment:
#   TLS_CERT  certificate path   (default: /opt/tls/cert.pem)
#   TLS_KEY   private-key path   (default: /opt/tls/key.pem)
#   TLS_CN    subject CN for a generated certificate; falls back to
#             NODE_NAME, then "localhost"
set -e

TLS_DIR="/opt/tls"
CERT="${TLS_CERT:-$TLS_DIR/cert.pem}"
KEY="${TLS_KEY:-$TLS_DIR/key.pem}"

# Generate a self-signed certificate if either half of the pair is missing.
if [ ! -f "$CERT" ] || [ ! -f "$KEY" ]; then
    # Create the directories openssl actually writes into. TLS_CERT/TLS_KEY
    # may point outside $TLS_DIR; without this, openssl fails on a missing
    # directory and (stderr being discarded) the container dies silently.
    mkdir -p "$TLS_DIR" "$(dirname -- "$CERT")" "$(dirname -- "$KEY")"
    CN="${TLS_CN:-${NODE_NAME:-localhost}}"
    openssl req -x509 -newkey rsa:2048 -nodes \
        -keyout "$KEY" -out "$CERT" \
        -days 3650 -subj "/CN=$CN" \
        2>/dev/null
fi

# exec: the server takes over this process instead of running as a child.
exec python3 /opt/server.py

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Per-instance stealth seeding for honeypot service templates.
The whole decoy fleet looks identical to a scanner unless each decky
diverges on the boring details: cluster UUIDs, auth salts, uptime, minor
version strings, etc. This module derives a stable per-instance seed
from NODE_NAME (+ optional INSTANCE_ID) and exposes helpers that return
deterministic-per-decky-but-different-across-the-fleet values.
Connection-time jitter is intentionally NOT seeded — two hits to the same
decky should not replay the same latency curve.
"""
from __future__ import annotations
import asyncio
import hashlib
import os
import random
import time
import uuid
from typing import Sequence, TypeVar
T = TypeVar("T")

#: Identity of this decoy: the first non-empty of NODE_NAME / HOSTNAME,
#: otherwise the literal "decky".
_HOSTNAME = next(
    (value for value in map(os.environ.get, ("NODE_NAME", "HOSTNAME")) if value),
    "decky",
)
#: Optional extra discriminator mixed into the seed (empty when unset).
_INSTANCE_ID = os.environ.get("INSTANCE_ID", "")
_SEED_MATERIAL = ":".join((_HOSTNAME, _INSTANCE_ID)).encode()
#: First 8 bytes of SHA-256(seed material), read as a big-endian integer.
_SEED_INT = int(hashlib.sha256(_SEED_MATERIAL).hexdigest()[:16], 16)

#: Deterministic RNG seeded per decky — use for *persistent* choices
#: (versions, UUIDs, stored credentials). Never use for timing.
#: Draws are order-dependent: every call advances this shared stream.
rng = random.Random(_SEED_INT)

#: Wall-clock time at which this module was imported.
_PROCESS_START = time.time()

#: Pretended uptime (seconds) already accumulated at import time, between
#: one hour and 45 days. This is the first draw taken from ``rng``.
_BOOT_OFFSET = rng.randint(3600, 45 * 86400)
def hostname() -> str:
    """Return the node name this decoy identifies as.

    The value is fixed at import time (module-level ``_HOSTNAME``).
    """
    return _HOSTNAME
def uptime_seconds() -> int:
    """Return the decoy's pretended uptime in whole seconds.

    This is the seeded per-instance boot offset plus the real time elapsed
    since this module was imported. Elapsed time is measured with
    ``time.time()``, so the result follows the wall clock.
    """
    elapsed = time.time() - _PROCESS_START
    return int(_BOOT_OFFSET + elapsed)
def boot_epoch() -> int:
    """Return the fake wall-clock boot time of this instance (epoch seconds).

    The value is constant for the life of the process: the real process
    start minus the pretended pre-start uptime.

    It used to be computed as ``time.time() - uptime_seconds()``. Because
    both terms are truncated separately, that result flipped between two
    adjacent seconds from call to call — a tell, since a real host's boot
    time never moves. The constant returned here is the lower of those two
    values.
    """
    return int(_PROCESS_START - _BOOT_OFFSET)
def instance_uuid(namespace: str = "") -> str:
    """Return a stable UUID string for this instance and *namespace*.

    The UUID is name-based (``uuid.uuid5`` under the all-zero namespace
    UUID) over ``"<hostname>:<namespace>"``, so the same decoy always gets
    the same value. Note that the result carries version 5, not version 4.
    """
    nil_namespace = uuid.UUID(int=0)
    return str(uuid.uuid5(nil_namespace, f"{_HOSTNAME}:{namespace}"))
def instance_hex(nbytes: int, namespace: str = "") -> str:
    """Return a stable hex token covering *nbytes* bytes.

    Starts from SHA-256 of ``"<hostname>:<namespace>"`` and, while more
    bytes are needed, appends the SHA-256 of everything produced so far.
    The stream is then cut to *nbytes* and hex-encoded.
    """
    stream = hashlib.sha256(f"{_HOSTNAME}:{namespace}".encode()).digest()
    while len(stream) < nbytes:
        stream = stream + hashlib.sha256(stream).digest()
    return stream[:nbytes].hex()
def pick(choices: Sequence[T]) -> T:
    """Choose one element of *choices* using the per-instance RNG.

    The draw comes from the shared seeded ``rng``, so the outcome depends
    on how many draws were made before this call.
    """
    options = list(choices)
    return rng.choice(options)
def pick_weighted(choices: Sequence[tuple[T, float]]) -> T:
    """Choose one item from ``[(item, weight), ...]`` with the seeded RNG.

    A threshold is drawn uniformly from ``[0, sum(weights)]`` and the first
    item whose running weight total reaches it is returned. If none does
    (floating-point rounding), the last item is returned.
    """
    threshold = rng.uniform(0, sum(weight for _, weight in choices))
    running = 0.0
    for candidate, weight in choices:
        running += weight
        if threshold <= running:
            return candidate
    return choices[-1][0]
def random_bytes(n: int, namespace: str = "") -> bytes:
    """Return *n* stable, instance-specific bytes.

    The output is the concatenation of SHA-256 digests of
    ``"<hostname>:<namespace>:<counter>"`` for counter 0, 1, 2, ...,
    truncated to *n* bytes.
    """
    # Each digest contributes 32 bytes; ceil(n / 32) of them are needed.
    # A non-positive n yields an empty range, hence no digests at all.
    digest_count = -(-n // 32)
    stream = b"".join(
        hashlib.sha256(f"{_HOSTNAME}:{namespace}:{counter}".encode()).digest()
        for counter in range(digest_count)
    )
    return stream[:n]
def fresh_bytes(n: int) -> bytes:
    """Return *n* bytes from the OS random source (``os.urandom``).

    Unlike the seeded helpers in this module, the result differs on every
    call — intended for per-connection nonces and salts.
    """
    return os.urandom(n)
async def jitter(min_ms: int = 5, max_ms: int = 120) -> None:
    """Sleep asynchronously for a random delay in ``[min_ms, max_ms]`` ms.

    The delay is drawn from the global, unseeded ``random`` module rather
    than the per-instance ``rng``, so repeated connections to the same
    decoy do not replay the same timing.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    await asyncio.sleep(delay_ms / 1000.0)
def jitter_sync(min_ms: int = 5, max_ms: int = 120) -> None:
    """Block the calling thread for a random delay in ``[min_ms, max_ms]`` ms.

    Synchronous counterpart of :func:`jitter` for servers that do not run
    on asyncio; the delay is drawn from the unseeded ``random`` module.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    time.sleep(delay_ms / 1000.0)

View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""
HTTPS service emulator using Flask + TLS.
Identical to the HTTP honeypot but wrapped in TLS. Accepts all requests,
logs every detail (method, path, headers, body, TLS info), and responds
with configurable pages. Forwards events as JSON to LOG_TARGET if set.
"""
import json
import logging
import os
import ssl
from pathlib import Path
from flask import Flask, request, send_from_directory
from werkzeug.serving import make_server, WSGIRequestHandler
import instance_seed as _seed
from syslog_bridge import (
classify_authorization,
extract_form_credentials,
forward_syslog,
syslog_line,
write_syslog_file,
)
# Only let ERROR and above through Werkzeug's own logger.
logging.getLogger("werkzeug").setLevel(logging.ERROR)
# --- Runtime configuration, read once from the environment at import ---
# Node name used as the syslog HOSTNAME and in the default 403 page footer.
# NOTE(review): instance_seed falls back to HOSTNAME / "decky" when
# NODE_NAME is unset, while this falls back to "webserver" — confirm the
# two defaults are meant to differ.
NODE_NAME = os.environ.get("NODE_NAME", "webserver")
# Syslog APP-NAME for every event emitted by this service.
SERVICE_NAME = "https"
# Passed to forward_syslog() with every event.
LOG_TARGET = os.environ.get("LOG_TARGET", "")
# Listening port; a non-numeric value raises ValueError at import time.
PORT = int(os.environ.get("PORT", "443"))
# Candidate Server banners used when SERVER_HEADER is not set explicitly.
_SERVER_CHOICES = [
"Apache/2.4.41 (Ubuntu)",
"Apache/2.4.52 (Ubuntu)",
"Apache/2.4.54 (Debian)",
"Apache/2.4.56 (Debian)",
"Apache/2.4.57 (Debian)",
"Apache/2.4.58 (Ubuntu)",
"Apache/2.4.59 (Debian)",
"nginx/1.18.0 (Ubuntu)",
"nginx/1.22.1",
"nginx/1.24.0 (Ubuntu)",
"nginx/1.25.3",
]
# Explicit banner wins; otherwise one is picked with the per-instance
# seeded RNG from instance_seed.
SERVER_HEADER = os.environ.get("SERVER_HEADER") or _seed.pick(_SERVER_CHOICES)
# HTTP status returned by catch_all for every non-file response.
RESPONSE_CODE = int(os.environ.get("RESPONSE_CODE", "403"))
# Name of a preset in _FAKE_APP_BODIES; unknown or empty means "no preset".
FAKE_APP = os.environ.get("FAKE_APP", "")
# Extra response headers as a JSON object; malformed JSON raises at import.
EXTRA_HEADERS = json.loads(os.environ.get("EXTRA_HEADERS", "{}"))
# When non-empty, overrides both the preset and the default body.
CUSTOM_BODY = os.environ.get("CUSTOM_BODY", "")
# When non-empty, existing files under this directory are served as-is.
FILES_DIR = os.environ.get("FILES_DIR", "")
# Certificate and key loaded into the TLS context at startup.
TLS_CERT = os.environ.get("TLS_CERT", "/opt/tls/cert.pem")
TLS_KEY = os.environ.get("TLS_KEY", "/opt/tls/key.pem")
# Canned HTML bodies, keyed by the accepted values of FAKE_APP. catch_all
# serves the matching entry when CUSTOM_BODY is empty.
_FAKE_APP_BODIES: dict[str, str] = {
"apache_default": (
"<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
"<html><head><title>Apache2 Debian Default Page</title></head>\n"
"<body><h1>Apache2 Debian Default Page</h1>\n"
"<p>It works!</p></body></html>"
),
"nginx_default": (
"<!DOCTYPE html><html><head><title>Welcome to nginx!</title></head>\n"
"<body><h1>Welcome to nginx!</h1>\n"
"<p>If you see this page, the nginx web server is successfully installed.</p>\n"
"</body></html>"
),
"wordpress": (
"<!DOCTYPE html><html><head><title>WordPress &rsaquo; Error</title></head>\n"
"<body id=\"error-page\"><div class=\"wp-die-message\">\n"
"<h1>Error establishing a database connection</h1></div></body></html>"
),
"phpmyadmin": (
"<!DOCTYPE html><html><head><title>phpMyAdmin</title></head>\n"
"<body><form method=\"post\" action=\"index.php\">\n"
"<input type=\"text\" name=\"pma_username\" />\n"
"<input type=\"password\" name=\"pma_password\" />\n"
"<input type=\"submit\" value=\"Go\" /></form></body></html>"
),
"iis_default": (
"<!DOCTYPE html><html><head><title>IIS Windows Server</title></head>\n"
"<body><h1>IIS Windows Server</h1>\n"
"<p>Welcome to Internet Information Services</p></body></html>"
),
}
# The Flask application; hooks and routes below register on it via decorators.
app = Flask(__name__)
@app.after_request
def _fix_server_header(response):
    """Stamp the emulated ``Server`` banner on every outgoing response.

    Runs as a Flask ``after_request`` hook and returns the same response
    object it was given.
    """
    response.headers["Server"] = SERVER_HEADER
    return response
def _log(event_type: str, severity: int = 6, **kwargs) -> None:
    """Format one event as a syslog line and hand it to both sinks.

    Args:
        event_type: Used as the syslog MSGID.
        severity: Syslog severity (default 6).
        **kwargs: Passed through to ``syslog_line`` (``msg``, ``timestamp``
            and any structured-data fields).
    """
    record = syslog_line(SERVICE_NAME, NODE_NAME, event_type, severity, **kwargs)
    write_syslog_file(record)
    forward_syslog(record, LOG_TARGET)
@app.before_request
def log_request():
    """Log every incoming request, including any credentials it carries.

    Credentials are taken from the ``Authorization`` header when it can be
    classified, otherwise from a form-encoded body. Only the first 4096
    characters of the body are inspected and only the first 512 are logged.
    Returns ``None`` so Flask carries on to normal dispatch.
    """
    payload = request.get_data(as_text=True)[:4096]

    credentials = classify_authorization(request.headers.get("Authorization"))
    if not credentials:
        credentials = extract_form_credentials(
            payload, request.headers.get("Content-Type")
        )

    fields = {
        "method": request.method,
        "path": request.path,
        "remote_addr": request.remote_addr,
        "headers": dict(request.headers),
        "body": payload[:512],
    }
    _log("request", **fields, **(credentials or {}))
# Every verb the decoy answers. Both routes below must list them: a route
# without ``methods=`` only accepts GET/HEAD/OPTIONS, so e.g. POST / would
# get Flask's stock 405 page instead of the decoy response.
_CATCH_ALL_METHODS = ("GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD")


@app.route("/", defaults={"path": ""}, methods=_CATCH_ALL_METHODS)
@app.route("/<path:path>", methods=_CATCH_ALL_METHODS)
def catch_all(path):
    """Answer any request on any path with the configured decoy response.

    Args:
        path: Request path without the leading slash ("" for the root).

    Returns:
        The file response from ``send_from_directory`` when ``FILES_DIR`` is
        set and *path* names an existing file there; otherwise a
        ``(body, status, headers)`` tuple using ``RESPONSE_CODE`` and
        ``EXTRA_HEADERS``.
    """
    # Serve static files directory if configured
    if FILES_DIR and path:
        candidate = Path(FILES_DIR) / path
        if candidate.is_file():
            return send_from_directory(FILES_DIR, path)

    # Select response body: custom > fake_app preset > default 403
    if CUSTOM_BODY:
        body = CUSTOM_BODY
    elif FAKE_APP and FAKE_APP in _FAKE_APP_BODIES:
        body = _FAKE_APP_BODIES[FAKE_APP]
    else:
        body = (
            "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
            "<html><head>\n"
            "<title>403 Forbidden</title>\n"
            "</head><body>\n"
            "<h1>Forbidden</h1>\n"
            "<p>You don't have permission to access this resource.</p>\n"
            "<hr>\n"
            f"<address>{SERVER_HEADER} Server at {NODE_NAME} Port 443</address>\n"
            "</body></html>\n"
        )

    # EXTRA_HEADERS comes last so it can override the Content-Type.
    headers = {"Content-Type": "text/html", **EXTRA_HEADERS}
    return body, RESPONSE_CODE, headers
class _SilentHandler(WSGIRequestHandler):
    """Request handler that reports an empty server version string.

    With no version string of its own, the handler contributes no
    Werkzeug/Python banner; the ``Server`` header is the one set by the
    Flask ``after_request`` hook.
    """

    def version_string(self) -> str:
        """Return the (empty) server software string."""
        return ""
if __name__ == "__main__":
    # Script entry point: announce startup, then serve HTTPS until killed.
    _log("startup", msg=f"HTTPS server starting as {NODE_NAME}")

    tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    tls_context.load_cert_chain(TLS_CERT, TLS_KEY)

    # Listen on all interfaces; TLS is layered onto the listening socket.
    server = make_server("0.0.0.0", PORT, app, request_handler=_SilentHandler)  # nosec B104
    server.socket = tls_context.wrap_socket(server.socket, server_side=True)
    server.serve_forever()

View File

@@ -0,0 +1,261 @@
#!/usr/bin/env python3
"""
Shared RFC 5424 syslog helper used by service containers.
Services call syslog_line() to format an RFC 5424 message, then
write_syslog_file() to emit it to stdout — the container runtime
captures it, and the host-side collector streams it into the log file.
RFC 5424 structure:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16). SD element ID uses PEN 55555.
"""
import base64
import re
from datetime import datetime, timezone
from typing import Any, Optional
# ─── Constants ────────────────────────────────────────────────────────────────
# Syslog facility local0; combined with the severity to form PRI
# (facility * 8 + severity) in syslog_line().
_FACILITY_LOCAL0 = 16
# SD-ID of the single structured-data element emitted per line.
_SD_ID = "relay@55555"
# Placeholder written for an absent header field or empty structured data.
_NILVALUE = "-"
# Syslog severity levels, most severe first.
SEVERITY_EMERG = 0
SEVERITY_ALERT = 1
SEVERITY_CRIT = 2
SEVERITY_ERROR = 3
SEVERITY_WARNING = 4
SEVERITY_NOTICE = 5
SEVERITY_INFO = 6
SEVERITY_DEBUG = 7
# Length caps applied by syslog_line() to HOSTNAME, APP-NAME and MSGID.
_MAX_HOSTNAME = 255
_MAX_APPNAME = 48
_MAX_MSGID = 32
# ─── Formatter ────────────────────────────────────────────────────────────────
# Translation table for _sd_escape(): code point -> replacement text.
_SD_ESCAPE_TABLE = {
    # Control characters (C0 range and DEL) become visible escapes. Field
    # values come from attacker-controlled input (headers, request bodies);
    # a raw CR/LF would split one event across several log lines and let
    # the sender forge syslog records.
    **{code: f"\\x{code:02x}" for code in (*range(0x20), 0x7F)},
    ord("\t"): "\\t",
    ord("\n"): "\\n",
    ord("\r"): "\\r",
    # The three characters RFC 5424 §6.3.3 requires to be escaped.
    ord("\\"): "\\\\",
    ord('"'): '\\"',
    ord("]"): "\\]",
}


def _sd_escape(value: str) -> str:
    r"""Escape *value* for use as an RFC 5424 SD-PARAM-VALUE (§6.3.3).

    ``\``, ``"`` and ``]`` are backslash-escaped as the RFC requires. In
    addition, control characters are rewritten as printable escapes
    (``\n``, ``\r``, ``\t``, otherwise ``\xNN``) so the returned text never
    contains a line break. All other characters, including non-ASCII ones,
    pass through unchanged.

    Args:
        value: Raw field value.

    Returns:
        The escaped value, safe to place between double quotes on a single
        syslog line.
    """
    return value.translate(_SD_ESCAPE_TABLE)
def _sd_element(fields: dict[str, Any]) -> str:
    """Render *fields* as one structured-data element.

    Returns the nil value when *fields* is empty; otherwise
    ``[<SD-ID> key="value" ...]`` with every value stringified and escaped.
    """
    if not fields:
        return _NILVALUE
    rendered = [
        f'{name}="{_sd_escape(str(value))}"' for name, value in fields.items()
    ]
    return "[" + _SD_ID + " " + " ".join(rendered) + "]"
def syslog_line(
    service: str,
    hostname: str,
    event_type: str,
    severity: int = SEVERITY_INFO,
    timestamp: datetime | None = None,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Build one RFC 5424 syslog line (without a trailing newline).

    Layout: ``<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID SD[ MSG]``.

    Args:
        service: APP-NAME (e.g. "http", "mysql"); cut to 48 characters.
        hostname: HOSTNAME (node name); cut to 255 characters.
        event_type: MSGID (e.g. "request", "login_attempt"); cut to 32.
        severity: Syslog severity integer (default: INFO=6).
        timestamp: Datetime to stamp the line with; the current UTC time
            is used when omitted.
        msg: Optional free-text MSG, appended after a single space.
        **fields: Rendered as structured-data params.

    Empty ``service``, ``hostname`` or ``event_type`` are written as "-".
    """
    prival = _FACILITY_LOCAL0 * 8 + severity
    stamp = timestamp if timestamp else datetime.now(timezone.utc)
    header = [
        f"<{prival}>1",
        stamp.isoformat(),
        (hostname or _NILVALUE)[:_MAX_HOSTNAME],
        (service or _NILVALUE)[:_MAX_APPNAME],
        _NILVALUE,  # PROCID is never reported
        (event_type or _NILVALUE)[:_MAX_MSGID],
        _sd_element(fields),
    ]
    trailer = f" {msg}" if msg else ""
    return " ".join(header) + trailer
def encode_secret(secret: str) -> dict[str, str]:
    """Encode a credential secret into the two standard SD fields.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}``, ready to be
    spread into a :func:`syslog_line` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    The secret is first encoded as UTF-8 (unencodable characters become
    ``?``). ``secret_printable`` is that byte string with every byte
    outside ``[0x20, 0x7f)`` replaced by ``'?'``, so it is always plain
    printable ASCII. ``secret_b64`` is the base64 of the same UTF-8 bytes,
    which keeps control and non-ASCII bytes intact.

    NOTE(review): the original note says downstream ingestion keys off the
    presence of ``secret_b64`` — confirm against the ingester before
    renaming either key.
    """
    payload = secret.encode("utf-8", errors="replace")
    # 0x3F is "?": the stand-in for anything that is not printable ASCII.
    visible = bytes(b if 0x20 <= b < 0x7F else 0x3F for b in payload)
    return {
        "secret_printable": visible.decode("ascii"),
        "secret_b64": base64.b64encode(payload).decode("ascii"),
    }
# One Digest auth-param: either key="quoted value" or key=bare-token.
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)')


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Turn an HTTP ``Authorization`` header value into credential SD fields.

    The returned dict can be spread straight into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Recognised schemes (matched case-insensitively):
      * ``Basic`` — strict base64 of ``user:password``;
        ``secret_kind="plaintext"``, principal is the user part.
      * ``Bearer`` / ``Token`` — opaque token; ``secret_kind="http_bearer"``,
        principal is ``None``.
      * ``Digest`` — ``secret_kind="http_digest_md5"``; the secret is the
        ``response`` parameter and the principal the ``username`` parameter
        (``None`` when absent).

    Returns ``None`` when the header is missing, not a string, has no value
    after the scheme, uses any other scheme, or is malformed (bad base64,
    no ``:`` in Basic, no ``response`` in Digest).
    """
    if not isinstance(header_value, str) or not header_value:
        return None

    pieces = header_value.strip().split(None, 1)
    if len(pieces) < 2:
        return None
    scheme = pieces[0].lower()
    rest = pieces[1].strip()

    if scheme == "basic":
        try:
            decoded = base64.b64decode(rest, validate=True).decode(
                "utf-8", errors="replace"
            )
        except ValueError:
            # Covers binascii.Error too, which is a ValueError subclass.
            return None
        user, colon, pw = decoded.partition(":")
        if not colon:
            return None
        return {
            "principal": user,
            "secret_kind": "plaintext",
            **encode_secret(pw),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            quoted_key, quoted_val, bare_key, bare_val = match.groups()
            key = quoted_key or bare_key
            val = quoted_val if quoted_val is not None else bare_val
            if key:
                # Later duplicates of a parameter overwrite earlier ones.
                params[key.lower()] = val
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
# Form field names treated as the login name, in priority order.
_FORM_PRINCIPAL_KEYS = (
    "username", "user", "email", "login", "userid", "account",
    "log",  # wp-login.php
    "user_login",  # WordPress alt
    "uname",  # phpMyAdmin
    "pma_username",
)
# Form field names treated as the password, in priority order.
_FORM_SECRET_KEYS = (
    "password", "pass", "pwd", "passwd", "passwort", "mot_de_passe",
    "user_password",  # WordPress alt
    "pma_password",  # phpMyAdmin
)


def extract_form_credentials(
    body: Optional[str],
    content_type: Optional[str],
) -> Optional[dict[str, Any]]:
    """Pull credentials out of an ``application/x-www-form-urlencoded`` body.

    Returns the credential SD fields (``principal``, ``secret_kind``,
    ``secret_printable``, ``secret_b64``) when the body contains a
    secret-shaped key. The principal is taken from the first matching
    principal-shaped key and is ``None`` when there is none. Returns
    ``None`` when the body is empty, the content type is not form-encoded,
    or no secret-shaped key is present.

    Field names are matched case-insensitively after URL-decoding. Extend
    ``_FORM_PRINCIPAL_KEYS`` / ``_FORM_SECRET_KEYS`` to cover more forms.
    """
    if not body or not isinstance(content_type, str):
        return None
    if not content_type.lower().startswith("application/x-www-form-urlencoded"):
        return None

    # Imported here rather than at module scope: the template emitters
    # that share this module are import-cost-sensitive.
    from urllib.parse import unquote_plus

    decoded: dict[str, str] = {}
    for pair in body.split("&"):
        raw_key, equals, raw_val = pair.partition("=")
        if not equals:
            continue
        try:
            key = unquote_plus(raw_key).lower()
            val = unquote_plus(raw_val)
        except Exception:
            continue
        # First-wins so duplicate-key forms don't get clobbered.
        decoded.setdefault(key, val)

    principal: Optional[str] = next(
        (decoded[name] for name in _FORM_PRINCIPAL_KEYS if name in decoded), None
    )
    secret: Optional[str] = next(
        (decoded[name] for name in _FORM_SECRET_KEYS if name in decoded), None
    )
    if secret is None:
        return None
    return {
        "principal": principal,
        "secret_kind": "plaintext",
        **encode_secret(secret),
    }
def write_syslog_file(line: str) -> None:
    """Write *line* to stdout followed by a newline, flushing immediately.

    Despite the name, nothing is written to a file here; the line goes to
    stdout for the container runtime to capture.
    """
    print(line, end="\n", flush=True)
def forward_syslog(line: str, log_target: str) -> None:
    """Do nothing; kept so service code can call it unconditionally.

    Both arguments are ignored. Per the original note, TCP forwarding is
    handled by rsyslog rather than by the service containers.
    """
    return None