merge: testing → main (reconcile 2-week divergence)

This commit is contained in:
2026-04-28 18:36:00 -04:00
parent 499836c9e4
commit 862e4dbb31
1235 changed files with 160255 additions and 7996 deletions

View File

@@ -0,0 +1,24 @@
# Image for the SIP service container.
# The base image can be overridden at build time with --build-arg BASE_IMAGE=...
ARG BASE_IMAGE=debian:bookworm-slim
FROM ${BASE_IMAGE}
# Install the Python 3 interpreter, then drop the apt lists to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
&& rm -rf /var/lib/apt/lists/*
# Service code: the shared syslog helper, the server itself, and the launcher script.
COPY syslog_bridge.py /opt/syslog_bridge.py
COPY server.py /opt/server.py
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# The server listens on 5060 over both UDP and TCP.
EXPOSE 5060/udp
EXPOSE 5060/tcp
# Create the unprivileged system account the service runs as, then grant the
# python3 binaries cap_net_bind_service via setcap (from libcap2-bin).
# Failures of the find/setcap step are silenced (2>/dev/null, || true).
# NOTE(review): 5060 is not a privileged port, so this capability looks
# unnecessary for this service — presumably inherited from a shared template; confirm.
RUN useradd -r -s /bin/false -d /opt logrelay \
&& apt-get update && apt-get install -y --no-install-recommends libcap2-bin \
&& rm -rf /var/lib/apt/lists/* \
&& (find /usr/bin/ -maxdepth 1 -name 'python3*' -type f -exec setcap 'cap_net_bind_service+eip' {} \; 2>/dev/null || true)
# Liveness only: succeeds while PID 1 can be signalled. It does not probe port 5060.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
CMD kill -0 1 || exit 1
# Drop privileges before the entrypoint runs.
USER logrelay
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Container entrypoint: start /opt/server.py under python3.
# Abort immediately if any command fails.
set -e
# exec replaces this shell with the Python process instead of leaving a wrapper
# shell in front of it.
exec python3 /opt/server.py

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Per-instance stealth seeding for honeypot service templates.
The whole decoy fleet looks identical to a scanner unless each decky
diverges on the boring details: cluster UUIDs, auth salts, uptime, minor
version strings, etc. This module derives a stable per-instance seed
from NODE_NAME (+ optional INSTANCE_ID) and exposes helpers that return
deterministic-per-decky-but-different-across-the-fleet values.
Connection-time jitter is intentionally NOT seeded — two hits to the same
decky should not replay the same latency curve.
"""
from __future__ import annotations
import asyncio
import hashlib
import os
import random
import time
import uuid
from typing import Sequence, TypeVar
# Generic element type used by the pick helpers' annotations.
T = TypeVar("T")
# Node identity: NODE_NAME wins, then HOSTNAME, then the literal "decky".
# `or` is used, so a variable that is set but empty also falls through.
_HOSTNAME = (
os.environ.get("NODE_NAME")
or os.environ.get("HOSTNAME")
or "decky"
)
# Optional extra discriminator; empty string when INSTANCE_ID is unset.
_INSTANCE_ID = os.environ.get("INSTANCE_ID", "")
# Seed = first 8 bytes (big-endian) of SHA-256("<hostname>:<instance id>").
_SEED_MATERIAL = f"{_HOSTNAME}:{_INSTANCE_ID}".encode()
_SEED_INT = int.from_bytes(hashlib.sha256(_SEED_MATERIAL).digest()[:8], "big")
#: Deterministic RNG seeded per decky — use for *persistent* choices
#: (versions, UUIDs, stored credentials). Never use for timing.
#: The generator is shared and stateful, so a value drawn from it depends on
#: how many draws happened before it.
rng = random.Random(_SEED_INT)
#: Wall-clock time at module import; real elapsed uptime is measured from here.
_PROCESS_START = time.time()
#: Deterministic per-instance fake "has been up for this long at boot"
#: offset, so every decky pretends to have a different history.
#: This is the first draw from ``rng``: between 1 hour and 45 days, in seconds.
_BOOT_OFFSET = rng.randint(3600, 45 * 86400)
def hostname() -> str:
    """Return the node name resolved from the environment at import time."""
    name = _HOSTNAME
    return name
def uptime_seconds() -> int:
    """Return this instance's pretend uptime in whole seconds.

    The value is the per-instance boot offset plus the time elapsed since
    this module was imported, truncated to an integer.

    NOTE(review): elapsed time is measured with ``time.time()``, so a
    wall-clock step backwards would make the value decrease.
    """
    elapsed = time.time() - _PROCESS_START
    return int(_BOOT_OFFSET + elapsed)
def boot_epoch() -> int:
    """Return the fake wall-clock boot time of this instance (epoch seconds).

    The boot time is the import timestamp minus the per-instance boot
    offset. Both are fixed for the life of the process, so every call
    returns the same integer.

    It is deliberately not computed as ``time.time() - uptime_seconds()``:
    that form reads the clock twice and subtracts an already-truncated
    uptime, so the result flips between two adjacent seconds from call to
    call — a tell for anything that polls the advertised boot time.
    """
    return int(_PROCESS_START - _BOOT_OFFSET)
def instance_uuid(namespace: str = "") -> str:
    """Return a stable UUID string for this node and *namespace*.

    The value is a name-based (``uuid.uuid5``) UUID computed under the nil
    UUID from ``"<hostname>:<namespace>"``, so the same node and namespace
    always produce the same string.

    NOTE(review): being a uuid5, the version nibble reads 5, not 4 — confirm
    that is acceptable wherever a service would normally expose a random UUID.
    """
    nil_namespace = uuid.UUID(int=0)
    name = f"{_HOSTNAME}:{namespace}"
    return str(uuid.uuid5(nil_namespace, name))
def instance_hex(nbytes: int, namespace: str = "") -> str:
    """Return a stable hex string encoding *nbytes* bytes for this node.

    The byte stream starts as SHA-256 of ``"<hostname>:<namespace>"`` and,
    while it is too short, is extended with the SHA-256 of everything
    produced so far. The result has ``2 * nbytes`` hex characters.
    """
    seed = f"{_HOSTNAME}:{namespace}".encode()
    stream = hashlib.sha256(seed).digest()
    while len(stream) < nbytes:
        # Chain on the whole accumulated stream, not just the last block.
        stream = stream + hashlib.sha256(stream).digest()
    return stream[:nbytes].hex()
def pick(choices: Sequence[T]) -> T:
    """Return one element of *choices*, drawn from the per-instance RNG.

    The draw advances the shared ``rng``, so the result is reproducible for
    a given node only as long as the sequence of calls is the same.
    """
    options = list(choices)
    return rng.choice(options)
def pick_weighted(choices: Sequence[tuple[T, float]]) -> T:
    """Return one item from ``[(item, weight), ...]``, drawn by weight.

    A threshold is drawn uniformly from ``[0, sum(weights)]`` using the
    per-instance RNG; the first item whose cumulative weight reaches the
    threshold is returned. The last item is the fallback if none does.
    """
    weight_sum = sum(weight for _, weight in choices)
    threshold = rng.uniform(0, weight_sum)
    running = 0.0
    for candidate, weight in choices:
        running += weight
        if running >= threshold:
            return candidate
    return choices[-1][0]
def random_bytes(n: int, namespace: str = "") -> bytes:
    """Return *n* stable bytes for this node and *namespace*.

    Bytes are produced in counter mode: block ``i`` is SHA-256 of
    ``"<hostname>:<namespace>:<i>"``; blocks are concatenated and the
    result is cut to *n* bytes.
    """
    stream = bytearray()
    counter = 0
    while len(stream) < n:
        block = hashlib.sha256(f"{_HOSTNAME}:{namespace}:{counter}".encode()).digest()
        stream += block
        counter += 1
    return bytes(stream[:n])
def fresh_bytes(n: int) -> bytes:
    """Return *n* bytes from ``os.urandom`` — not tied to the instance seed.

    Intended for per-connection nonces and salts, which must differ on
    every call.
    """
    nonce = os.urandom(n)
    return nonce
async def jitter(min_ms: int = 5, max_ms: int = 120) -> None:
    """Sleep asynchronously for a random delay between the two bounds.

    The delay comes from the module-level ``random`` functions, not the
    per-instance seeded generator, so repeated connections to the same
    node do not replay the same timing.

    Args:
        min_ms: Lower bound of the delay, in milliseconds.
        max_ms: Upper bound of the delay, in milliseconds.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    await asyncio.sleep(delay_ms / 1000.0)
def jitter_sync(min_ms: int = 5, max_ms: int = 120) -> None:
    """Block the calling thread for a random delay between the two bounds.

    Counterpart of ``jitter`` for servers that do not run on asyncio; the
    delay is drawn from the unseeded module-level ``random`` functions.

    Args:
        min_ms: Lower bound of the delay, in milliseconds.
        max_ms: Upper bound of the delay, in milliseconds.
    """
    delay_ms = random.uniform(min_ms, max_ms)
    time.sleep(delay_ms / 1000.0)

View File

@@ -0,0 +1,148 @@
#!/usr/bin/env python3
"""
SIP server (UDP + TCP port 5060).
Parses incoming SIP requests, logs credentials from the Authorization
header and call metadata, then answers REGISTER, INVITE and OPTIONS with
401 Unauthorized. Other messages are logged but get no reply.
"""
import asyncio
import os
import re
from syslog_bridge import (
classify_authorization,
forward_syslog,
syslog_line,
write_syslog_file,
)
# Node name: passed to syslog_line() as the hostname and used as the Digest
# realm in the 401 challenge. Defaults to "pbx" when NODE_NAME is unset.
NODE_NAME = os.environ.get("NODE_NAME", "pbx")
# Service label passed to syslog_line() for every event from this server.
SERVICE_NAME = "sip"
# Target handed to forward_syslog(); empty string when LOG_TARGET is unset.
LOG_TARGET = os.environ.get("LOG_TARGET", "")
# str.format template for the 401 challenge. Via, From, To, Call-ID and CSeq
# are filled with the values taken from the request; {host} is the realm and
# {nonce} is generated per response. The response carries no body.
_401 = (
"SIP/2.0 401 Unauthorized\r\n"
"Via: {via}\r\n"
"From: {from_}\r\n"
"To: {to}\r\n"
"Call-ID: {call_id}\r\n"
"CSeq: {cseq}\r\n"
'WWW-Authenticate: Digest realm="{host}", nonce="{nonce}", algorithm=MD5\r\n'
"Content-Length: 0\r\n\r\n"
)
def _log(event_type: str, severity: int = 6, **kwargs) -> None:
    """Format one event as a syslog line and hand it to both log sinks.

    Args:
        event_type: Event name passed through to ``syslog_line``.
        severity: Syslog severity; defaults to 6.
        **kwargs: Extra fields forwarded unchanged to ``syslog_line``.
    """
    record = syslog_line(SERVICE_NAME, NODE_NAME, event_type, severity, **kwargs)
    write_syslog_file(record)
    forward_syslog(record, LOG_TARGET)
def _parse_headers(msg: str) -> dict:
headers = {}
for line in msg.splitlines()[1:]:
if ":" in line:
k, _, v = line.partition(":")
headers[k.strip().lower()] = v.strip()
return headers
def _handle_message(data: bytes, src_addr) -> bytes | None:
    """Log one inbound SIP message and build the reply, if any.

    Every message is logged as a ``request`` event with the source address,
    method, From/To, the quoted Digest username (if present), the first 256
    characters of the Authorization header, and whatever credential fields
    ``classify_authorization`` extracts.

    Args:
        data: Raw bytes of the message.
        src_addr: Peer address; element 0 is logged as ``src`` and element 1
            as ``src_port``.

    Returns:
        An encoded 401 challenge for REGISTER, INVITE and OPTIONS; ``None``
        for anything else.
    """
    # errors="replace" makes decoding total, so no exception handling is needed.
    msg = data.decode(errors="replace")
    lines = msg.splitlines()
    # split() yields no tokens for an empty or whitespace-only first line;
    # indexing it unguarded raised IndexError on such a message.
    tokens = lines[0].split() if lines else []
    method = tokens[0] if tokens else "UNKNOWN"
    headers = _parse_headers(msg)
    auth_header = headers.get("authorization", "")
    username = ""
    if auth_header:
        m = re.search(r'username="([^"]+)"', auth_header)
        username = m.group(1) if m else ""
    # The Authorization header is handed to the same classifier used for HTTP;
    # a Digest header therefore comes back with secret_kind="http_digest_md5".
    cred = classify_authorization(auth_header)
    _log(
        "request",
        src=src_addr[0],
        src_port=src_addr[1],
        method=method,
        from_=headers.get("from", ""),
        to=headers.get("to", ""),
        username=username,
        auth=auth_header[:256],
        **(cred or {}),
    )
    if method not in ("REGISTER", "INVITE", "OPTIONS"):
        return None
    # Fresh nonce per challenge.
    nonce = os.urandom(8).hex()
    response = _401.format(
        via=headers.get("via", ""),
        from_=headers.get("from", ""),
        to=headers.get("to", ""),
        call_id=headers.get("call-id", ""),
        cseq=headers.get("cseq", ""),
        host=NODE_NAME,
        nonce=nonce,
    )
    return response.encode()
class SIPUDPProtocol(asyncio.DatagramProtocol):
    """Datagram endpoint: every datagram is treated as one SIP message."""

    def __init__(self):
        # Set once the endpoint is bound.
        self._transport = None

    def connection_made(self, transport):
        """Remember the transport so replies can be sent through it."""
        self._transport = transport

    def datagram_received(self, data, addr):
        """Handle one datagram and send the reply back to its sender, if any."""
        reply = _handle_message(data, addr)
        if not reply or not self._transport:
            return
        self._transport.sendto(reply, addr)
class SIPTCPProtocol(asyncio.Protocol):
    """Stream endpoint: buffers bytes until a header terminator arrives.

    Once the buffer contains a blank line (CRLF CRLF or LF LF) the whole
    buffer is handled as one message and cleared. A peer that never sends a
    terminator is disconnected once the buffer exceeds ``_MAX_BUFFER``, so a
    single connection cannot grow memory without bound.
    """

    # Upper bound on bytes buffered while waiting for the end of the headers.
    _MAX_BUFFER = 64 * 1024

    def __init__(self):
        self._transport = None
        self._peer = None
        self._buf = b""

    def connection_made(self, transport):
        """Record the transport and the peer address used for logging."""
        self._transport = transport
        # Fall back to a placeholder when the peer name is missing or None,
        # since the message handler indexes into it.
        self._peer = transport.get_extra_info("peername") or ("?", 0)

    def data_received(self, data):
        """Accumulate *data*; handle the buffer once the headers are complete."""
        self._buf += data
        if b"\r\n\r\n" in self._buf or b"\n\n" in self._buf:
            request, self._buf = self._buf, b""
            response = _handle_message(request, self._peer)
            if response:
                self._transport.write(response)
        elif len(self._buf) > self._MAX_BUFFER:
            # No terminator within the cap: drop the data and the connection.
            self._buf = b""
            self._transport.close()

    def connection_lost(self, exc):
        """Release the buffer when the connection goes away."""
        self._buf = b""
async def main():
    """Bind UDP and TCP listeners on port 5060 and serve until cancelled.

    The UDP endpoint is created first. It is closed in a ``finally`` block so
    it is released both when the TCP bind fails and when the serve loop is
    cancelled, not only when ``serve_forever`` returns normally.
    """
    _log("startup", msg=f"SIP server starting as {NODE_NAME}")
    loop = asyncio.get_running_loop()
    udp_transport, _ = await loop.create_datagram_endpoint(
        SIPUDPProtocol, local_addr=("0.0.0.0", 5060)  # nosec B104
    )
    try:
        tcp_server = await loop.create_server(SIPTCPProtocol, "0.0.0.0", 5060)  # nosec B104
        async with tcp_server:
            await tcp_server.serve_forever()
    finally:
        udp_transport.close()


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,261 @@
#!/usr/bin/env python3
"""
Shared RFC 5424 syslog helper used by service containers.
Services call syslog_line() to format an RFC 5424 message, then
write_syslog_file() to emit it to stdout — the container runtime
captures it, and the host-side collector streams it into the log file.
RFC 5424 structure:
<PRI>1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [SD-ELEMENT] MSG
Facility: local0 (16). SD element ID uses PEN 55555.
"""
import base64
import re
from datetime import datetime, timezone
from typing import Any, Optional
# ─── Constants ────────────────────────────────────────────────────────────────
# Facility number used when computing PRI (PRI = facility * 8 + severity).
_FACILITY_LOCAL0 = 16
# SD-ID placed at the start of every structured-data element.
_SD_ID = "relay@55555"
# Placeholder emitted for empty header fields and for empty structured data.
_NILVALUE = "-"
# Syslog severity levels, most to least severe.
SEVERITY_EMERG = 0
SEVERITY_ALERT = 1
SEVERITY_CRIT = 2
SEVERITY_ERROR = 3
SEVERITY_WARNING = 4
SEVERITY_NOTICE = 5
SEVERITY_INFO = 6
SEVERITY_DEBUG = 7
# Truncation limits applied to HOSTNAME, APP-NAME and MSGID by syslog_line().
_MAX_HOSTNAME = 255
_MAX_APPNAME = 48
_MAX_MSGID = 32
# ─── Formatter ────────────────────────────────────────────────────────────────
def _sd_escape(value: str) -> str:
"""Escape SD-PARAM-VALUE per RFC 5424 §6.3.3."""
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
def _sd_element(fields: dict[str, Any]) -> str:
    """Render *fields* as one structured-data element.

    Returns the nil value when *fields* is empty. Otherwise each item becomes
    ``name="value"`` — the value converted with ``str`` and escaped — and the
    items are joined by single spaces inside ``[<SD-ID> ...]``.
    """
    if not fields:
        return _NILVALUE
    rendered = []
    for name, value in fields.items():
        rendered.append(f'{name}="{_sd_escape(str(value))}"')
    params = " ".join(rendered)
    return f"[{_SD_ID} {params}]"
def syslog_line(
    service: str,
    hostname: str,
    event_type: str,
    severity: int = SEVERITY_INFO,
    timestamp: datetime | None = None,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Return a single RFC 5424-style syslog line (no trailing newline).

    Layout: ``<PRI>1 TIMESTAMP HOSTNAME APP-NAME - MSGID SD[ MSG]``; PROCID is
    always the nil value.

    Args:
        service: APP-NAME (e.g. "http", "mysql"); truncated to 48 characters.
        hostname: HOSTNAME (node name); truncated to 255 characters.
        event_type: MSGID (e.g. "request", "login_attempt"); truncated to
            32 characters.
        severity: Syslog severity integer (default: INFO=6). Values outside
            0..7 are clamped, because an out-of-range severity would spill
            into the facility bits of PRI.
        timestamp: UTC datetime; defaults to now. A naive datetime is taken
            to be UTC so the emitted timestamp always carries an offset.
        msg: Optional free-text MSG, appended after a single space.
        **fields: Encoded as structured data params.

    Empty ``service``, ``hostname`` or ``event_type`` are emitted as ``-``.
    """
    level = min(max(severity, SEVERITY_EMERG), SEVERITY_DEBUG)
    pri = f"<{_FACILITY_LOCAL0 * 8 + level}>"
    when = timestamp if timestamp is not None else datetime.now(timezone.utc)
    if when.tzinfo is None:
        # isoformat() on a naive datetime omits the offset entirely.
        when = when.replace(tzinfo=timezone.utc)
    ts = when.isoformat()
    host = (hostname or _NILVALUE)[:_MAX_HOSTNAME]
    appname = (service or _NILVALUE)[:_MAX_APPNAME]
    msgid = (event_type or _NILVALUE)[:_MAX_MSGID]
    sd = _sd_element(fields)
    message = f" {msg}" if msg else ""
    return f"{pri}1 {ts} {host} {appname} {_NILVALUE} {msgid} {sd}{message}"
def encode_secret(secret: str) -> dict[str, str]:
    """Encode a credential secret into the two standard log fields.

    Returns ``{'secret_printable': ..., 'secret_b64': ...}``, ready to be
    spread into a ``syslog_line`` / ``_log`` call::

        _log("auth_attempt", principal=user, **encode_secret(password))

    The secret is first encoded to UTF-8 with ``errors="replace"``.

    * ``secret_printable`` holds one character per encoded byte: bytes in
      ``[0x20, 0x7f)`` are kept, every other byte becomes ``'?'``, so the
      field is always printable ASCII.
    * ``secret_b64`` is the base64 of those same encoded bytes, so control
      characters and non-ASCII text that the printable form masks can still
      be recovered from it.
    """
    payload = secret.encode("utf-8", errors="replace")
    safe_chars = [chr(byte) if 0x20 <= byte < 0x7F else "?" for byte in payload]
    return {
        "secret_printable": "".join(safe_chars),
        "secret_b64": base64.b64encode(payload).decode("ascii"),
    }
# Matches one Digest parameter: either key="quoted value" (groups 1, 2) or
# key=bare-token (groups 3, 4).
_DIGEST_PARAM_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"|(\w+)\s*=\s*([^,\s]+)')


def classify_authorization(header_value: Optional[str]) -> Optional[dict[str, Any]]:
    """Turn an Authorization header value into credential log fields.

    The returned dict is meant to be spread into a ``_log(...)`` call::

        cred = classify_authorization(request.headers.get("Authorization"))
        if cred:
            _log("auth_attempt", **cred)

    Schemes (matched case-insensitively):

    * ``Basic`` — the payload is base64-decoded and split at the first
      colon: ``principal`` is the user, ``secret_kind`` is ``"plaintext"``
      and the password goes through ``encode_secret``. Invalid base64 or a
      payload without a colon yields ``None``.
    * ``Bearer`` / ``Token`` — ``principal`` is ``None``, ``secret_kind`` is
      ``"http_bearer"`` and the token goes through ``encode_secret``.
    * ``Digest`` — ``principal`` is the ``username`` parameter,
      ``secret_kind`` is ``"http_digest_md5"`` and the ``response``
      parameter goes through ``encode_secret``. A missing or empty
      ``response`` yields ``None``.

    Returns ``None`` for a missing, non-string or scheme-only value and for
    any other scheme.
    """
    if not header_value or not isinstance(header_value, str):
        return None
    pieces = header_value.strip().split(None, 1)
    if len(pieces) < 2:
        return None
    scheme = pieces[0].lower()
    rest = pieces[1].strip()

    if scheme == "basic":
        try:
            raw = base64.b64decode(rest, validate=True)
        except ValueError:
            # binascii.Error is a ValueError subclass, so this covers bad base64.
            return None
        decoded = raw.decode("utf-8", errors="replace")
        user, colon, pw = decoded.partition(":")
        if not colon:
            return None
        return {
            "principal": user,
            "secret_kind": "plaintext",
            **encode_secret(pw),
        }

    if scheme in ("bearer", "token"):
        return {
            "principal": None,
            "secret_kind": "http_bearer",
            **encode_secret(rest),
        }

    if scheme == "digest":
        params: dict[str, str] = {}
        for match in _DIGEST_PARAM_RE.finditer(rest):
            quoted_key, quoted_val, bare_key, bare_val = match.groups()
            key = quoted_key or bare_key
            value = quoted_val if quoted_val is not None else bare_val
            if key:
                params[key.lower()] = value
        response = params.get("response")
        if not response:
            return None
        return {
            "principal": params.get("username"),
            "secret_kind": "http_digest_md5",
            **encode_secret(response),
        }

    return None
# Form field names treated as the login identity, checked in this order;
# the first one present in the form wins. Compared against lower-cased keys.
_FORM_PRINCIPAL_KEYS = (
"username", "user", "email", "login", "userid", "account",
"log", # wp-login.php
"user_login", # WordPress alt
"uname", # phpMyAdmin
"pma_username",
)
# Form field names treated as the password, checked in this order;
# the first one present in the form wins. Compared against lower-cased keys.
_FORM_SECRET_KEYS = (
"password", "pass", "pwd", "passwd", "passwort", "mot_de_passe",
"user_password", # WordPress alt
"pma_password", # phpMyAdmin
)
def extract_form_credentials(
    body: Optional[str],
    content_type: Optional[str],
) -> Optional[dict[str, Any]]:
    """Parse an `application/x-www-form-urlencoded` body for credentials.

    Returns the credential fields ready to spread into a ``_log(...)`` call
    whenever a secret-shaped key is present in the body. A principal-shaped
    key is optional: ``principal`` is ``None`` when none is found.

    Field names are percent/plus-decoded and lower-cased before matching
    against ``_FORM_PRINCIPAL_KEYS`` / ``_FORM_SECRET_KEYS``; add entries
    there as new login forms surface. When a key repeats, the first
    occurrence is kept.

    Args:
        body: The raw request body.
        content_type: The request's Content-Type header value.

    Returns:
        ``{"principal", "secret_kind": "plaintext", "secret_printable",
        "secret_b64"}``, or ``None`` when the body is empty, the content
        type is missing or not form-urlencoded, or no secret field exists.
    """
    if not body or not isinstance(content_type, str):
        return None
    if not content_type.lower().startswith("application/x-www-form-urlencoded"):
        return None

    # Imported here, once per call, rather than at module scope: emitters
    # that never parse a form do not pay for loading urllib.
    from urllib.parse import unquote_plus

    fields: dict[str, str] = {}
    for pair in body.split("&"):
        raw_key, sep, raw_val = pair.partition("=")
        if not sep:
            continue
        # First-wins so duplicate-key forms don't get clobbered.
        fields.setdefault(unquote_plus(raw_key).lower(), unquote_plus(raw_val))

    principal = next((fields[k] for k in _FORM_PRINCIPAL_KEYS if k in fields), None)
    secret = next((fields[k] for k in _FORM_SECRET_KEYS if k in fields), None)
    if secret is None:
        return None
    return {
        "principal": principal,
        "secret_kind": "plaintext",
        **encode_secret(secret),
    }
def write_syslog_file(line: str) -> None:
    """Print *line* to stdout followed by a newline, flushing immediately.

    Despite the name, nothing is written to a file here; the line goes to
    standard output.
    """
    print(line, end="\n", flush=True)
def forward_syslog(line: str, log_target: str) -> None:
    """Do nothing; kept so callers have a stable forwarding hook.

    Both arguments are ignored. Service containers do not forward logs
    themselves — TCP forwarding is handled by rsyslog.
    """
    return None