feat(webhooks): subscription CRUD + HMAC-signed delivery client

Introduces the webhook egress foundation — a new WebhookSubscription
table, admin-gated CRUD under /api/v1/webhooks, and the shared
delivery client that both the test-ping route and the upcoming worker
will use. No worker yet; this commit is API + model + client only.

Simple-mode enum (AttackerDetail / DeckyStatus / SystemStatus) expands
to bus-topic patterns at the router layer; storage is always the raw
pattern list. Advanced mode lets admins supply raw NATS-style patterns
directly. Filter-at-subscribe: the worker (next commit) will subscribe
to the union of patterns across enabled subscriptions.

Delivery client handles HMAC-SHA256 signing (X-DECNET-Signature),
retry on 429/5xx/network errors with jittered backoff, no retry on any
other 4xx. Secrets never leave the server on GET/LIST — only the create
response carries the secret for copy-out.

CRUD routes publish WEBHOOK_SUBSCRIPTIONS_CHANGED on the bus after
every mutation so the (future) worker can hot-reload.

Opens DEBT-037 for the deferred items (circuit breaker, dead-letter,
batch delivery, payload templates, secret-at-rest).
This commit is contained in:
2026-04-24 15:30:05 -04:00
parent 162f7c1194
commit b70845a85d
17 changed files with 1222 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""External webhook egress — ship bus events to SIEM/SOAR stacks."""

188
decnet/webhook/client.py Normal file
View File

@@ -0,0 +1,188 @@
"""HMAC-signed HTTP POST delivery for webhook events.
The delivery function is shared between the worker's normal dispatch
loop and the `/webhooks/{uuid}/test` admin route — same payload shape,
same signing, same headers. Retry policy is configurable by the caller
so manual tests can skip retries entirely while the worker retries
with backoff.
"""
from __future__ import annotations
import asyncio
import hashlib
import hmac
import random
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
from uuid import uuid4
import httpx
import orjson
from decnet.logging import get_logger
# Module-level logger for the webhook delivery client.
log = get_logger("webhook.client")
# Timeout (seconds) handed to the httpx client that deliver() creates when
# the caller does not inject one.
_DEFAULT_TIMEOUT_S = 10.0
# Delays (seconds) slept between attempts when the caller passes no retry
# schedule; three delays means at most four attempts.
_DEFAULT_RETRY_SCHEDULE = (1.0, 2.0, 4.0)
# Bounds of the random multiplier applied to each retry delay (0.8x–1.2x).
_JITTER_LOW = 0.8
_JITTER_HIGH = 1.2
# Value of the "v" field in every body produced by build_payload().
_PAYLOAD_VERSION = 1
@dataclass(frozen=True)
class SyntheticEvent:
    """Immutable stand-in that is shaped like ``decnet.bus.base.Event``.

    It matches the bus event structurally (same attribute names) so the HTTP
    egress layer can fabricate events without importing the bus dependency.
    """

    # Bus topic the event is attributed to.
    topic: str
    # Event type label.
    type: str
    # Event timestamp, as a string.
    ts: str
    # Event identifier.
    id: str
    # Arbitrary event data.
    payload: dict[str, Any]
@dataclass
class DeliveryResult:
    """Outcome of one delivery call, covering every attempt it made."""

    # True when the delivery succeeded.
    ok: bool
    # HTTP status associated with the outcome, if any.
    status_code: Optional[int] = None
    # Short human-readable failure description; None on success.
    error: Optional[str] = None
    # Number of attempts made.
    attempts: int = 0
def _canonical_ts(value: Any) -> str:
"""Normalize bus-event ts (epoch float / ISO str / None) to ISO-8601 UTC."""
if isinstance(value, str) and value:
return value
if isinstance(value, (int, float)):
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
return datetime.now(timezone.utc).isoformat()
def _event_id(event: Any) -> str:
explicit = getattr(event, "id", None)
if isinstance(explicit, str) and explicit:
return explicit
return str(uuid4())
def build_payload(event: Any) -> bytes:
    """Render *event* as the canonical JSON document sent on the wire.

    Keys are emitted in sorted order (``orjson.OPT_SORT_KEYS``). The HMAC is
    computed over these exact bytes, so a receiver that recomputes the
    signature must hash the same byte sequence.

    Missing attributes are tolerated: ``type`` falls back to ``""``,
    ``payload`` to ``{}``, ``topic`` to ``""``, and ``id`` / ``ts`` are filled
    in by ``_event_id`` / ``_canonical_ts``.

    Args:
        event: Any object exposing (some of) ``id``, ``ts``, ``topic``,
            ``type`` and ``payload`` attributes.

    Returns:
        The UTF-8 encoded JSON body.
    """
    event_id = _event_id(event)
    timestamp = _canonical_ts(getattr(event, "ts", None))
    topic = getattr(event, "topic", "")
    event_type = getattr(event, "type", "") or ""
    data = getattr(event, "payload", None) or {}
    document = dict(
        v=_PAYLOAD_VERSION,
        id=event_id,
        ts=timestamp,
        topic=topic,
        type=event_type,
        payload=data,
    )
    return orjson.dumps(document, option=orjson.OPT_SORT_KEYS)
def sign(secret: str, body: bytes) -> str:
    """Compute the ``X-DECNET-Signature`` header value for *body*.

    Args:
        secret: Shared secret; it is UTF-8 encoded to form the HMAC key.
        body: The exact request bytes being signed.

    Returns:
        ``sha256=<hex>``, where ``<hex>`` is the HMAC-SHA256 hex digest.
    """
    key = secret.encode("utf-8")
    mac = hmac.new(key, msg=body, digestmod=hashlib.sha256)
    return "sha256=" + mac.hexdigest()
def _build_headers(secret: str, body: bytes, topic: str, event_id: str) -> dict[str, str]:
    """Assemble the HTTP headers for one webhook delivery.

    Args:
        secret: Subscription secret used to sign *body*.
        body: Serialized request body; the signature covers exactly these bytes.
        topic: Sent verbatim as ``X-DECNET-Event-Topic``.
        event_id: Sent verbatim as ``X-DECNET-Event-Id``.

    Returns:
        Header mapping including the content type, user agent, signature and
        the current Unix time (whole seconds) as ``X-DECNET-Timestamp``.
    """
    signature = sign(secret, body)
    sent_at = int(datetime.now(timezone.utc).timestamp())
    headers: dict[str, str] = {
        "Content-Type": "application/json",
        "User-Agent": "decnet-webhook/1.0",
    }
    headers["X-DECNET-Signature"] = signature
    headers["X-DECNET-Event-Id"] = event_id
    headers["X-DECNET-Event-Topic"] = topic
    headers["X-DECNET-Timestamp"] = str(sent_at)
    return headers
def _should_retry(status_code: int) -> bool:
"""Retry on network error, 5xx, and 429. 4xx (other) is terminal —
the receiver is telling us the request itself is wrong; retrying
won't help."""
if status_code == 429:
return True
return status_code >= 500
def _jittered(delay: float) -> float:
    """Scale *delay* by a random factor between the two jitter bounds."""
    # Jitter only smooths load; it protects nothing, so the non-cryptographic
    # generator is fine and secrets.SystemRandom would gain us nothing.
    factor = random.uniform(_JITTER_LOW, _JITTER_HIGH)  # nosec B311
    return delay * factor
async def deliver(
    sub: dict[str, Any],
    event: Any,
    *,
    retry_schedule: Optional[list[float] | tuple[float, ...]] = None,
    timeout_s: float = _DEFAULT_TIMEOUT_S,
    client: Optional[httpx.AsyncClient] = None,
) -> DeliveryResult:
    """POST *event* to *sub['url']* with HMAC signing and bounded retries.

    The body is serialized and signed once; every attempt re-sends the same
    bytes and headers.

    Args:
        sub: Subscription row dict (from `repo.get_webhook_subscription`);
            the ``"url"`` and ``"secret"`` keys are read.
        event: Object exposing bus-event attributes (``id``, ``ts``,
            ``topic``, ``type``, ``payload``); missing ones are defaulted.
        retry_schedule: Between-attempt delays in seconds. `None` uses the
            default `(1, 2, 4)`; `[]` disables retries entirely (one attempt).
        timeout_s: Timeout for the internally created client. It is not
            applied to an injected *client*.
        client: Lets tests inject a mock `httpx.AsyncClient`. An injected
            client is never closed here.

    Returns:
        A `DeliveryResult`. ``ok`` is True on any 2xx. A 4xx other than 429
        ends delivery immediately; 429, 5xx, request errors and timeouts are
        retried until the schedule is exhausted.
    """
    schedule = (
        list(retry_schedule) if retry_schedule is not None
        else list(_DEFAULT_RETRY_SCHEDULE)
    )
    max_attempts = 1 + len(schedule)

    body = build_payload(event)
    # Take the id from the signed body instead of resolving it a second time:
    # for an event without a usable `id`, each resolution mints a new random
    # UUID, which would make X-DECNET-Event-Id disagree with the body's "id".
    eid = orjson.loads(body)["id"]
    topic = getattr(event, "topic", "")
    headers = _build_headers(sub["secret"], body, topic, eid)
    url = sub["url"]

    owns_client = client is None
    if owns_client:
        client = httpx.AsyncClient(timeout=timeout_s)

    last_status: Optional[int] = None
    last_error: Optional[str] = None
    try:
        for attempt in range(1, max_attempts + 1):
            try:
                resp = await client.post(url, content=body, headers=headers)
                last_status = resp.status_code
                if 200 <= resp.status_code < 300:
                    return DeliveryResult(
                        ok=True, status_code=resp.status_code, attempts=attempt
                    )
                if not _should_retry(resp.status_code):
                    return DeliveryResult(
                        ok=False,
                        status_code=resp.status_code,
                        error=f"non-retryable {resp.status_code}",
                        attempts=attempt,
                    )
                last_error = f"http {resp.status_code}"
            except (httpx.RequestError, asyncio.TimeoutError) as e:
                last_error = f"{type(e).__name__}: {e}"
                # No response was received, so do not report a stale status.
                last_status = None
            if attempt < max_attempts:
                # schedule[i] is the pause after attempt i + 1.
                await asyncio.sleep(_jittered(schedule[attempt - 1]))
        return DeliveryResult(
            ok=False,
            status_code=last_status,
            error=last_error or "exhausted retries",
            attempts=max_attempts,
        )
    finally:
        # Only close a client this call created.
        if owns_client:
            await client.aclose()

54
decnet/webhook/enums.py Normal file
View File

@@ -0,0 +1,54 @@
"""Simple-mode event enum → bus-topic pattern expansion.
The UI's Simple mode hides the NATS-style wildcard syntax behind three
friendly choices. Storage is always the expanded pattern list — the
enum exists only at the API boundary.
"""
from __future__ import annotations
# Patterns map to the bus topic hierarchy shipped by DEBT-031's worker
# rollout (see `decnet/bus/topics.py`):
#   - attacker.{observed,fingerprinted,scored,session.started,session.ended}
#   - decky.{id}.{state,traffic}
#   - system.{log,<worker>.health,<worker>.control,bus.health}
#
# Keys are the Simple-mode event names accepted at the API boundary; values
# are the raw patterns that get stored. Wildcards are NATS-style (presumably
# `*` matches one token and `>` matches the rest of the subject — confirm
# against the bus implementation).
SIMPLE_EVENT_PATTERNS: dict[str, list[str]] = {
    "AttackerDetail": ["attacker.>"],
    "DeckyStatus": ["decky.*.state", "decky.*.traffic"],
    "SystemStatus": ["system.>"],
}
def expand_simple_events(names: list[str]) -> list[str]:
    """Translate simple-event names into a flat list of bus patterns.

    Patterns appear in the order of *names*; duplicates are not removed here.
    A name with no entry in ``SIMPLE_EVENT_PATTERNS`` contributes nothing and
    raises no error. The router layer validates names against the
    `SimpleEvent` Literal before calling, so an unknown name indicates a
    programming error elsewhere rather than bad user input.
    """
    return [
        pattern
        for name in names
        for pattern in SIMPLE_EVENT_PATTERNS.get(name, [])
    ]
def merge_patterns(
    simple: list[str] | None, advanced: list[str] | None
) -> list[str]:
    """Merge simple-event expansions and raw advanced patterns, without duplicates.

    The result keeps first-occurrence order: expansions of *simple* come
    first, followed by *advanced* patterns in the order supplied, so API
    responses show a deterministic list. Advanced entries that are not
    non-empty strings are ignored. ``None`` for either argument means "none".
    """
    expanded = expand_simple_events(simple or [])
    raw = [p for p in (advanced or []) if isinstance(p, str) and p]
    # dict.fromkeys removes repeats while keeping first-seen order.
    return list(dict.fromkeys([*expanded, *raw]))