merge: testing → main (reconcile 2-week divergence)
This commit is contained in:
11
decnet/sniffer/__init__.py
Normal file
11
decnet/sniffer/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Fleet-wide MACVLAN sniffer microservice.
|
||||
|
||||
Runs as a single host-side background task (not per-decky) that sniffs
|
||||
all TLS traffic on the MACVLAN interface, extracts fingerprints, and
|
||||
feeds events into the existing log pipeline.
|
||||
"""
|
||||
|
||||
from decnet.sniffer.worker import sniffer_worker
|
||||
|
||||
__all__ = ["sniffer_worker"]
|
||||
1276
decnet/sniffer/fingerprint.py
Normal file
1276
decnet/sniffer/fingerprint.py
Normal file
File diff suppressed because it is too large
Load Diff
238
decnet/sniffer/p0f.py
Normal file
238
decnet/sniffer/p0f.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
Passive OS fingerprinting (p0f-lite) for the DECNET sniffer.
|
||||
|
||||
Pure-Python lookup module. Given the values of an incoming TCP SYN packet
|
||||
(TTL, window, MSS, window-scale, and TCP option ordering), returns a coarse
|
||||
OS bucket (linux / windows / macos_ios / freebsd / openbsd / embedded / nmap / unknown)
|
||||
plus derived hop distance and inferred initial TTL.
|
||||
|
||||
Rationale
|
||||
---------
|
||||
Full p0f v3 distinguishes several dozen OS/tool profiles by combining dozens
|
||||
of low-level quirks (OLEN, WSIZE, EOL padding, PCLASS, quirks, payload class).
|
||||
For DECNET we only need a coarse bucket — enough to tag an attacker as
|
||||
"linux beacon" vs "windows interactive" vs "active scan". The curated
|
||||
table below covers default stacks that dominate real-world attacker traffic.
|
||||
|
||||
References (public p0f v3 DB, nmap-os-db, and Mozilla OS Fingerprint table):
|
||||
https://github.com/p0f/p0f/blob/master/p0f.fp
|
||||
|
||||
No external dependencies.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# ─── TTL → initial TTL bucket ───────────────────────────────────────────────
|
||||
|
||||
# Common "hop 0" TTLs. Packets decrement TTL once per hop, so we round up
|
||||
# the observed TTL to the nearest known starting value.
|
||||
_TTL_BUCKETS: tuple[int, ...] = (32, 64, 128, 255)
|
||||
|
||||
|
||||
def initial_ttl(ttl: int) -> int:
    """
    Map an observed TTL onto the smallest known initial TTL that is >= *ttl*.

    The candidates come from ``_TTL_BUCKETS`` and are tried in the order
    they are listed there.  For example an observed TTL of 59 resolves to
    64 (five hops consumed) and 120 resolves to 128 (eight hops consumed).
    A value larger than every bucket falls back to 255.
    """
    return next((start for start in _TTL_BUCKETS if ttl <= start), 255)
|
||||
|
||||
|
||||
def hop_distance(ttl: int) -> int:
    """
    Estimate how many hops separate the sender from the sniffer.

    The estimate is the gap between the inferred initial TTL and the
    observed *ttl*, clamped into the range 0..64.  Anything beyond 64 is
    treated as implausible (most likely mangled by a misconfigured
    firewall or a TTL-spoofing NAT) and reported as 64.
    """
    hops = initial_ttl(ttl) - ttl
    # Clamp: never negative, never more than 64.
    return max(0, min(hops, 64))
|
||||
|
||||
|
||||
# ─── OS signature table (TTL bucket, window, MSS, wscale, option-order) ─────
|
||||
|
||||
# Each entry is a set of loose predicates. If all predicates match, the
|
||||
# OS label is returned. First-match wins. An omitted key means "don't care".
|
||||
#
|
||||
# The option signatures use the short-code alphabet from
|
||||
# decnet/prober/tcpfp.py :: _OPT_CODES (M=MSS, N=NOP, W=WScale,
|
||||
# T=Timestamp, S=SAckOK, E=EOL).
|
||||
|
||||
_SIGNATURES: tuple[tuple[dict, str], ...] = (
|
||||
# ── nmap -sS / -sT default probe ───────────────────────────────────────
|
||||
# nmap crafts very distinctive SYNs: tiny window (1024/4096/etc.), full
|
||||
# option set including WScale=10 and SAckOK. Match these first so they
|
||||
# don't get misclassified as Linux.
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {1024, 2048, 3072, 4096, 31337, 32768, 65535},
|
||||
"mss": 1460,
|
||||
"wscale": 10,
|
||||
"options": "M,W,T,S,S",
|
||||
},
|
||||
"nmap",
|
||||
),
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {1024, 2048, 3072, 4096, 31337, 32768, 65535},
|
||||
"options_starts_with": "M,W,T,S",
|
||||
},
|
||||
"nmap",
|
||||
),
|
||||
# ── macOS / iOS default SYN (match before Linux — shares TTL 64) ──────
|
||||
# TTL 64, window 65535, MSS 1460, WScale 6, specific option order
|
||||
# M,N,W,N,N,T,S,E (Darwin signature with EOL padding).
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window": 65535,
|
||||
"wscale": 6,
|
||||
"options": "M,N,W,N,N,T,S,E",
|
||||
},
|
||||
"macos_ios",
|
||||
),
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {65535},
|
||||
"wscale_in": {5, 6},
|
||||
"has_timestamps": True,
|
||||
"options_ends_with": "E",
|
||||
},
|
||||
"macos_ios",
|
||||
),
|
||||
# ── FreeBSD default SYN (TTL 64, no EOL) ───────────────────────────────
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window": 65535,
|
||||
"wscale": 6,
|
||||
"has_sack": True,
|
||||
"has_timestamps": True,
|
||||
"options_no_eol": True,
|
||||
},
|
||||
"freebsd",
|
||||
),
|
||||
# ── Linux (kernel 3.x – 6.x) default SYN ───────────────────────────────
|
||||
# TTL 64, window 29200 / 64240 / 65535, MSS 1460, WScale 7, full options.
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_min": 5000,
|
||||
"wscale_in": {6, 7, 8, 9, 10, 11, 12, 13, 14},
|
||||
"has_sack": True,
|
||||
"has_timestamps": True,
|
||||
},
|
||||
"linux",
|
||||
),
|
||||
# ── OpenBSD default SYN ─────────────────────────────────────────────────
|
||||
# TTL 64, window 16384, WScale 3-6, MSS 1460
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 64,
|
||||
"window_in": {16384, 16960},
|
||||
"wscale_in": {3, 4, 5, 6},
|
||||
},
|
||||
"openbsd",
|
||||
),
|
||||
# ── Windows 10/11/Server default SYN ────────────────────────────────────
|
||||
# TTL 128, window 64240/65535, MSS 1460, WScale 8, SACK+TS
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 128,
|
||||
"window_min": 8192,
|
||||
"wscale_in": {2, 6, 7, 8},
|
||||
"has_sack": True,
|
||||
},
|
||||
"windows",
|
||||
),
|
||||
# ── Windows 7/XP (legacy) ───────────────────────────────────────────────
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 128,
|
||||
"window_in": {8192, 16384, 65535},
|
||||
},
|
||||
"windows",
|
||||
),
|
||||
# ── Embedded / Cisco / network gear ─────────────────────────────────────
|
||||
(
|
||||
{
|
||||
"ttl_bucket": 255,
|
||||
},
|
||||
"embedded",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _match_signature(
    sig: dict,
    ttl: int,
    window: int,
    mss: int,
    wscale: int | None,
    options_sig: str,
) -> bool:
    """
    Return True when every predicate present in *sig* accepts the SYN values.

    A predicate key that is absent from *sig* is simply not checked.  The
    option predicates look for the single-letter codes inside
    *options_sig* ("S" for SACK, "T" for timestamps, "E" for EOL).
    """
    bucket = initial_ttl(ttl)

    # Predicates that demand equality with one observed value.
    exact_checks = (
        ("ttl_bucket", bucket),
        ("window", window),
        ("mss", mss),
        ("wscale", wscale),
        ("options", options_sig),
    )
    for key, observed in exact_checks:
        if key in sig and sig[key] != observed:
            return False

    # Membership / lower-bound predicates.
    if "window_in" in sig and window not in sig["window_in"]:
        return False
    if "window_min" in sig and window < sig["window_min"]:
        return False
    if "wscale_in" in sig and wscale not in sig["wscale_in"]:
        return False

    # Presence flags: the signature states whether the option code must be
    # present (True) or absent (False).
    presence_checks = (("has_sack", "S"), ("has_timestamps", "T"))
    for key, code in presence_checks:
        if key in sig and sig[key] != (code in options_sig):
            return False

    # Shape of the option string.
    if "options_starts_with" in sig and not options_sig.startswith(
        sig["options_starts_with"]
    ):
        return False
    if "options_ends_with" in sig and not options_sig.endswith(
        sig["options_ends_with"]
    ):
        return False
    # options_no_eol only rejects when it is truthy and an EOL is present.
    if sig.get("options_no_eol") and "E" in options_sig:
        return False

    return True
|
||||
|
||||
|
||||
@_traced("sniffer.p0f_guess_os")
def guess_os(
    ttl: int,
    window: int,
    mss: int = 0,
    wscale: int | None = None,
    options_sig: str = "",
) -> str:
    """
    Classify a SYN into a coarse OS bucket.

    Walks ``_SIGNATURES`` in order and returns the label of the first
    entry whose predicates all match; "unknown" when none does.

    Possible results: "linux", "windows", "macos_ios", "freebsd",
    "openbsd", "embedded", "nmap", "unknown".
    """
    matching_labels = (
        label
        for sig, label in _SIGNATURES
        if _match_signature(sig, ttl, window, mss, wscale, options_sig)
    )
    return next(matching_labels, "unknown")
|
||||
63
decnet/sniffer/seq_class.py
Normal file
63
decnet/sniffer/seq_class.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
Sequence-pattern classifier for TCP/IP fields that are useful as a tooling
|
||||
fingerprint when sampled across multiple packets from the same source.
|
||||
|
||||
Two callers today:
|
||||
- IP-ID sequence per attacker (random/incremental/zero/constant).
|
||||
- TCP ISN sequence per attacker; modern stacks randomise, so a non-random
|
||||
result is itself a strong signal (legacy stacks, custom raw-socket tools).
|
||||
|
||||
Pure stdlib so it stays trivially unit-testable.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import statistics
|
||||
|
||||
# Minimum samples needed for a meaningful classification.  Below this we
# return "unknown" rather than guess from 1-3 noisy values.
_MIN_SAMPLES = 4

# Max plausible delta for an "incremental" classification.  The IP-ID field
# is 16-bit so kernel-emitted increments wrap rapidly under load — anything
# over 4096 between consecutive SYNs from the same host is almost certainly
# random rather than a counter we just happen to be sampling sparsely.
_INCREMENTAL_MAX_DELTA = 0x1000

# Coefficient-of-variation threshold (stddev/mean) once used to label a
# sequence as random.
# NOTE: classify_sequence() does not consult this value.  The former CV
# test could never change the outcome (both of its branches returned
# "random"), so it was removed; the name is kept only so that existing
# imports of it keep working.
_RANDOM_CV_THRESHOLD = 0.5


def classify_sequence(samples: list[int], modulus: int | None = None) -> str:
    """
    Classify an integer sequence as one of:

      - "unknown":     fewer than _MIN_SAMPLES samples
      - "zero":        every sample is 0
      - "constant":    every sample is the same non-zero value
      - "incremental": every consecutive delta is in 1.._INCREMENTAL_MAX_DELTA
      - "random":      anything else

    Order matters — pass the samples in arrival order.  Any sized iterable
    is accepted (list, tuple, ``collections.deque``, ...); it is copied to
    a list first because a deque cannot be sliced.

    Args:
        samples: Observed field values, oldest first.
        modulus: Optional size of the counter space (e.g. ``1 << 16`` for
            the IP-ID field).  When given, deltas are taken modulo this
            value so a counter that wraps around (65535 -> 0) still counts
            as incremental.  ``None`` (the default) compares raw
            differences, so a wrap is reported as "random".

    Raises:
        ValueError: if *modulus* is given but is not a positive integer.
    """
    if modulus is not None and modulus <= 0:
        raise ValueError("modulus must be a positive integer")

    values = list(samples)
    if len(values) < _MIN_SAMPLES:
        return "unknown"

    first = values[0]
    if all(v == first for v in values):
        return "zero" if first == 0 else "constant"

    pairs = zip(values, values[1:])
    if modulus is None:
        deltas = (nxt - cur for cur, nxt in pairs)
    else:
        # Python's % always yields a non-negative result for a positive
        # modulus, so a backwards step shows up as a huge delta and is
        # rejected by the upper bound below.
        deltas = ((nxt - cur) % modulus for cur, nxt in pairs)

    if all(0 < d <= _INCREMENTAL_MAX_DELTA for d in deltas):
        return "incremental"

    # Everything that is neither flat nor a small-step counter is reported
    # as random, whatever its spread.
    return "random"
|
||||
71
decnet/sniffer/syslog.py
Normal file
71
decnet/sniffer/syslog.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
RFC 5424 syslog formatting and log-file writing for the fleet sniffer.
|
||||
|
||||
Reuses the same wire format as templates/sniffer/decnet_logging.py so the
|
||||
existing collector parser and ingester can consume events without changes.
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from decnet.collector.worker import parse_rfc5424
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
# ─── Constants (must match templates/sniffer/decnet_logging.py) ──────────────
|
||||
|
||||
_FACILITY_LOCAL0 = 16
|
||||
_SD_ID = "relay@55555"
|
||||
_NILVALUE = "-"
|
||||
|
||||
SEVERITY_INFO = 6
|
||||
SEVERITY_WARNING = 4
|
||||
|
||||
_MAX_HOSTNAME = 255
|
||||
_MAX_APPNAME = 48
|
||||
_MAX_MSGID = 32
|
||||
|
||||
|
||||
# ─── Formatter ───────────────────────────────────────────────────────────────
|
||||
|
||||
def _sd_escape(value: str) -> str:
|
||||
return value.replace("\\", "\\\\").replace('"', '\\"').replace("]", "\\]")
|
||||
|
||||
|
||||
def _sd_element(fields: dict[str, Any]) -> str:
    """
    Render *fields* as a single structured-data element.

    Each value is stringified and escaped; pairs are joined with single
    spaces inside ``[<_SD_ID> ...]``.  An empty mapping yields the nil
    value instead of an empty element.
    """
    if fields:
        pairs = [
            f'{name}="{_sd_escape(str(val))}"' for name, val in fields.items()
        ]
        return f"[{_SD_ID} " + " ".join(pairs) + "]"
    return _NILVALUE
|
||||
|
||||
|
||||
def syslog_line(
    service: str,
    hostname: str,
    event_type: str,
    severity: int = SEVERITY_INFO,
    msg: str | None = None,
    **fields: Any,
) -> str:
    """
    Build one syslog line in the layout
    ``<PRI>1 TIMESTAMP HOST APP-NAME - MSGID SD[ MSG]``.

    Args:
        service: Used as APP-NAME (truncated to ``_MAX_APPNAME``).
        hostname: Used as HOSTNAME (truncated to ``_MAX_HOSTNAME``).
        event_type: Used as MSGID (truncated to ``_MAX_MSGID``).
        severity: Combined with facility local0 to form PRI.
        msg: Optional free-text message appended after the structured data.
        **fields: Key/value pairs rendered as the structured-data element.

    Empty *service*, *hostname* or *event_type* are replaced by the nil
    value.  The timestamp is the current UTC time in ISO-8601 form and the
    PROCID slot is always the nil value.
    """
    parts = (
        f"<{_FACILITY_LOCAL0 * 8 + severity}>1",
        datetime.now(timezone.utc).isoformat(),
        (hostname or _NILVALUE)[:_MAX_HOSTNAME],
        (service or _NILVALUE)[:_MAX_APPNAME],
        _NILVALUE,
        (event_type or _NILVALUE)[:_MAX_MSGID],
        _sd_element(fields),
    )
    line = " ".join(parts)
    if msg:
        line = f"{line} {msg}"
    return line
|
||||
|
||||
|
||||
@_traced("sniffer.write_event")
def write_event(line: str, log_path: Path, json_path: Path) -> None:
    """
    Persist one event to both sniffer logs.

    The raw syslog *line* is always appended to *log_path*.  It is then
    run through ``parse_rfc5424``; when that yields a truthy result, the
    result is appended to *json_path* as one JSON document per line.
    Both files are opened in append mode for each call and flushed before
    being closed.
    """
    with open(log_path, "a", encoding="utf-8") as raw_log:
        raw_log.write(line + "\n")
        raw_log.flush()

    record = parse_rfc5424(line)
    if not record:
        # Unparseable line: keep the raw copy only.
        return

    with open(json_path, "a", encoding="utf-8") as json_log:
        json_log.write(json.dumps(record) + "\n")
        json_log.flush()
|
||||
243
decnet/sniffer/worker.py
Normal file
243
decnet/sniffer/worker.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
Fleet-wide MACVLAN sniffer worker.
|
||||
|
||||
Runs as a single host-side async background task that sniffs all TLS
|
||||
traffic on the MACVLAN host interface. Maps packets to deckies by IP
|
||||
and feeds fingerprint events into the existing log pipeline.
|
||||
|
||||
Modeled on decnet.collector.worker — same lifecycle pattern.
|
||||
Fault-isolated: any exception is logged and the worker exits cleanly.
|
||||
The API never depends on this worker being alive.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import subprocess # nosec B404 — needed for interface checks
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
from decnet.bus import topics as _topics
|
||||
from decnet.bus.base import BaseBus
|
||||
from decnet.bus.factory import get_bus
|
||||
from decnet.bus.publish import (
|
||||
make_thread_safe_publisher,
|
||||
run_control_listener_signal,
|
||||
run_health_heartbeat,
|
||||
)
|
||||
from decnet.logging import get_logger
|
||||
from decnet.network import HOST_IPVLAN_IFACE, HOST_MACVLAN_IFACE
|
||||
from decnet.sniffer.fingerprint import SnifferEngine
|
||||
from decnet.sniffer.syslog import write_event
|
||||
from decnet.telemetry import traced as _traced
|
||||
|
||||
logger = get_logger("sniffer")
|
||||
|
||||
_IP_MAP_REFRESH_INTERVAL: float = 60.0
|
||||
|
||||
|
||||
def _load_ip_to_decky() -> dict[str, str]:
    """
    Return a mapping of decky IP address to decky name.

    Reads the persisted state through ``decnet.config.load_state`` and
    returns an empty dict when no state is available.
    """
    # Imported here rather than at module import time, as in the original.
    from decnet.config import load_state

    loaded = load_state()
    if loaded is None:
        return {}

    fleet_config, _ = loaded
    return {decky.ip: decky.name for decky in fleet_config.deckies}
|
||||
|
||||
|
||||
def _make_decky_traffic_publisher(
    bus: BaseBus,
    loop: asyncio.AbstractEventLoop,
) -> Callable[[str, str, dict[str, Any]], None]:
    """
    Build a ``(decky_name, event_type, payload)`` callback for the engine.

    The callback resolves the per-decky traffic topic via
    ``_topics.decky(decky_name, _topics.DECKY_TRAFFIC)`` and forwards the
    payload through the publisher returned by
    :func:`make_thread_safe_publisher`.  It is meant to be called from the
    sniff thread; the hand-off to *loop* is presumably done inside that
    helper — see its implementation for the exact threading guarantees.
    """
    thread_safe_publish = make_thread_safe_publisher(bus, loop)

    def _publish(decky_name: str, event_type: str, payload: dict[str, Any]) -> None:
        thread_safe_publish(
            _topics.decky(decky_name, _topics.DECKY_TRAFFIC),
            payload,
            event_type,
        )

    return _publish
|
||||
|
||||
|
||||
def _interface_exists(iface: str) -> bool:
|
||||
"""Check if a network interface exists on this host."""
|
||||
try:
|
||||
result = subprocess.run( # nosec B603 B607 — hardcoded args
|
||||
["ip", "link", "show", iface],
|
||||
capture_output=True, text=True, check=False,
|
||||
)
|
||||
return result.returncode == 0
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
@_traced("sniffer.sniff_loop")
def _sniff_loop(
    interface: str,
    log_path: Path,
    json_path: Path,
    stop_event: threading.Event,
    publish_fn: Callable[[str, str, dict[str, Any]], None] | None = None,
) -> None:
    """
    Blocking capture loop; meant to run on a worker thread, never on the
    event loop (``sniffer_worker`` submits it to a dedicated executor).

    Steps:
      1. Import scapy lazily; log an error and return if it is missing.
      2. Load the IP -> decky map; log a warning and return if it is empty.
      3. Build a ``SnifferEngine`` that writes events via ``write_event``
         and optionally publishes via *publish_fn*.
      4. Start a daemon thread that reloads the IP map every
         ``_IP_MAP_REFRESH_INTERVAL`` seconds until *stop_event* is set.
      5. Run scapy ``sniff`` on *interface* with a "tcp" filter.

    *stop_event* is always set on the way out so the refresh thread ends.
    """
    try:
        from scapy.sendrecv import sniff
    except ImportError:
        logger.error("scapy not installed — sniffer cannot start")
        return

    decky_by_ip = _load_ip_to_decky()
    if not decky_by_ip:
        logger.warning("sniffer: no deckies in state — nothing to sniff")
        return

    engine = SnifferEngine(
        ip_to_decky=decky_by_ip,
        write_fn=lambda line: write_event(line, log_path, json_path),
        publish_fn=publish_fn,
    )

    def _keep_ip_map_fresh() -> None:
        """Reload the IP map on a fixed interval until asked to stop."""
        while not stop_event.is_set():
            stop_event.wait(_IP_MAP_REFRESH_INTERVAL)
            if stop_event.is_set():
                return
            try:
                latest = _load_ip_to_decky()
                # An empty result is ignored, so the previous map stays
                # in effect.
                if latest:
                    engine.update_ip_map(latest)
            except Exception as exc:
                logger.debug("sniffer: ip map refresh failed: %s", exc)

    threading.Thread(target=_keep_ip_map_fresh, daemon=True).start()

    logger.info(
        "sniffer: sniffing on interface=%s deckies=%d", interface, len(decky_by_ip),
    )

    # NOTE(review): stop_filter is called with a captured packet, so on an
    # idle interface a stop request is presumably not noticed until the next
    # TCP packet arrives — confirm against the scapy documentation.
    capture_options = {
        "iface": interface,
        "filter": "tcp",
        "prn": engine.on_packet,
        "store": False,
        "stop_filter": lambda pkt: stop_event.is_set(),
    }
    try:
        sniff(**capture_options)
    except Exception as exc:
        logger.error("sniffer: scapy sniff exited: %s", exc)
    finally:
        stop_event.set()
        logger.info("sniffer: sniff loop ended")
|
||||
|
||||
|
||||
@_traced("sniffer.worker")
async def sniffer_worker(log_file: str) -> None:
    """
    Async entry point for the fleet sniffer.

    Picks the capture interface, prepares the log paths, connects to the
    bus (optional), starts the heartbeat and control-listener tasks, and
    then runs ``_sniff_loop`` on a dedicated thread pool until it returns
    or this task is cancelled.

    Fault isolation: every ``Exception`` is logged and swallowed so the
    caller keeps running; ``asyncio.CancelledError`` is always re-raised.

    Args:
        log_file: Path of the raw syslog file.  The JSON log uses the same
            path with a ``.json`` suffix; the parent directory is created
            if needed.
    """
    try:
        # Interface selection: an explicit environment override wins,
        # otherwise take the first host-side interface that exists
        # (MACVLAN first, then IPvlan).
        iface_override = os.environ.get("DECNET_SNIFFER_IFACE")
        if iface_override:
            interface = iface_override
        else:
            interface = next(
                (
                    name
                    for name in (HOST_MACVLAN_IFACE, HOST_IPVLAN_IFACE)
                    if _interface_exists(name)
                ),
                None,
            )
            if interface is None:
                logger.warning(
                    "sniffer: neither %s nor %s found — sniffer disabled "
                    "(fleet may not be deployed yet)",
                    HOST_MACVLAN_IFACE, HOST_IPVLAN_IFACE,
                )
                return

        # Final existence check; this is what validates the env override.
        if not _interface_exists(interface):
            logger.warning(
                "sniffer: interface %s not found — sniffer disabled "
                "(fleet may not be deployed yet)", interface,
            )
            return

        log_path = Path(log_file)
        json_path = log_path.with_suffix(".json")
        log_path.parent.mkdir(parents=True, exist_ok=True)

        stop_event = threading.Event()
        loop = asyncio.get_running_loop()

        # Bus connection is best-effort: without it the sniffer still
        # writes its logs, it just has no publisher.
        bus: BaseBus | None = None
        try:
            candidate = get_bus(client_name="sniffer")
            await candidate.connect()
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "sniffer: bus unavailable, running in publish-off mode: %s", exc,
            )
        else:
            bus = candidate

        publish_fn: Callable[[str, str, dict[str, Any]], None] | None = (
            _make_decky_traffic_publisher(bus, loop) if bus is not None else None
        )

        # Heartbeat and control listener run alongside the capture and are
        # started even when the bus is unavailable (bus may be None here).
        # Per the original design note, a bus "stop" is turned into SIGTERM
        # by the control listener so that shutdown arrives as a
        # CancelledError on this task.
        background_tasks = (
            asyncio.create_task(run_health_heartbeat(bus, "sniffer")),
            asyncio.create_task(run_control_listener_signal(bus, "sniffer")),
        )

        # Dedicated pool so the long-running capture does not occupy a
        # slot in the default asyncio executor.
        sniffer_pool = ThreadPoolExecutor(
            max_workers=2, thread_name_prefix="decnet-sniffer",
        )

        try:
            await loop.run_in_executor(
                sniffer_pool,
                _sniff_loop,
                interface,
                log_path,
                json_path,
                stop_event,
                publish_fn,
            )
        except asyncio.CancelledError:
            logger.info("sniffer: shutdown requested")
            stop_event.set()
            sniffer_pool.shutdown(wait=False)
            raise
        finally:
            sniffer_pool.shutdown(wait=False)
            for task in background_tasks:
                task.cancel()
                with contextlib.suppress(Exception, asyncio.CancelledError):
                    await task
            if bus is not None:
                with contextlib.suppress(Exception):
                    await bus.close()

    except asyncio.CancelledError:
        raise
    except Exception as exc:
        logger.error("sniffer: worker failed — API continues without sniffing: %s", exc)
|
||||
Reference in New Issue
Block a user