feat(clustering): identity clusterer worker skeleton

Adds the decnet clusterer master-only command + provider-subpackage
shape (base.py + factory.py + impl/connected_components.py) so
subsequent commits can land similarity-graph features without
churning callers.

The skeleton ConnectedComponentsClusterer.tick is a no-op, but the
worker shell is fully wired: bus consumer on attacker.observed and
attacker.scored, slow-tick fallback, health heartbeat, control
listener, and ClusterResult fan-out to identity.formed,
identity.observation.linked, and identity.merged. Subscribers on
identity.> see no events from this clusterer until edge functions
land, but the lifecycle is in place.
This commit is contained in:
2026-04-26 08:09:11 -04:00
parent 6b6a808a4a
commit e545f7d8d3
9 changed files with 620 additions and 1 deletions

View File

@@ -29,7 +29,7 @@ MASTER_ONLY_COMMANDS: frozenset[str] = frozenset({
"api", "swarmctl", "deploy", "redeploy", "teardown",
"mutate", "listener", "profiler",
"services", "distros", "correlate", "archetypes", "web",
"db-reset", "init", "webhook",
"db-reset", "init", "webhook", "clusterer",
})
# Group-level counterpart to MASTER_ONLY_COMMANDS: names of CLI command
# groups, as opposed to single commands.
# NOTE(review): presumably these groups are gated to master mode the same
# way individual commands are — confirm against the gating logic.
MASTER_ONLY_GROUPS: frozenset[str] = frozenset({"swarm", "topology", "geoip"})

View File

@@ -191,3 +191,51 @@ def register(app: typer.Typer) -> None:
asyncio.run(_run())
except KeyboardInterrupt:
console.print("\n[yellow]Reuse correlator stopped.[/]")
@app.command(name="clusterer")
def clusterer(
    poll_interval_secs: float = typer.Option(
        60.0,
        "--poll-interval",
        "-i",
        help="Slow-tick fallback when the bus is idle or unavailable (seconds)",
    ),
    daemon: bool = typer.Option(
        False,
        "--daemon",
        "-d",
        help="Detach to background as a daemon process",
    ),
) -> None:
    """Identity-resolution clusterer.

    Bus-woken on ``attacker.observed`` and ``attacker.scored``;
    builds a similarity graph over observations, runs
    connected-components, writes ``attacker_identities`` rows, and
    publishes ``identity.formed`` / ``identity.observation.linked``
    / ``identity.merged`` / ``identity.unmerged``.
    """
    # Imports are resolved at call time, inside the command body.
    import asyncio

    from decnet.cli.gating import _require_master_mode
    from decnet.clustering.worker import run_clusterer_loop
    from decnet.web.dependencies import repo

    # The master-mode gate runs before any daemonizing or console output.
    _require_master_mode("clusterer")

    if daemon:
        # Log the intent first, then hand off to the shared daemonize helper.
        log.info("clusterer daemonizing poll=%s", poll_interval_secs)
        _utils._daemonize()

    log.info("clusterer command invoked poll=%s", poll_interval_secs)

    banner = f"[bold cyan]Identity clusterer starting[/] poll={poll_interval_secs}s"
    console.print(banner)
    console.print("[dim]Press Ctrl+C to stop[/]")

    async def _serve() -> None:
        """Initialize the repository, then run the clusterer loop on it."""
        await repo.initialize()
        await run_clusterer_loop(repo, poll_interval_secs=poll_interval_secs)

    try:
        asyncio.run(_serve())
    except KeyboardInterrupt:
        # Ctrl+C gets a stop message on the console instead of a traceback.
        console.print("\n[yellow]Identity clusterer stopped.[/]")