feat(correlation/attribution): wire bus handler, persist state (Phase 4)

attribution_worker.handle_observation_event now executes the full
end-to-end path:

* ensure stub identity (Phase 1)
* observations_for_identity_primitive() — new repo helper joining
  observations through attackers.identity_id, so v1's clusterer
  gets cross-attacker rollup for free
* aggregate_observations() with ValueKind dispatched off the BEHAVE
  PRIMITIVE_REGISTRY; unknown primitives default to categorical
* upsert_attribution_state() — last_change_ts locked when state is
  unchanged so the dashboard can render "stable since X"
* publish attribution.profile.state_changed only on transition;
  idempotent re-runs over the same observation set fire nothing
  (loop-prevention invariant matching ttp.tagged)

Tests:
* 5 end-to-end attribution scenarios over in-memory SQLite + FakeBus.
* test_base_repo's DummyRepo + coverage body now stub every abstract
  surface BaseRepository declares — the 6 added by this branch plus
  the 12 left un-stubbed by earlier work (BEHAVE Phase 1, TTP
  rollups, iter helpers). Previously the coverage test could not
  even instantiate DummyRepo.
* test_aggregate_categorical's dispatcher rejection updated for the
  Phase 3 + 4 contract — ValueError on unknown kinds, not
  NotImplementedError.
This commit is contained in:
2026-05-09 02:16:12 -04:00
parent c39802a4bb
commit dd265d7520
6 changed files with 536 additions and 17 deletions

View File

@@ -165,11 +165,12 @@ def test_dispatcher_routes_categorical() -> None:
assert a == b == c
def test_dispatcher_rejects_unimplemented_kinds() -> None:
"""numeric / hash kinds land in Phase 3; surface the gap loudly
so a misuse doesn't silently fall through to categorical."""
def test_dispatcher_rejects_unknown_value_kind() -> None:
"""Unknown ValueKind tags surface as ValueError so misuse doesn't
silently fall through to categorical. Phase 3 wired numeric +
hash; the rejection is for typos and v1 kinds that haven't
landed yet."""
import pytest
obs = _pad(value=5000.0, count=5)
for kind in ("numeric", "hash"):
with pytest.raises(NotImplementedError):
aggregate_observations(obs, value_kind=kind)
obs = _pad(value="typed", count=5)
with pytest.raises(ValueError):
aggregate_observations(obs, value_kind="bogus_kind")

View File

@@ -0,0 +1,275 @@
"""Phase 4 — end-to-end worker wiring.
Observation event → stub identity → load series → merger → upsert
state → emit ``attribution.profile.state_changed`` on transition.
Phase 1 covered stub-only wiring; this file pins the merger /
persist / publish path against an in-memory SQLite + FakeBus.
"""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import pytest
from decnet.bus import topics as _topics
from decnet.bus.fake import FakeBus
from decnet.correlation import attribution_worker as _aw
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path: Path):
    """Provide an initialized repository backed by a per-test database file.

    The database path is ``phase4.db`` inside pytest's ``tmp_path``, so
    every test gets its own isolated store.
    """
    db_file = tmp_path / "phase4.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
@pytest.fixture
async def attacker_uuid(repo) -> str:
    """Insert one attacker and return whatever ``upsert_attacker`` yields.

    The attacker has IP ``10.0.0.5``; ``first_seen`` and ``last_seen`` are
    both set to the same current-UTC timestamp.
    """
    seen_at = datetime.now(timezone.utc)
    attacker_row = {
        "ip": "10.0.0.5",
        "first_seen": seen_at,
        "last_seen": seen_at,
    }
    return await repo.upsert_attacker(attacker_row)
def _envelope(
*,
primitive: str,
value: Any,
attacker_uuid: str,
evidence_ref: str,
ts: float,
confidence: float = 0.9,
) -> dict[str, Any]:
return {
"id": f"obs-{evidence_ref}-{primitive}",
"primitive": primitive,
"value": value,
"confidence": confidence,
"window_start_ts": ts,
"window_end_ts": ts,
"source": "test",
"evidence_ref": evidence_ref,
"envelope_v": 1,
"ts": ts,
"attacker_uuid": attacker_uuid,
}
def _bus_event(payload: dict[str, Any]) -> dict[str, Any]:
"""Worker reads payload via getattr(.payload, fallback to dict)."""
return payload
async def _seed_observations(
    repo, attacker_uuid: str, primitive: str, values: list[Any],
    *, start_ts: float = 1714000000.0,
) -> None:
    """Upsert one observation per entry of ``values``, 60 seconds apart.

    The n-th value (0-based) is stamped ``start_ts + n * 60.0``.
    """
    for offset, observed in enumerate(values):
        stamp = start_ts + offset * 60.0
        # The timestamp is part of evidence_ref so that separate calls with
        # overlapping indices but different start_ts yield distinct rows.
        row = _envelope(
            primitive=primitive,
            value=observed,
            attacker_uuid=attacker_uuid,
            evidence_ref=f"shard:test#{primitive}-{stamp}",
            ts=stamp,
        )
        await repo.upsert_observation(row)
@pytest.mark.anyio
async def test_handler_writes_unknown_below_threshold(
    repo, attacker_uuid: str,
) -> None:
    """With only two observations of a primitive, the handler still writes
    a state row, but its state is 'unknown'
    (fewer than MIN_OBSERVATIONS_FOR_STATE)."""
    primitive = "motor.input_modality"
    bus = FakeBus()
    await bus.connect()

    await _seed_observations(repo, attacker_uuid, primitive, ["typed", "typed"])
    trigger = _bus_event({
        "attacker_uuid": attacker_uuid,
        "primitive": primitive,
    })
    await _aw.handle_observation_event(bus, repo, trigger)

    # The state row is keyed by the identity the handler attached to the
    # attacker, so resolve that first.
    attacker = await repo.get_attacker_by_uuid(attacker_uuid)
    assert attacker is not None
    state = await repo.get_attribution_state(attacker["identity_id"], primitive)
    assert state is not None
    assert state["state"] == "unknown"
    await bus.close()
@pytest.mark.anyio
async def test_handler_emits_state_changed_on_transition(
    repo, attacker_uuid: str, monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Feeding observations one at a time past MIN_OBSERVATIONS_FOR_STATE
    yields exactly two publishes: <new>→unknown, then unknown→stable.
    The handler runs in between must publish nothing."""
    primitive = "motor.input_modality"
    bus = FakeBus()
    await bus.connect()
    published: list[dict[str, Any]] = []

    async def _record(_bus, topic, payload, *, event_type=""):
        # Stand-in for publish_safely: remember what would have been sent.
        published.append({"topic": topic, "payload": payload})

    monkeypatch.setattr(_aw, "publish_safely", _record)

    for step in range(5):
        await _seed_observations(
            repo, attacker_uuid, primitive,
            ["typed"], start_ts=1714000000.0 + step * 60.0,
        )
        trigger = _bus_event({
            "attacker_uuid": attacker_uuid,
            "primitive": primitive,
        })
        await _aw.handle_observation_event(bus, repo, trigger)

    payloads = [entry["payload"] for entry in published]
    states_seen = [body["new_state"] for body in payloads]
    assert states_seen == ["unknown", "stable"], states_seen
    # Each transition payload names the state it came from: nothing for the
    # first write, 'unknown' for the flip to stable.
    assert payloads[0]["old_state"] is None
    assert payloads[1]["old_state"] == "unknown"
    await bus.close()
@pytest.mark.anyio
async def test_handler_no_event_when_state_unchanged(
    repo, attacker_uuid: str,
) -> None:
    """Re-running the merger over an unchanged observation set must
    not emit a duplicate state_changed event (loop-prevention).

    The first handler run is required to produce at least one captured
    event; otherwise "no additional events" would hold trivially and the
    test would prove nothing.
    """
    import asyncio

    bus = FakeBus()
    await bus.connect()
    captured: list[Any] = []
    drain_errors: list[Exception] = []
    sub = bus.subscribe(
        _topics.attribution(_topics.ATTRIBUTION_PROFILE_STATE_CHANGED),
    )

    async def drain() -> None:
        # Best-effort subscriber: a failure here must not crash the test
        # from a background task, but it is recorded so the baseline
        # assertion below can report why nothing was captured.
        try:
            async with sub:
                async for ev in sub:
                    captured.append(ev)
        except Exception as exc:
            drain_errors.append(exc)

    drain_task = asyncio.create_task(drain())
    # Yield once so the drain task starts before anything is published.
    await asyncio.sleep(0)
    try:
        await _seed_observations(
            repo, attacker_uuid, "motor.input_modality",
            ["typed"] * 5,
        )
        # First run: <new> → stable, fires event.
        await _aw.handle_observation_event(bus, repo, _bus_event({
            "attacker_uuid": attacker_uuid,
            "primitive": "motor.input_modality",
        }))
        await asyncio.sleep(0.05)
        first_count = len(captured)
        assert first_count >= 1, (
            "first run should have emitted a state_changed event; "
            f"subscriber errors: {drain_errors!r}"
        )
        # Re-run with no new observations: state stays "stable", no event.
        for _ in range(3):
            await _aw.handle_observation_event(bus, repo, _bus_event({
                "attacker_uuid": attacker_uuid,
                "primitive": "motor.input_modality",
            }))
        await asyncio.sleep(0.05)
        assert len(captured) == first_count, (
            "state didn't change; no additional events should fire"
        )
    finally:
        # cancel() only requests cancellation; await the task so it has
        # actually unwound before the bus is closed.
        drain_task.cancel()
        try:
            await drain_task
        except asyncio.CancelledError:
            pass
        await bus.close()
@pytest.mark.anyio
async def test_handler_locks_last_change_ts_when_unchanged(
    repo, attacker_uuid: str,
) -> None:
    """When the state doesn't change, last_change_ts must NOT advance —
    that's what tells the dashboard 'stable since X', not 'stable
    since most-recent-observation'.

    last_observation_ts, by contrast, must move forward when a newer
    observation is merged.
    """
    bus = FakeBus()
    await bus.connect()
    await _seed_observations(
        repo, attacker_uuid, "motor.input_modality",
        ["typed"] * 5,
    )
    await _aw.handle_observation_event(bus, repo, _bus_event({
        "attacker_uuid": attacker_uuid,
        "primitive": "motor.input_modality",
    }))
    attacker = await repo.get_attacker_by_uuid(attacker_uuid)
    assert attacker is not None
    identity_uuid = attacker["identity_id"]
    first = await repo.get_attribution_state(
        identity_uuid, "motor.input_modality",
    )
    assert first is not None
    locked_ts = first["last_change_ts"]
    # Add another stable observation, re-run.
    await _seed_observations(
        repo, attacker_uuid, "motor.input_modality",
        ["typed"], start_ts=1714010000.0,
    )
    await _aw.handle_observation_event(bus, repo, _bus_event({
        "attacker_uuid": attacker_uuid,
        "primitive": "motor.input_modality",
    }))
    second = await repo.get_attribution_state(
        identity_uuid, "motor.input_modality",
    )
    assert second is not None
    assert second["last_change_ts"] == locked_ts
    # last_observation_ts DID advance: compare it with its own previous
    # value, not only with last_change_ts, so the check really proves the
    # field moved between the two runs.
    assert second["last_observation_ts"] > first["last_observation_ts"]
    assert second["last_observation_ts"] > locked_ts
    await bus.close()
@pytest.mark.anyio
async def test_handler_routes_numeric_primitive(
    repo, attacker_uuid: str,
) -> None:
    """A primitive whose registry kind is NUMERIC goes through the
    numeric merger rather than the categorical one."""
    # toolchain.c2.beacon_interval_ms is registered NUMERIC in BEHAVE.
    primitive = "toolchain.c2.beacon_interval_ms"
    samples = [5000.0, 5050.0, 4980.0, 5020.0, 5010.0]

    bus = FakeBus()
    await bus.connect()
    await _seed_observations(repo, attacker_uuid, primitive, samples)
    trigger = _bus_event({
        "attacker_uuid": attacker_uuid,
        "primitive": primitive,
    })
    await _aw.handle_observation_event(bus, repo, trigger)

    attacker = await repo.get_attacker_by_uuid(attacker_uuid)
    assert attacker is not None
    state = await repo.get_attribution_state(attacker["identity_id"], primitive)
    assert state is not None
    # The numeric merger stores a smoothed mean, so the value is a float
    # and not a category label.
    assert isinstance(state["current_value"], float)
    assert state["state"] == "stable"
    await bus.close()