feat(correlation/attribution): substrate + idle handler (Phase 1)

v0 Phase 1 of ATTRIBUTION-ENGINE.md:

* AttributionStateRow SQLModel keyed on (identity_uuid, primitive)
  per ANTI direction — re-keying state rows when the v1 clusterer
  merges attackers is the migration debt v0 should not bake in.
  ATTRIBUTION-ENGINE.md updated with the deviation note.
* AttributionMixin: ensure_stub_identity_for_attacker, idempotent
  upsert_attribution_state, get_attribution_state[_for_identity],
  list_multi_actor_identities (the Phase 5 correlator's read).
* attribution.profile.{state_changed,multi_actor_suspected} bus
  topics + builder; wiki Service-Bus.md updated separately.
* attribution_worker.py: subscribes to attacker.observation.>,
  ensures stub identity per event, logs and continues. No merger,
  no state writes, no derived events — Phase 4 wires those.
* attribution/{aggregate.py,_thresholds.py} skeletons: Phase 2
  fills _aggregate_categorical, Phase 3 adds numeric+hash+dispatcher.
This commit is contained in:
2026-05-08 23:16:13 -04:00
parent e94ab608d9
commit c2891d6cca
15 changed files with 1203 additions and 0 deletions

View File

@@ -87,3 +87,19 @@ def test_identity_builder() -> None:
def test_identity_builder_rejects_empty() -> None:
    """``topics.identity`` must refuse an empty argument with ``ValueError``."""
    empty_argument = ""
    with pytest.raises(ValueError):
        topics.identity(empty_argument)
def test_attribution_builder() -> None:
    """Both attribution constants build their ``attribution.profile.*`` topic."""
    state_changed_topic = topics.attribution(
        topics.ATTRIBUTION_PROFILE_STATE_CHANGED,
    )
    assert state_changed_topic == "attribution.profile.state_changed"

    multi_actor_topic = topics.attribution(
        topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED,
    )
    assert multi_actor_topic == "attribution.profile.multi_actor_suspected"
def test_attribution_builder_rejects_empty() -> None:
    """``topics.attribution`` must refuse an empty argument with ``ValueError``."""
    empty_argument = ""
    with pytest.raises(ValueError):
        topics.attribution(empty_argument)

View File

@@ -0,0 +1,169 @@
"""Phase 1 attribution worker — wiring smoke tests.
The Phase 1 worker subscribes to ``attacker.observation.>`` and, for
each event, ensures the source attacker has a stub identity row.
That's it — no merger, no state writes, no derived events. These
tests pin the wiring + the stub-materialisation contract.
Phase 4 will extend with end-to-end state-row + transition-event
assertions.
"""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import pytest
from decnet.bus.fake import FakeBus
from decnet.correlation import attribution_worker as _aw
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path: Path):
    """Yield-free fixture: an initialised repository on a per-test DB file.

    The database lives at ``attribution_wiring.db`` under pytest's
    ``tmp_path``, so every test starts from its own file.
    """
    database_file = tmp_path / "attribution_wiring.db"
    repository = get_repository(db_path=str(database_file))
    await repository.initialize()
    return repository
@pytest.fixture
async def attacker_uuid(repo) -> str:
    """Seed one attacker (``10.0.0.42``) and hand back what
    ``upsert_attacker`` returns for it.

    ``first_seen`` and ``last_seen`` share a single timezone-aware UTC
    timestamp.
    """
    seen_at = datetime.now(timezone.utc)
    attacker_fields = {
        "ip": "10.0.0.42",
        "first_seen": seen_at,
        "last_seen": seen_at,
    }
    return await repo.upsert_attacker(attacker_fields)
def _make_event(payload: dict[str, Any]) -> Any:
    """Return *payload* unchanged so it can stand in for a bus event.

    The tests in this module hand the handler a plain dict instead of
    an object carrying ``.payload``; this helper only names that intent
    at the call site and performs no copying or wrapping.
    """
    event: Any = payload
    return event
@pytest.mark.anyio
async def test_handle_event_creates_stub_for_known_attacker(
    repo, attacker_uuid: str,
) -> None:
    """First observation for an attacker → stub identity created and
    stamped onto the Attacker row; a second observation re-uses it.

    The bus is closed in ``finally`` so a failing assertion (or a
    raising handler) does not leave the connected ``FakeBus`` open.
    """
    bus = FakeBus()
    await bus.connect()
    try:
        payload = {
            "attacker_uuid": attacker_uuid,
            "primitive": "motor.input_modality",
            "value": "pasted",
            "ts": 1714000000.0,
            "confidence": 0.9,
        }
        await _aw.handle_observation_event(bus, repo, _make_event(payload))
        attacker = await repo.get_attacker_by_uuid(attacker_uuid)
        assert attacker is not None
        assert attacker["identity_id"] is not None

        # Second event re-uses the same stub.
        await _aw.handle_observation_event(bus, repo, _make_event(payload))
        attacker_again = await repo.get_attacker_by_uuid(attacker_uuid)
        # Guard the subscript below: a vanished row should fail as an
        # assertion, not as a TypeError on ``None[...]``.
        assert attacker_again is not None
        assert attacker_again["identity_id"] == attacker["identity_id"]
    finally:
        await bus.close()
@pytest.mark.anyio
async def test_handle_event_defers_for_missing_attacker(repo) -> None:
    """No Attacker row yet → handler returns without raising and
    without inserting an orphan identity (the worker treats this as
    'profiler hasn't materialised the attacker, defer').

    The bus is closed in ``finally`` so a failing assertion (or a
    raising handler) does not leave the connected ``FakeBus`` open.
    """
    bus = FakeBus()
    await bus.connect()
    try:
        payload = {
            "attacker_uuid": "00000000000000000000000000000000",
            "primitive": "motor.input_modality",
            "value": "pasted",
            "ts": 1714000000.0,
            "confidence": 0.9,
        }
        # Should NOT raise.
        await _aw.handle_observation_event(bus, repo, _make_event(payload))

        # No identities materialised.
        identities = await repo.list_all_identities()
        assert identities == []
    finally:
        await bus.close()
@pytest.mark.anyio
async def test_handle_event_skips_malformed_payload(
    repo, attacker_uuid: str,
) -> None:
    """Missing attacker_uuid or primitive → log + continue, never
    raise. Bus delivery is at-least-once; bad payloads must not
    poison the consumer.

    The bus is closed in ``finally`` so a failing assertion (or a
    raising handler) does not leave the connected ``FakeBus`` open.
    """
    bus = FakeBus()
    await bus.connect()
    try:
        malformed_payloads = (
            {"primitive": "motor.input_modality"},  # missing attacker_uuid
            {"attacker_uuid": attacker_uuid},       # missing primitive
            {},                                     # both missing
        )
        for bad in malformed_payloads:
            await _aw.handle_observation_event(bus, repo, _make_event(bad))

        # The seeded attacker must still have no identity: none of the
        # malformed payloads may have reached the stub helper.
        attacker = await repo.get_attacker_by_uuid(attacker_uuid)
        assert attacker is not None
        assert attacker["identity_id"] is None
    finally:
        await bus.close()
@pytest.mark.anyio
async def test_handle_event_idempotent_per_observation(
    repo, attacker_uuid: str,
) -> None:
    """Hammer the same payload N times — one stub identity, no
    duplicate rows, no exception.

    The bus is closed in ``finally`` so a failing assertion (or a
    raising handler) does not leave the connected ``FakeBus`` open.
    """
    bus = FakeBus()
    await bus.connect()
    try:
        payload = {
            "attacker_uuid": attacker_uuid,
            "primitive": "motor.input_modality",
            "value": "pasted",
            "ts": 1714000000.0,
            "confidence": 0.9,
        }
        for _ in range(5):
            await _aw.handle_observation_event(bus, repo, _make_event(payload))

        identities = await repo.list_all_identities()
        assert len(identities) == 1
    finally:
        await bus.close()
@pytest.mark.anyio
async def test_event_object_payload_attribute(
    repo, attacker_uuid: str,
) -> None:
    """Real bus events carry payload on ``.payload``; the handler
    must follow the attribute, not assume the event itself is the
    dict.

    The bus is closed in ``finally`` so a failing assertion (or a
    raising handler) does not leave the connected ``FakeBus`` open.
    """

    class _Evt:
        """Minimal event stand-in exposing only ``.payload``."""

        def __init__(self, payload: dict[str, Any]) -> None:
            self.payload = payload

    bus = FakeBus()
    await bus.connect()
    try:
        payload = {
            "attacker_uuid": attacker_uuid,
            "primitive": "cognitive.feedback_loop_engagement",
            "value": "closed_loop",
            "ts": 1714000000.0,
            "confidence": 0.85,
        }
        await _aw.handle_observation_event(bus, repo, _Evt(payload))

        attacker = await repo.get_attacker_by_uuid(attacker_uuid)
        assert attacker is not None
        assert attacker["identity_id"] is not None
    finally:
        await bus.close()

View File

@@ -0,0 +1,224 @@
"""AttributionStateRow + identity-stub repo tests — Phase 1 substrate.
Mirrors ``tests/db/test_observations.py``: SQLite ``tmp_path`` factory,
``@pytest.mark.anyio`` markers, an ``Attacker`` seeded so the stub-
materialisation path has a valid FK.
"""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
import pytest
from decnet.web.db.factory import get_repository
@pytest.fixture
async def repo(tmp_path: Path):
    """Provide an initialised repository on a per-test DB file.

    The database lives at ``attribution.db`` under pytest's
    ``tmp_path``, so every test starts from its own file.
    """
    database_file = tmp_path / "attribution.db"
    repository = get_repository(db_path=str(database_file))
    await repository.initialize()
    return repository
@pytest.fixture
async def attacker_uuid(repo) -> str:
    """Seed one attacker (``10.0.0.7``) and hand back what
    ``upsert_attacker`` returns for it.

    ``first_seen`` and ``last_seen`` share a single timezone-aware UTC
    timestamp.
    """
    seen_at = datetime.now(timezone.utc)
    attacker_fields = {
        "ip": "10.0.0.7",
        "first_seen": seen_at,
        "last_seen": seen_at,
    }
    return await repo.upsert_attacker(attacker_fields)
@pytest.mark.anyio
async def test_ensure_stub_creates_identity_for_new_attacker(
    repo, attacker_uuid: str,
) -> None:
    """A freshly seeded attacker gets a stub identity on first call.

    Checks three things: the helper returns a string uuid, the
    Attacker row's ``identity_id`` points at it, and the identity row
    itself is unmerged with ``schema_version == 1``.
    """
    stub_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert stub_uuid is not None
    assert isinstance(stub_uuid, str)

    # The attacker row must now reference the stub.
    attacker_row = await repo.get_attacker_by_uuid(attacker_uuid)
    assert attacker_row is not None
    assert attacker_row["identity_id"] == stub_uuid

    # The stub itself must exist and look like a pristine identity.
    identity_row = await repo.get_identity_by_uuid(stub_uuid)
    assert identity_row is not None
    assert identity_row["uuid"] == stub_uuid
    assert identity_row["merged_into_uuid"] is None
    assert identity_row["schema_version"] == 1
@pytest.mark.anyio
async def test_ensure_stub_idempotent(repo, attacker_uuid: str) -> None:
    """Three consecutive calls for one attacker all return the same
    identity uuid."""
    returned = [
        await repo.ensure_stub_identity_for_attacker(attacker_uuid)
        for _ in range(3)
    ]
    assert returned[0] == returned[1] == returned[2]
@pytest.mark.anyio
async def test_ensure_stub_returns_none_for_missing_attacker(repo) -> None:
    """An attacker uuid with no seeded row yields ``None`` instead of
    raising — the signal the worker reads as 'defer'."""
    unseeded_uuid = "00000000000000000000000000000000"
    result = await repo.ensure_stub_identity_for_attacker(unseeded_uuid)
    assert result is None
@pytest.mark.anyio
async def test_upsert_and_read_back_state(repo, attacker_uuid: str) -> None:
    """Round-trip: a state row written once reads back unchanged.

    Verified columns are ``state``, ``confidence``, ``current_value``,
    ``observation_count`` and ``last_change_ts``.
    NOTE(review): ``last_observation_ts`` is written but not asserted
    here — confirm whether it should be part of the round-trip check.
    """
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    primitive = "motor.input_modality"
    state_row = {
        "identity_uuid": identity_uuid,
        "primitive": primitive,
        "current_value": "pasted",
        "state": "stable",
        "confidence": 0.91,
        "observation_count": 5,
        "last_change_ts": 1714521660.456,
        "last_observation_ts": 1714521660.456,
    }
    await repo.upsert_attribution_state(state_row)

    stored = await repo.get_attribution_state(identity_uuid, primitive)
    assert stored is not None
    assert stored["state"] == "stable"
    assert stored["confidence"] == 0.91
    assert stored["current_value"] == "pasted"
    assert stored["observation_count"] == 5
    assert stored["last_change_ts"] == 1714521660.456
@pytest.mark.anyio
async def test_upsert_idempotent_on_natural_key(
    repo, attacker_uuid: str,
) -> None:
    """Two upserts for one (identity_uuid, primitive) pair leave a
    single row carrying the second write's mutable fields."""
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    first_write = {
        "identity_uuid": identity_uuid,
        "primitive": "motor.input_modality",
        "current_value": "typed",
        "state": "stable",
        "confidence": 0.7,
        "observation_count": 3,
        "last_change_ts": 1714000000.0,
        "last_observation_ts": 1714000000.0,
    }
    # Same natural key, every mutable field changed.
    second_write = dict(
        first_write,
        current_value="pasted",
        state="drifting",
        confidence=0.85,
        observation_count=8,
        last_change_ts=1714000300.0,
        last_observation_ts=1714000400.0,
    )
    await repo.upsert_attribution_state(first_write)
    await repo.upsert_attribution_state(second_write)

    rows = await repo.get_attribution_state_for_identity(identity_uuid)
    assert len(rows) == 1
    surviving = rows[0]
    assert surviving["state"] == "drifting"
    assert surviving["confidence"] == 0.85
    assert surviving["current_value"] == "pasted"
@pytest.mark.anyio
async def test_get_state_for_identity_orders_by_primitive(
    repo, attacker_uuid: str,
) -> None:
    """Rows for one identity come back sorted by primitive name.

    Three primitives are inserted in non-sorted order; the read must
    return exactly one row per primitive in ``sorted()`` order.
    """
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    primitives = [
        "motor.input_modality",
        "cognitive.feedback_loop_engagement",
        "temporal.weekend_cadence",
    ]
    for offset, primitive_name in enumerate(primitives):
        # Give each row a distinct timestamp, one second apart.
        timestamp = 1714000000.0 + offset
        await repo.upsert_attribution_state({
            "identity_uuid": identity_uuid,
            "primitive": primitive_name,
            "current_value": "x",
            "state": "stable",
            "confidence": 0.8,
            "observation_count": 5,
            "last_change_ts": timestamp,
            "last_observation_ts": timestamp,
        })

    rows = await repo.get_attribution_state_for_identity(identity_uuid)
    returned_order = [row["primitive"] for row in rows]
    assert returned_order == sorted(primitives)
@pytest.mark.anyio
async def test_list_multi_actor_requires_two_primitives(
    repo, attacker_uuid: str,
) -> None:
    """An identity is listed only once two primitives are in the
    ``multi_actor`` state.

    With a single flagged primitive the listing stays empty; after a
    second one is flagged the identity appears with both primitives.
    """
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    first_flag = {
        "identity_uuid": identity_uuid,
        "primitive": "motor.input_modality",
        "current_value": "conflicted",
        "state": "multi_actor",
        "confidence": 0.55,
        "observation_count": 10,
        "last_change_ts": 1714000000.0,
        "last_observation_ts": 1714000000.0,
    }
    second_flag = {
        "identity_uuid": identity_uuid,
        "primitive": "cognitive.feedback_loop_engagement",
        "current_value": "conflicted",
        "state": "multi_actor",
        "confidence": 0.6,
        "observation_count": 8,
        "last_change_ts": 1714000100.0,
        "last_observation_ts": 1714000100.0,
    }

    # One flagged primitive is not enough to surface the identity.
    await repo.upsert_attribution_state(first_flag)
    assert await repo.list_multi_actor_identities() == []

    # The second flagged primitive makes it appear, listing both.
    await repo.upsert_attribution_state(second_flag)
    flagged = await repo.list_multi_actor_identities()
    assert len(flagged) == 1
    entry = flagged[0]
    assert entry["identity_uuid"] == identity_uuid
    assert sorted(entry["primitives"]) == [
        "cognitive.feedback_loop_engagement",
        "motor.input_modality",
    ]
@pytest.mark.anyio
async def test_get_state_returns_none_for_unknown_pair(
    repo, attacker_uuid: str,
) -> None:
    """Reading state for an identity with no state rows written yields
    ``None`` — the value the worker takes to mean 'no prior state'."""
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    never_written = "motor.input_modality"
    stored = await repo.get_attribution_state(identity_uuid, never_written)
    assert stored is None