feat(correlation/attribution): substrate + idle handler (Phase 1)
v0 Phase 1 of ATTRIBUTION-ENGINE.md:
* AttributionStateRow SQLModel keyed on (identity_uuid, primitive)
per ANTI direction — re-keying state rows when the v1 clusterer
merges attackers is the migration debt v0 should not bake in.
ATTRIBUTION-ENGINE.md updated with the deviation note.
* AttributionMixin: ensure_stub_identity_for_attacker, idempotent
upsert_attribution_state, get_attribution_state[_for_identity],
list_multi_actor_identities (the Phase 5 correlator's read).
* attribution.profile.{state_changed,multi_actor_suspected} bus
topics + builder; wiki Service-Bus.md updated separately.
* attribution_worker.py: subscribes to attacker.observation.>,
ensures stub identity per event, logs and continues. No merger,
no state writes, no derived events — Phase 4 wires those.
* attribution/{aggregate.py,_thresholds.py} skeletons: Phase 2
fills _aggregate_categorical, Phase 3 adds numeric+hash+dispatcher.
This commit is contained in:
@@ -29,6 +29,8 @@ Token structure (NATS-style, dot-separated):
|
|||||||
campaign.unmerged
|
campaign.unmerged
|
||||||
credential.captured
|
credential.captured
|
||||||
credential.reuse.detected
|
credential.reuse.detected
|
||||||
|
attribution.profile.state_changed
|
||||||
|
attribution.profile.multi_actor_suspected
|
||||||
canary.{token_id}.triggered
|
canary.{token_id}.triggered
|
||||||
canary.{token_id}.placed
|
canary.{token_id}.placed
|
||||||
canary.{token_id}.revoked
|
canary.{token_id}.revoked
|
||||||
@@ -57,6 +59,7 @@ IDENTITY = "identity"
|
|||||||
CAMPAIGN = "campaign"
|
CAMPAIGN = "campaign"
|
||||||
SYSTEM = "system"
|
SYSTEM = "system"
|
||||||
CREDENTIAL = "credential"
|
CREDENTIAL = "credential"
|
||||||
|
ATTRIBUTION = "attribution"
|
||||||
ORCHESTRATOR = "orchestrator"
|
ORCHESTRATOR = "orchestrator"
|
||||||
CANARY = "canary"
|
CANARY = "canary"
|
||||||
SMTP = "smtp"
|
SMTP = "smtp"
|
||||||
@@ -210,6 +213,42 @@ CAMPAIGN_UNMERGED = "unmerged"
|
|||||||
CREDENTIAL_CAPTURED = "captured"
|
CREDENTIAL_CAPTURED = "captured"
|
||||||
CREDENTIAL_REUSE_DETECTED = "reuse.detected"
|
CREDENTIAL_REUSE_DETECTED = "reuse.detected"
|
||||||
|
|
||||||
|
# Attribution-engine event types (second/third tokens under
|
||||||
|
# ``attribution``). Published by the v0 attribution worker
|
||||||
|
# (``decnet.correlation.attribution_worker``) which subscribes to
|
||||||
|
# ``attacker.observation.>`` and runs the per-(identity, primitive)
|
||||||
|
# state machine. See ``development/ATTRIBUTION-ENGINE.md``.
|
||||||
|
#
|
||||||
|
# attribution.profile.state_changed — per-primitive state
|
||||||
|
# transition (e.g.
|
||||||
|
# stable → drifting).
|
||||||
|
# Payload: identity_uuid,
|
||||||
|
# primitive, old_state,
|
||||||
|
# new_state, current_value,
|
||||||
|
# confidence,
|
||||||
|
# observation_count, ts.
|
||||||
|
# attribution.profile.multi_actor_suspected — fires when ≥ 2
|
||||||
|
# primitives flag the same
|
||||||
|
# identity as multi_actor
|
||||||
|
# concurrently. Cross-
|
||||||
|
# primitive correlator;
|
||||||
|
# single-primitive
|
||||||
|
# multi_actor is too noisy
|
||||||
|
# on its own. Payload:
|
||||||
|
# identity_uuid, primitives,
|
||||||
|
# evidence_summary,
|
||||||
|
# confidence, ts.
|
||||||
|
#
|
||||||
|
# These are *derived* signals — distinct from
|
||||||
|
# ``identity.*`` (clusterer lifecycle, IDENTITY_RESOLUTION.md) and
|
||||||
|
# ``attacker.observation.*`` (raw extractor envelopes,
|
||||||
|
# BEHAVE-INTEGRATION.md). The three families compose: observations feed
|
||||||
|
# the attribution engine, the engine emits derived state, the clusterer
|
||||||
|
# reads observations + state to form / merge identities.
|
||||||
|
# Second topic token shared by both attribution profile events below.
ATTRIBUTION_PROFILE_PREFIX = "profile"
# Event type that yields ``attribution.profile.state_changed`` when
# passed to the ``attribution()`` topic builder.
ATTRIBUTION_PROFILE_STATE_CHANGED = "profile.state_changed"
# Event type that yields ``attribution.profile.multi_actor_suspected``
# when passed to the ``attribution()`` topic builder.
ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED = "profile.multi_actor_suspected"
|
||||||
|
|
||||||
# Canary-token event types (third token under ``canary``).
|
# Canary-token event types (third token under ``canary``).
|
||||||
#
|
#
|
||||||
# canary.{token_id}.placed — orchestrator/API successfully planted a
|
# canary.{token_id}.placed — orchestrator/API successfully planted a
|
||||||
@@ -402,6 +441,20 @@ def attacker_observation(primitive: str) -> str:
|
|||||||
return f"{ATTACKER}.{ATTACKER_OBSERVATION_PREFIX}.{primitive}"
|
return f"{ATTACKER}.{ATTACKER_OBSERVATION_PREFIX}.{primitive}"
|
||||||
|
|
||||||
|
|
||||||
|
def attribution(event_type: str) -> str:
    """Return the bus topic ``attribution.<event_type>``.

    *event_type* is normally
    :data:`ATTRIBUTION_PROFILE_STATE_CHANGED` or
    :data:`ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED`. Both are dotted
    leaves (``profile.state_changed``); the value is appended verbatim,
    under the same "trailing dotted leaf" rule that
    ``attacker.session.started`` uses.

    Raises:
        ValueError: if *event_type* is empty (or otherwise falsy).
    """
    if event_type:
        return f"{ATTRIBUTION}.{event_type}"
    raise ValueError("attribution topic requires a non-empty event_type")
|
||||||
|
|
||||||
|
|
||||||
def campaign(event_type: str) -> str:
|
def campaign(event_type: str) -> str:
|
||||||
"""Build ``campaign.<event_type>``.
|
"""Build ``campaign.<event_type>``.
|
||||||
|
|
||||||
|
|||||||
21
decnet/correlation/attribution/__init__.py
Normal file
21
decnet/correlation/attribution/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
"""DECNET attribution engine — v0 aggregation library.
|
||||||
|
|
||||||
|
Pure library: per-(identity, primitive) state machine over BEHAVE-SHELL
|
||||||
|
observations. No I/O, no bus, no DB. The bus subscriber and DB writes
|
||||||
|
live in :mod:`decnet.correlation.attribution_worker` so this package
|
||||||
|
stays trivially testable with synthetic observation lists.
|
||||||
|
|
||||||
|
See ``development/ATTRIBUTION-ENGINE.md`` for the full design and the
|
||||||
|
explicit bright line: this engine does NOT do persona classification
|
||||||
|
(HUMAN/LLM/SCRIPTED), does NOT gate access, does NOT attribute to
|
||||||
|
named persons. It surfaces *behavioural coherence* and *behavioural
|
||||||
|
drift*, and stops there.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decnet.correlation.attribution.aggregate import (
|
||||||
|
AttributionState,
|
||||||
|
aggregate_observations,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = ["AttributionState", "aggregate_observations"]
|
||||||
62
decnet/correlation/attribution/_thresholds.py
Normal file
62
decnet/correlation/attribution/_thresholds.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""Calibration thresholds for the attribution engine — every magic
|
||||||
|
number lives here, named, with the calibration source cited.
|
||||||
|
|
||||||
|
v0 values are heuristic. Real calibration ships when red-team
|
||||||
|
exercises produce labelled trace data
|
||||||
|
(``ATTRIBUTION-ENGINE.md`` §"Out of scope"). Until then these constants
|
||||||
|
are the engine's only knobs; aggregate.py never embeds a literal.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
# ── Categorical merger ────────────────────────────────────────────────
# Last-N window size for the categorical state machine. 5 calibrates
# against typical session counts (most attackers are observed < 10
# times before they go quiet — ATTRIBUTION-ENGINE.md §"Open question
# 2"). Operators with long-running attackers will want a wider window
# in v1.
CATEGORICAL_WINDOW_N = 5

# Minimum observations before the merger emits anything other than
# ``unknown``. Below this floor the state machine has no signal.
MIN_OBSERVATIONS_FOR_STATE = 3

# Categorical merger is one-outlier-tolerant: in a window of
# ``CATEGORICAL_WINDOW_N`` (5), the state is ``stable`` if at least
# ``CATEGORICAL_MAJORITY_THRESHOLD`` observations agree.
# NOTE(review): this is an absolute count calibrated for N=5, not a
# ratio — presumably it needs retuning if the window changes; confirm.
CATEGORICAL_MAJORITY_THRESHOLD = 4

# ── Numeric merger ────────────────────────────────────────────────────
# EWMA smoothing factor for numeric primitives. 0.3 weights recent
# observations enough to surface drift quickly without flapping on
# single outliers.
NUMERIC_EWMA_ALPHA = 0.3

# Relative thresholds, stored as fractions despite the ``_PCT`` suffix
# (0.20 == 20%). The two ``*_DISPERSION_*`` values bound the
# coefficient of variation (dispersion / |mean|); ``*_MEAN_SHIFT_*``
# bounds how far the mean itself has moved.
NUMERIC_STABLE_DISPERSION_PCT = 0.20  # < 20% of mean → stable
NUMERIC_DRIFT_MEAN_SHIFT_PCT = 0.30  # mean moved > 30% → drifting
NUMERIC_CONFLICT_DISPERSION_PCT = 1.0  # > 100% of mean → conflicted

# ── Hash merger ───────────────────────────────────────────────────────
# Rotations within ``HASH_DRIFT_WINDOW_SECS`` count toward state
# transitions. Below ``HASH_DRIFT_MAX`` → drifting; above → conflicted.
# The values mirror the DEBT-032 fingerprint-rotation calibration —
# bumped by one because the attribution engine takes one rotation as
# evidence-of-life, not yet evidence-of-drift.
# NOTE(review): the outcome at exactly ``HASH_DRIFT_MAX`` rotations is
# not stated here — confirm against the hash merger when it lands.
HASH_DRIFT_MAX = 2
HASH_DRIFT_WINDOW_SECS = 24 * 60 * 60  # 24h

# ── Multi-actor cap ───────────────────────────────────────────────────
# multi_actor confidence is capped to keep the dashboard honest about
# how noisy this signal is. ATTRIBUTION-ENGINE.md §"Open question 1":
# flapping primitives on flaky networks look like two operators.
MULTI_ACTOR_MAX_CONFIDENCE = 0.6

# ── Cross-primitive correlator (Phase 5) ──────────────────────────────
# Minimum number of primitives that must independently flag
# ``multi_actor`` for the same identity before
# ``attribution.profile.multi_actor_suspected`` fires.
MULTI_ACTOR_MIN_PRIMITIVES = 2

# Tick interval, in seconds, for the periodic walk in
# :mod:`decnet.correlation.attribution_worker`. Configurable via env
# var in v1; hardcoded in v0.
MULTI_ACTOR_TICK_SECS = 60.0
|
||||||
87
decnet/correlation/attribution/aggregate.py
Normal file
87
decnet/correlation/attribution/aggregate.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
"""Per-(identity, primitive) state-machine — the attribution engine's
|
||||||
|
core merge logic.
|
||||||
|
|
||||||
|
Pure: given a list of BEHAVE observations for one
|
||||||
|
``(identity_uuid, primitive)`` pair, returns the derived state and
|
||||||
|
mirror metadata. No DB, no bus, no I/O. The worker
|
||||||
|
(``decnet.correlation.attribution_worker``) is responsible for loading
|
||||||
|
the observations and writing the state row.
|
||||||
|
|
||||||
|
State vocabulary is frozen at five values (see
|
||||||
|
``ATTRIBUTION-ENGINE.md``):
|
||||||
|
|
||||||
|
* ``unknown`` — < 3 observations (insufficient signal)
|
||||||
|
* ``stable`` — recent N agree
|
||||||
|
* ``drifting`` — recent N stable but disagree with older N
|
||||||
|
* ``conflicted`` — recent N split
|
||||||
|
* ``multi_actor`` — conflicted + cross-session alternation pattern
|
||||||
|
|
||||||
|
Phase 2 ships :func:`_aggregate_categorical`. Phase 3 will add
|
||||||
|
:func:`_aggregate_numeric` and :func:`_aggregate_hash` and the
|
||||||
|
ValueKind dispatcher.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Iterable, Sequence
|
||||||
|
|
||||||
|
__all__ = ["AttributionState", "aggregate_observations"]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class AttributionState:
    """Immutable merger result for a single ``(identity, primitive)`` pair.

    Every field has a same-named column on :class:`AttributionStateRow`.
    The natural key is not part of this object: callers add
    ``identity_uuid`` and ``primitive`` themselves when they build the
    dict handed to ``upsert_attribution_state``.
    """

    # Merged value for the primitive; ``None`` when nothing was observed.
    current_value: Any
    # State label from the frozen vocabulary (``unknown``, ``stable``,
    # ``drifting``, ``conflicted``, ``multi_actor``).
    state: str
    # Confidence attached to the state assertion.
    confidence: float
    # Number of observations that fed the merger.
    observation_count: int
    # Timestamp of the newest observation; ``0.0`` when there were none.
    last_observation_ts: float
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_observations(
    observations: Sequence[dict[str, Any]],
) -> AttributionState:
    """Derive the attribution state for one ``(identity, primitive)`` pair.

    *observations* is a sequence of dicts carrying at least ``value``,
    ``ts`` and ``confidence`` (the BEHAVE ``Observation`` envelope shape
    that ``ObservationRow.observations_time_series`` returns). They MUST
    be ordered by ``ts`` ascending; the merger relies on that order.

    In this Phase 1 substrate only the empty case is handled: it yields
    an ``unknown`` state with zero confidence, zero observations and a
    ``None`` value. Any non-empty input raises, because the categorical
    merger (Phase 2) and the numeric/hash mergers plus ``ValueKind``
    dispatch (Phase 3) have not landed yet.

    Raises:
        NotImplementedError: if *observations* is non-empty.
    """
    if observations:
        # No merger exists yet; later phases replace this branch with a
        # dispatch on the primitive's value kind.
        raise NotImplementedError(
            "aggregate_observations is implemented in Phase 2 (categorical) "
            "and Phase 3 (numeric + hash). v0 Phase 1 ships the substrate "
            "only; the worker logs without invoking the merger.",
        )
    # Nothing observed: report the neutral "no signal" state.
    return AttributionState(
        current_value=None,
        state="unknown",
        confidence=0.0,
        observation_count=0,
        last_observation_ts=0.0,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_obs_iter(
|
||||||
|
observations: Iterable[dict[str, Any]],
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
"""Defensive: accept any iterable, return a list. Used by the
|
||||||
|
worker which pulls observations off the bus + DB into mixed
|
||||||
|
iterables."""
|
||||||
|
return list(observations)
|
||||||
178
decnet/correlation/attribution_worker.py
Normal file
178
decnet/correlation/attribution_worker.py
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
"""Attribution-engine bus subscriber — v0 Phase 1 skeleton.
|
||||||
|
|
||||||
|
Subscribes to ``attacker.observation.>`` and, for each event, ensures
|
||||||
|
the source attacker has a stub identity in ``attacker_identities``.
|
||||||
|
Phase 1 does **not** invoke the merger or write
|
||||||
|
``attribution_state`` rows; that wiring lands in Phase 4 once the
|
||||||
|
Phase 2/3 mergers are in.
|
||||||
|
|
||||||
|
Pattern mirrors :mod:`decnet.correlation.reuse_worker`: bus-subscribe
|
||||||
|
with a wake event, fall back to poll-only if the bus is unavailable,
|
||||||
|
publish derived events with :func:`publish_safely`, log per-handler
|
||||||
|
exceptions and continue.
|
||||||
|
|
||||||
|
Trigger isolation: the per-event handler is wrapped in a single
|
||||||
|
try/except. Any exception is logged and the loop continues with the
|
||||||
|
next event. This is the same posture BEHAVE-SHELL's
|
||||||
|
``_handler.handle_session_ended`` adopts.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import contextlib
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from decnet.bus import topics as _topics
|
||||||
|
from decnet.bus.base import BaseBus
|
||||||
|
from decnet.bus.factory import get_bus
|
||||||
|
from decnet.bus.publish import (
|
||||||
|
run_control_listener_signal as _run_control_listener_signal,
|
||||||
|
run_health_heartbeat as _run_health_heartbeat,
|
||||||
|
)
|
||||||
|
from decnet.logging import get_logger
|
||||||
|
from decnet.web.db.repository import BaseRepository
|
||||||
|
|
||||||
|
# Module-level logger for the attribution worker.
log = get_logger("correlation.attribution_worker")

# Worker identity: forms the bus client name (``attribution-correlator``)
# and is passed to the heartbeat and control-listener helpers.
_WORKER_NAME = "attribution"
# Wildcard subscription pattern built from the topic tokens; the
# trailing ``>`` is what the module docstring writes as
# ``attacker.observation.>``.
_OBSERVATION_PATTERN = f"{_topics.ATTACKER}.{_topics.ATTACKER_OBSERVATION_PREFIX}.>"
|
||||||
|
|
||||||
|
|
||||||
|
async def run_attribution_loop(
    repo: BaseRepository,
    *,
    shutdown: asyncio.Event | None = None,
) -> None:
    """Run the attribution worker until it is told to stop.

    Connects to the bus and starts three background tasks: the
    observation consumer, the health heartbeat and the control
    listener. If bus setup fails the failure is logged and the worker
    simply waits; nothing in this function retries the connection.

    *shutdown* is an optional external stop signal. When omitted, a
    private event is created that is never set, so the loop ends only
    through cancellation. ``CancelledError`` and ``KeyboardInterrupt``
    raised while waiting are absorbed (not re-raised) so the coroutine
    returns normally after cleanup.
    """
    log.info("attribution worker started pattern=%s", _OBSERVATION_PATTERN)

    bus: BaseBus | None = None
    # Background tasks in creation order; stays empty if bus setup fails.
    background: list[asyncio.Task] = []
    try:
        connecting = get_bus(client_name=f"{_WORKER_NAME}-correlator")
        await connecting.connect()
        # Only expose the bus for cleanup once the connection succeeded.
        bus = connecting
        background.append(
            asyncio.create_task(_consume_observations(bus, repo)),
        )
        background.append(
            asyncio.create_task(_run_health_heartbeat(bus, _WORKER_NAME)),
        )
        background.append(
            asyncio.create_task(_run_control_listener_signal(bus, _WORKER_NAME)),
        )
    except Exception as exc:  # noqa: BLE001
        log.warning(
            "attribution worker: bus unavailable, idle until bus returns: %s",
            exc,
        )

    stop_signal = asyncio.Event() if shutdown is None else shutdown

    try:
        await stop_signal.wait()
    except (asyncio.CancelledError, KeyboardInterrupt):
        log.info("attribution worker stopped")
    finally:
        # Cancel and drain each task in turn; teardown errors are ignored
        # so one failing task cannot block the others or the bus close.
        for task in background:
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await task
        if bus is not None:
            with contextlib.suppress(Exception):
                await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _consume_observations(
    bus: BaseBus, repo: BaseRepository,
) -> None:
    """Stream ``attacker.observation.>`` events into the handler.

    Each event is passed to :func:`handle_observation_event`. A failure
    while handling one event is logged with its traceback and the
    stream continues, so a bad payload cannot kill the subscription.

    If the subscription itself fails (e.g. a bus disconnect), a warning
    is logged and this coroutine returns; it does not resubscribe.
    Cancellation is propagated to the caller unchanged.
    """
    try:
        subscription = bus.subscribe(_OBSERVATION_PATTERN)
        async with subscription:
            async for evt in subscription:
                try:
                    await handle_observation_event(bus, repo, evt)
                except Exception:  # noqa: BLE001
                    # Per-event isolation: log and move on to the next event.
                    log.exception("attribution worker: handler failed")
    except asyncio.CancelledError:
        raise
    except Exception as err:  # noqa: BLE001
        log.warning(
            "attribution worker: subscriber for %s died (%s)",
            _OBSERVATION_PATTERN, err,
        )
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_observation_event(
    bus: BaseBus | None,
    repo: BaseRepository,
    event: Any,
) -> None:
    """Process one ``attacker.observation.<primitive>`` event.

    Phase 1 behaviour: make sure the source attacker has a stub
    identity, log the outcome and return. Phase 4 is expected to load
    prior state, run the merger, upsert the new state and emit
    ``attribution.profile.state_changed`` on a transition.

    *event* is whatever the bus subscription yields — a ``BusEvent``
    with a dict ``payload`` — or a plain dict. The payload must carry
    ``attacker_uuid`` and ``primitive``; events missing either are
    skipped. *bus* is accepted but not used by this phase.
    """
    body = _payload_of(event)
    attacker_uuid, primitive = body.get("attacker_uuid"), body.get("primitive")
    if not (attacker_uuid and primitive):
        log.debug(
            "attribution worker: skipping malformed event (uuid=%r primitive=%r)",
            attacker_uuid, primitive,
        )
        return

    identity_uuid = await repo.ensure_stub_identity_for_attacker(
        str(attacker_uuid),
    )
    if identity_uuid is not None:
        # Stub materialised (or already present); Phase 4 hooks the
        # merger and the state_changed emission in at this point.
        log.debug(
            "attribution worker: stub identity=%s for attacker=%s primitive=%s",
            identity_uuid, attacker_uuid, primitive,
        )
        return

    # The repo found no Attacker row, so there is nothing to attach to yet.
    log.info(
        "attribution worker: no Attacker row for uuid=%s yet; deferring",
        attacker_uuid,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _payload_of(event: Any) -> dict[str, Any]:
|
||||||
|
"""Extract the dict payload from a BusEvent or fall through if
|
||||||
|
*event* is already a dict (test fixtures may pass either)."""
|
||||||
|
payload = getattr(event, "payload", event)
|
||||||
|
return payload if isinstance(payload, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"run_attribution_loop",
|
||||||
|
"handle_observation_event",
|
||||||
|
]
|
||||||
@@ -59,6 +59,9 @@ from .attachments import (
|
|||||||
from .observations import (
|
from .observations import (
|
||||||
ObservationRow,
|
ObservationRow,
|
||||||
)
|
)
|
||||||
|
from .attribution_state import (
|
||||||
|
AttributionStateRow,
|
||||||
|
)
|
||||||
from .campaigns import (
|
from .campaigns import (
|
||||||
Campaign,
|
Campaign,
|
||||||
CampaignsResponse,
|
CampaignsResponse,
|
||||||
@@ -252,6 +255,7 @@ __all__ = [
|
|||||||
"AttackerIdentity",
|
"AttackerIdentity",
|
||||||
"AttackerIntel",
|
"AttackerIntel",
|
||||||
"AttackersResponse",
|
"AttackersResponse",
|
||||||
|
"AttributionStateRow",
|
||||||
"ObservationRow",
|
"ObservationRow",
|
||||||
"ObservedAttachment",
|
"ObservedAttachment",
|
||||||
"SmtpTarget",
|
"SmtpTarget",
|
||||||
|
|||||||
78
decnet/web/db/models/attribution_state.py
Normal file
78
decnet/web/db/models/attribution_state.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
"""Per-(identity, primitive) attribution state — v0 of the
|
||||||
|
attribution engine.
|
||||||
|
|
||||||
|
Materialised view of the state machine in
|
||||||
|
``decnet.correlation.attribution.aggregate``. Re-derivable from
|
||||||
|
``observations`` + the DEBT-032 fingerprint-rotation log; this row is
|
||||||
|
a cache for cheap dashboard reads, not a source of truth.
|
||||||
|
|
||||||
|
Keyed on ``identity_uuid``, not ``attacker_uuid``: pre-clusterer,
|
||||||
|
every Attacker maps 1:1 to a stub row in ``attacker_identities``
|
||||||
|
(``merged_into_uuid = NULL``) so the key is stable across the v0 / v1
|
||||||
|
boundary. When v1's clusterer eventually merges identities, the loser
|
||||||
|
row's state is recomputed from the union of observations under the
|
||||||
|
winner — no schema change, no column-rename migration.
|
||||||
|
|
||||||
|
This deviates from ``development/ATTRIBUTION-ENGINE.md`` §"Subject of
|
||||||
|
attribution in v0" (which resolved on ``attacker_uuid``); the doc gets
|
||||||
|
a deviation note in the same commit that ships this file.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from sqlalchemy import JSON, Column, Index
|
||||||
|
from sqlmodel import Field, SQLModel
|
||||||
|
|
||||||
|
|
||||||
|
class AttributionStateRow(SQLModel, table=True):
    """One derived-state row per ``(identity_uuid, primitive)`` pair.

    The composite primary key guarantees at most one row per pair.
    Rows are stored in the ``attribution_state`` table.
    """

    __tablename__ = "attribution_state"
    __table_args__ = (
        # Secondary indexes: by state, by last state change, and by
        # (identity, state).
        # NOTE(review): the (identity, state) index presumably backs the
        # multi_actor co-flag lookup — confirm against the repo queries.
        Index("ix_attribution_state_state", "state"),
        Index("ix_attribution_state_last_change", "last_change_ts"),
        Index(
            "ix_attribution_state_identity_state",
            "identity_uuid", "state",
        ),
    )

    # ── key ────────────────────────────────────────────────────────────
    # First half of the composite PK; references ``attacker_identities.uuid``.
    identity_uuid: str = Field(
        foreign_key="attacker_identities.uuid", primary_key=True,
    )
    # Second half of the composite PK: the primitive's name.
    primitive: str = Field(primary_key=True)

    # ── derived state ──────────────────────────────────────────────────
    # Mirrors the BEHAVE Observation ``value`` column shape so the
    # frontend can render the merger output the same way it renders raw
    # latest-wins values today (BEHAVE-INTEGRATION.md Q3).
    # NOTE(review): the column is NOT NULL, while the merger's
    # empty-input result carries ``current_value=None`` — presumably
    # that result is never upserted; confirm in the worker.
    current_value: dict[str, Any] | str | int | float | bool | list = Field(
        sa_column=Column(JSON, nullable=False),
    )
    # 'unknown' | 'stable' | 'drifting' | 'conflicted' | 'multi_actor'.
    # Five states, frozen — see ATTRIBUTION-ENGINE.md §"State machine".
    state: str
    # Engine's confidence in the *state assertion*, not in any verdict
    # about the attacker. ``multi_actor`` is capped at 0.6 by
    # convention; other states use the merger's per-ValueKind formula.
    confidence: float
    # How many observations underlie this row. Used by the API to gate
    # ``unknown`` (< 3 obs) without re-querying ``observations``.
    observation_count: int = Field(default=0)
    # When ``state`` last flipped. Equals ``updated_at`` on insert.
    last_change_ts: float
    # Most recent observation that fed this row. Used by the merger to
    # detect drift windows without a full observation re-scan.
    last_observation_ts: float

    # ── audit ──────────────────────────────────────────────────────────
    # Mirrors AttackerIdentity convention (federation gossip in v2).
    schema_version: int = Field(default=1)
    # Timezone-aware UTC timestamp set when the object is constructed.
    # NOTE(review): only a Python-side default is declared here —
    # refreshing it on update is presumably the upsert's job; confirm.
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
|
||||||
@@ -1492,3 +1492,86 @@ class BaseRepository(ABC):
|
|||||||
SQLModel TTP mixin.
|
SQLModel TTP mixin.
|
||||||
"""
|
"""
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# ─── Attribution engine (v0 — aggregation only) ────────────────────
|
||||||
|
# See development/ATTRIBUTION-ENGINE.md. The engine consumes
|
||||||
|
# ``attacker.observation.*`` events and writes per-(identity,
|
||||||
|
# primitive) state rows. Pre-clusterer, every Attacker maps 1:1
|
||||||
|
# to a stub AttackerIdentity row so the keying is stable across
|
||||||
|
# the v0 / v1 boundary.
|
||||||
|
|
||||||
|
@abstractmethod
async def ensure_stub_identity_for_attacker(
    self, attacker_uuid: str,
) -> Optional[str]:
    """Resolve *attacker_uuid* to its ``identity_uuid``.

    When the attacker has no identity yet, a degenerate 1:1 stub row
    is created in ``attacker_identities`` and its uuid is returned.

    Returns ``None`` when the attacker row itself does not exist. The
    worker treats that as "defer": the profiler tick has not yet
    materialised the Attacker (same posture as
    ``_handler.handle_session_ended`` in BEHAVE-SHELL).

    Must be idempotent under concurrent calls: a second caller sees the
    first caller's stamp and gets the same uuid back. Implementations
    are responsible for serialising the read-then-insert, since the
    bus delivers at-least-once.

    The return value deliberately carries no "newly created" flag. The
    worker is meant to detect the ``identity.formed`` transition from
    the row's absence on its first call, which keeps the repo
    idempotent and flag-free.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
async def upsert_attribution_state(self, data: dict[str, Any]) -> None:
    """Create or overwrite the :class:`AttributionStateRow` identified
    by ``(identity_uuid, primitive)``.

    Required keys in *data*: ``identity_uuid``, ``primitive``,
    ``current_value``, ``state``, ``confidence``,
    ``observation_count``, ``last_change_ts`` and
    ``last_observation_ts``. ``schema_version`` is optional and
    defaults to 1.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
async def get_attribution_state_for_identity(
    self, identity_uuid: str,
) -> list[dict[str, Any]]:
    """Fetch all attribution-state rows belonging to *identity_uuid*.

    The list is empty when no derived state exists yet — for example
    when observations have arrived but the engine has not run, or when
    no primitive has reached the ≥ 3 observations the engine needs.
    Both the attribution API surface and the AttackerDetail badge
    renderer read this projection.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
async def get_attribution_state(
    self, identity_uuid: str, primitive: str,
) -> Optional[dict[str, Any]]:
    """Fetch the single row for ``(identity_uuid, primitive)``.

    Returns ``None`` when no such row exists, which means "no prior
    state — initialise from this observation alone". The attribution
    worker calls this on every inbound observation to load the prior
    state before it runs the merger.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
@abstractmethod
async def list_multi_actor_identities(
    self,
) -> list[dict[str, Any]]:
    """Return ``{identity_uuid, primitives}`` entries for every
    identity that currently has ≥ 2 primitives in the ``multi_actor``
    state.

    This is the read behind the Phase 5 cross-primitive correlator.
    The list is empty when no identity is co-flagged.
    """
    raise NotImplementedError
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ from decnet.web.db.sqlmodel_repo._helpers import ( # noqa: F401 (re-exported f
|
|||||||
)
|
)
|
||||||
from decnet.web.db.sqlmodel_repo.attacker_intel import AttackerIntelMixin
|
from decnet.web.db.sqlmodel_repo.attacker_intel import AttackerIntelMixin
|
||||||
from decnet.web.db.sqlmodel_repo.attackers import AttackersMixin
|
from decnet.web.db.sqlmodel_repo.attackers import AttackersMixin
|
||||||
|
from decnet.web.db.sqlmodel_repo.attribution import AttributionMixin
|
||||||
from decnet.web.db.sqlmodel_repo.auth import AuthMixin
|
from decnet.web.db.sqlmodel_repo.auth import AuthMixin
|
||||||
from decnet.web.db.sqlmodel_repo.bounties import BountiesMixin
|
from decnet.web.db.sqlmodel_repo.bounties import BountiesMixin
|
||||||
from decnet.web.db.sqlmodel_repo.campaigns import CampaignsMixin
|
from decnet.web.db.sqlmodel_repo.campaigns import CampaignsMixin
|
||||||
@@ -58,6 +59,7 @@ from decnet.web.db.sqlmodel_repo.webhooks import WebhooksMixin
|
|||||||
class SQLModelRepository(
|
class SQLModelRepository(
|
||||||
AttackerIntelMixin,
|
AttackerIntelMixin,
|
||||||
AttackersMixin,
|
AttackersMixin,
|
||||||
|
AttributionMixin,
|
||||||
AuthMixin,
|
AuthMixin,
|
||||||
BountiesMixin,
|
BountiesMixin,
|
||||||
CampaignsMixin,
|
CampaignsMixin,
|
||||||
|
|||||||
215
decnet/web/db/sqlmodel_repo/attribution.py
Normal file
215
decnet/web/db/sqlmodel_repo/attribution.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
"""Repo mixin for the ``attribution_state`` table + identity stub
|
||||||
|
materialisation.
|
||||||
|
|
||||||
|
Composed onto :class:`SQLModelRepository`. Five public methods, all
|
||||||
|
serving the v0 attribution engine
|
||||||
|
(``decnet.correlation.attribution_worker``):
|
||||||
|
|
||||||
|
* :meth:`ensure_stub_identity_for_attacker` — pre-clusterer 1:1 stub
|
||||||
|
identity creation. Idempotent under concurrent observation bursts.
|
||||||
|
* :meth:`upsert_attribution_state` — keyed on
|
||||||
|
``(identity_uuid, primitive)``.
|
||||||
|
* :meth:`get_attribution_state` / :meth:`get_attribution_state_for_identity`
|
||||||
|
— single-row and per-identity reads.
|
||||||
|
* :meth:`list_multi_actor_identities` — feeds the Phase 5 cross-
|
||||||
|
primitive correlator.
|
||||||
|
|
||||||
|
See ``development/ATTRIBUTION-ENGINE.md`` for the full design.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uuid as _uuid
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from sqlalchemy import func, select
|
||||||
|
from sqlmodel import col
|
||||||
|
|
||||||
|
from decnet.web.db.models import (
|
||||||
|
Attacker,
|
||||||
|
AttackerIdentity,
|
||||||
|
AttributionStateRow,
|
||||||
|
)
|
||||||
|
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
||||||
|
|
||||||
|
|
||||||
|
class AttributionMixin(_MixinBase):
|
||||||
|
"""Mixin: methods composed onto :class:`SQLModelRepository`."""
|
||||||
|
|
||||||
|
async def ensure_stub_identity_for_attacker(
    self, attacker_uuid: str,
) -> Optional[str]:
    """Resolve *attacker_uuid* to its ``identity_uuid``, minting a stub
    if the attacker has none yet.

    The stub is a degenerate 1:1 ``attacker_identities`` row whose
    first/last-seen timestamps are copied from the Attacker row.

    Returns:
        The identity UUID already stamped on the Attacker row, or the
        freshly created one. ``None`` when no Attacker row matches —
        the attribution worker treats that as "defer" (same posture
        as ``_handler.handle_session_ended`` in BEHAVE-SHELL).

    Idempotent for sequential callers: once ``identity_id`` is stamped
    it is returned as-is and nothing is inserted. Known race: two
    concurrent first-callers can both read ``identity_id = NULL`` and
    both insert a stub, leaving the loser's AttackerIdentity row
    unreferenced. That is accepted for v0 (rare, tiny rows, collected
    when the v1 clusterer runs); the single-writer worker and the
    bus's per-identity ordering are expected to make it rarer still.
    """
    lookup = select(Attacker).where(Attacker.uuid == attacker_uuid)
    async with self._session() as session:
        result = await session.execute(lookup)
        attacker = result.scalar_one_or_none()
        if attacker is None:
            # No Attacker row: there is nothing to hang a stub on.
            return None

        stamped = attacker.identity_id
        if stamped:
            # Fast path — a previous call already materialised the stub.
            return stamped

        stub_uuid = _uuid.uuid4().hex
        created = datetime.now(timezone.utc)
        stub = AttackerIdentity(
            uuid=stub_uuid,
            schema_version=1,
            first_seen_at=attacker.first_seen,
            last_seen_at=attacker.last_seen,
            created_at=created,
            updated_at=created,
            observation_count=1,
        )
        session.add(stub)
        # Stamp the link on the Attacker row in the same transaction.
        attacker.identity_id = stub_uuid
        session.add(attacker)
        await session.commit()
        return stub_uuid
|
||||||
|
|
||||||
|
async def upsert_attribution_state(
    self, data: dict[str, Any],
) -> None:
    """Insert or update the row for one ``(identity_uuid, primitive)``.

    Args:
        data: Column values for the row. MUST carry ``identity_uuid``,
            ``primitive``, ``current_value``, ``state``,
            ``confidence``, ``observation_count``, ``last_change_ts``
            and ``last_observation_ts``.

    ``schema_version`` and ``updated_at`` are managed here: on insert
    they are forced to ``1`` / now, and on update any caller-supplied
    value for either is ignored (``updated_at`` is refreshed, the
    stored ``schema_version`` is left untouched). The natural-key
    columns are never rewritten on an existing row.

    Raises:
        KeyError: if ``identity_uuid`` or ``primitive`` is missing.

    NOTE(review): the read-then-write below is not guarded against a
    concurrent insert of the same key — confirm the single-writer
    assumption or the table's unique constraint covers it.
    """
    identity_uuid = data["identity_uuid"]
    primitive = data["primitive"]
    # Columns a caller must never overwrite on an existing row: the
    # natural key plus the two columns this method owns.
    protected = ("identity_uuid", "primitive", "schema_version", "updated_at")
    async with self._session() as session:
        existing = (
            await session.execute(
                select(AttributionStateRow).where(
                    AttributionStateRow.identity_uuid == identity_uuid,
                    AttributionStateRow.primitive == primitive,
                )
            )
        ).scalar_one_or_none()
        now = datetime.now(timezone.utc)
        if existing is None:
            # Managed columns are placed last so they win over anything
            # the caller put in ``data``.
            session.add(
                AttributionStateRow(
                    **{**data, "schema_version": 1, "updated_at": now}
                )
            )
        else:
            for column, value in data.items():
                if column not in protected:
                    setattr(existing, column, value)
            existing.updated_at = now
            session.add(existing)
        await session.commit()
|
||||||
|
|
||||||
|
async def get_attribution_state(
    self, identity_uuid: str, primitive: str,
) -> Optional[dict[str, Any]]:
    """Fetch the state row for one ``(identity_uuid, primitive)`` pair.

    Returns:
        The row dumped in JSON mode, or ``None`` when no row exists —
        i.e. the merger has not yet run for this pair.
    """
    query = select(AttributionStateRow).where(
        AttributionStateRow.identity_uuid == identity_uuid,
        AttributionStateRow.primitive == primitive,
    )
    async with self._session() as session:
        result = await session.execute(query)
        found = result.scalar_one_or_none()
        if found is None:
            return None
        return found.model_dump(mode="json")
|
||||||
|
|
||||||
|
async def get_attribution_state_for_identity(
    self, identity_uuid: str,
) -> list[dict[str, Any]]:
    """Fetch every attribution-state row belonging to one identity.

    Rows are sorted by ``primitive`` so API output is deterministic.

    Returns:
        One JSON-mode dict per row; empty when the identity has none.
    """
    query = (
        select(AttributionStateRow)
        .where(AttributionStateRow.identity_uuid == identity_uuid)
        .order_by(AttributionStateRow.primitive)
    )
    async with self._session() as session:
        result = await session.execute(query)
        dumped: list[dict[str, Any]] = []
        for state_row in result.scalars().all():
            dumped.append(state_row.model_dump(mode="json"))
        return dumped
|
||||||
|
|
||||||
|
async def list_multi_actor_identities(
    self,
) -> list[dict[str, Any]]:
    """List identities with at least two primitives in ``multi_actor``.

    Output shape::

        [{"identity_uuid": "...", "primitives": ["motor.input_modality",
                                                 "cognitive.feedback_loop_engagement"]},
         ...]

    Entries follow ``identity_uuid`` order and each ``primitives`` list
    is sorted. Returns an empty list when no identity is co-flagged.

    Consumed by the Phase 5 cross-primitive correlator: a single
    ``multi_actor`` primitive is too noisy to alarm on, so two
    independent primitives is the bar for
    ``attribution.profile.multi_actor_suspected``.
    """
    identity_col = col(AttributionStateRow.identity_uuid)
    primitive_col = col(AttributionStateRow.primitive)
    async with self._session() as session:
        # Pass 1 — which identities carry two or more multi_actor rows?
        flagged_stmt = (
            select(identity_col, func.count().label("ct"))
            .where(AttributionStateRow.state == "multi_actor")
            .group_by(identity_col)
            .having(func.count() >= 2)
        )
        flagged_result = await session.execute(flagged_stmt)
        co_flagged = [hit.identity_uuid for hit in flagged_result.all()]
        if not co_flagged:
            return []

        # Pass 2 — fetch the primitives for just those identities. The
        # pass-1 filter narrows this query without needing a self-join.
        primitives_stmt = (
            select(identity_col, primitive_col)
            .where(
                AttributionStateRow.state == "multi_actor",
                identity_col.in_(co_flagged),
            )
            .order_by(identity_col, primitive_col)
        )
        primitives_result = await session.execute(primitives_stmt)
        by_identity: dict[str, list[str]] = {}
        for hit in primitives_result.all():
            if hit.identity_uuid not in by_identity:
                by_identity[hit.identity_uuid] = []
            by_identity[hit.identity_uuid].append(hit.primitive)

        summary = []
        for iuuid, prims in by_identity.items():
            summary.append({"identity_uuid": iuuid, "primitives": prims})
        return summary
|
||||||
@@ -506,6 +506,17 @@ v0.
|
|||||||
Five states, no more (resist the urge to grow the enum).
|
Five states, no more (resist the urge to grow the enum).
|
||||||
- **Subject of attribution in v0.** RESOLVED: `attacker_uuid`,
|
- **Subject of attribution in v0.** RESOLVED: `attacker_uuid`,
|
||||||
not `identity_uuid`. v1 widens.
|
not `identity_uuid`. v1 widens.
|
||||||
|
- **Deviation (Phase 1 implementation):** the engine actually keys
|
||||||
|
state rows on `identity_uuid` from day one, materialising a 1:1
|
||||||
|
stub `attacker_identities` row per Attacker on first observation.
|
||||||
|
Rationale: re-keying state rows when the v1 clusterer eventually
|
||||||
|
merges attackers is exactly the migration debt v0 should not
|
||||||
|
bake in. With identity-keyed state from the start, the v1
|
||||||
|
clusterer becomes a natural rollup operation (merge B's stub
|
||||||
|
identity into A's identity, recompute the union once on the
|
||||||
|
merge event) instead of a column-rename. No polymorphic
|
||||||
|
`subject_uuid` column. ANTI sign-off in conversation; saved as
|
||||||
|
memory `feedback_attribution_keys_identity`.
|
||||||
|
|
||||||
## Real open questions
|
## Real open questions
|
||||||
|
|
||||||
|
|||||||
@@ -87,3 +87,19 @@ def test_identity_builder() -> None:
|
|||||||
def test_identity_builder_rejects_empty() -> None:
    """An empty argument to ``topics.identity`` raises ``ValueError``."""
    empty = ""
    with pytest.raises(ValueError):
        topics.identity(empty)
|
||||||
|
|
||||||
|
|
||||||
|
def test_attribution_builder() -> None:
    """``topics.attribution`` maps each profile constant to its full
    dotted topic string."""
    cases = (
        (
            topics.ATTRIBUTION_PROFILE_STATE_CHANGED,
            "attribution.profile.state_changed",
        ),
        (
            topics.ATTRIBUTION_PROFILE_MULTI_ACTOR_SUSPECTED,
            "attribution.profile.multi_actor_suspected",
        ),
    )
    for constant, expected_topic in cases:
        assert topics.attribution(constant) == expected_topic
|
||||||
|
|
||||||
|
|
||||||
|
def test_attribution_builder_rejects_empty() -> None:
    """An empty argument to ``topics.attribution`` raises
    ``ValueError``."""
    empty = ""
    with pytest.raises(ValueError):
        topics.attribution(empty)
|
||||||
|
|||||||
0
tests/correlation/attribution/__init__.py
Normal file
0
tests/correlation/attribution/__init__.py
Normal file
169
tests/correlation/attribution/test_attribution_worker_phase1.py
Normal file
169
tests/correlation/attribution/test_attribution_worker_phase1.py
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
"""Phase 1 attribution worker — wiring smoke tests.
|
||||||
|
|
||||||
|
The Phase 1 worker subscribes to ``attacker.observation.>`` and, for
|
||||||
|
each event, ensures the source attacker has a stub identity row.
|
||||||
|
That's it — no merger, no state writes, no derived events. These
|
||||||
|
tests pin the wiring + the stub-materialisation contract.
|
||||||
|
|
||||||
|
Phase 4 will extend with end-to-end state-row + transition-event
|
||||||
|
assertions.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from decnet.bus.fake import FakeBus
|
||||||
|
from decnet.correlation import attribution_worker as _aw
|
||||||
|
from decnet.web.db.factory import get_repository
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
async def repo(tmp_path: Path):
    """Initialised repository backed by a database file under
    ``tmp_path``, so each test gets its own store."""
    db_file = tmp_path / "attribution_wiring.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
async def attacker_uuid(repo) -> str:
    """Seed one Attacker row and hand back what ``upsert_attacker``
    returns for it (used by the tests as the attacker's UUID)."""
    seen_at = datetime.now(timezone.utc)
    seed = {
        "ip": "10.0.0.42",
        "first_seen": seen_at,
        "last_seen": seen_at,
    }
    return await repo.upsert_attacker(seed)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_event(payload: dict[str, Any]) -> Any:
|
||||||
|
"""Light Event-shaped object — the handler reads ``.payload``
|
||||||
|
via ``getattr`` and falls back to dicts. We pass a dict directly
|
||||||
|
because that's what tests give the BEHAVE handler too."""
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_handle_event_creates_stub_for_known_attacker(
    repo, attacker_uuid: str,
) -> None:
    """The first observation for a known attacker stamps a stub
    identity on the Attacker row; a repeat observation keeps it."""
    observation = {
        "attacker_uuid": attacker_uuid,
        "primitive": "motor.input_modality",
        "value": "pasted",
        "ts": 1714000000.0,
        "confidence": 0.9,
    }
    bus = FakeBus()
    await bus.connect()

    await _aw.handle_observation_event(bus, repo, _make_event(observation))
    first_read = await repo.get_attacker_by_uuid(attacker_uuid)
    assert first_read is not None
    stub_id = first_read["identity_id"]
    assert stub_id is not None

    # Replaying the event must not swap the stub for a new one.
    await _aw.handle_observation_event(bus, repo, _make_event(observation))
    second_read = await repo.get_attacker_by_uuid(attacker_uuid)
    assert second_read["identity_id"] == stub_id
    await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_handle_event_defers_for_missing_attacker(repo) -> None:
    """With no Attacker row the handler returns quietly and leaves no
    orphan identity behind (the worker reads this as 'profiler hasn't
    materialised the attacker, defer')."""
    unknown_attacker = "00000000000000000000000000000000"
    observation = {
        "attacker_uuid": unknown_attacker,
        "primitive": "motor.input_modality",
        "value": "pasted",
        "ts": 1714000000.0,
        "confidence": 0.9,
    }
    bus = FakeBus()
    await bus.connect()
    # Must complete without raising.
    await _aw.handle_observation_event(bus, repo, _make_event(observation))
    # Nothing may have been materialised.
    all_identities = await repo.list_all_identities()
    assert all_identities == []
    await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_handle_event_skips_malformed_payload(
    repo, attacker_uuid: str,
) -> None:
    """Payloads lacking ``attacker_uuid`` or ``primitive`` are skipped
    without raising. Delivery is at-least-once, so a bad payload must
    not poison the consumer."""
    malformed = [
        {"primitive": "motor.input_modality"},  # no attacker_uuid
        {"attacker_uuid": attacker_uuid},       # no primitive
        {},                                     # neither
    ]
    bus = FakeBus()
    await bus.connect()
    for payload in malformed:
        await _aw.handle_observation_event(bus, repo, _make_event(payload))

    # Every payload was rejected before the stub helper could run, so
    # the seeded attacker is still unlinked.
    row = await repo.get_attacker_by_uuid(attacker_uuid)
    assert row is not None
    assert row["identity_id"] is None
    await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_handle_event_idempotent_per_observation(
    repo, attacker_uuid: str,
) -> None:
    """Replaying one payload five times yields exactly one stub
    identity and no exception."""
    observation = {
        "attacker_uuid": attacker_uuid,
        "primitive": "motor.input_modality",
        "value": "pasted",
        "ts": 1714000000.0,
        "confidence": 0.9,
    }
    bus = FakeBus()
    await bus.connect()
    replays = 5
    while replays > 0:
        await _aw.handle_observation_event(bus, repo, _make_event(observation))
        replays -= 1

    all_identities = await repo.list_all_identities()
    assert len(all_identities) == 1
    await bus.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_event_object_payload_attribute(
    repo, attacker_uuid: str,
) -> None:
    """Real bus events expose their data on ``.payload``; the handler
    has to read that attribute rather than assume the event is itself
    the dict."""
    class _Evt:
        """Minimal event exposing only ``payload``."""

        def __init__(self, payload: dict[str, Any]) -> None:
            self.payload = payload

    wrapped = _Evt({
        "attacker_uuid": attacker_uuid,
        "primitive": "cognitive.feedback_loop_engagement",
        "value": "closed_loop",
        "ts": 1714000000.0,
        "confidence": 0.85,
    })
    bus = FakeBus()
    await bus.connect()
    await _aw.handle_observation_event(bus, repo, wrapped)
    row = await repo.get_attacker_by_uuid(attacker_uuid)
    assert row is not None
    assert row["identity_id"] is not None
    await bus.close()
|
||||||
224
tests/db/test_attribution_state.py
Normal file
224
tests/db/test_attribution_state.py
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
"""AttributionStateRow + identity-stub repo tests — Phase 1 substrate.
|
||||||
|
|
||||||
|
Mirrors ``tests/db/test_observations.py``: SQLite ``tmp_path`` factory,
|
||||||
|
``@pytest.mark.anyio`` markers, an ``Attacker`` seeded so the stub-
|
||||||
|
materialisation path has a valid FK.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from decnet.web.db.factory import get_repository
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
async def repo(tmp_path: Path):
    """Initialised repository backed by a database file under
    ``tmp_path``, so each test gets its own store."""
    db_file = tmp_path / "attribution.db"
    repository = get_repository(db_path=str(db_file))
    await repository.initialize()
    return repository
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
async def attacker_uuid(repo) -> str:
    """Seed one Attacker row and hand back what ``upsert_attacker``
    returns for it (used by the tests as the attacker's UUID)."""
    seen_at = datetime.now(timezone.utc)
    seed = {
        "ip": "10.0.0.7",
        "first_seen": seen_at,
        "last_seen": seen_at,
    }
    return await repo.upsert_attacker(seed)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_ensure_stub_creates_identity_for_new_attacker(
    repo, attacker_uuid: str,
) -> None:
    """On the first call the Attacker has no ``identity_id``, so a stub
    is created and stamped onto the Attacker row."""
    stub_id = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert stub_id is not None
    assert isinstance(stub_id, str)

    # The Attacker row now points at the stub.
    attacker_row = await repo.get_attacker_by_uuid(attacker_uuid)
    assert attacker_row is not None
    assert attacker_row["identity_id"] == stub_id

    # The stub itself exists, is unmerged, and is at schema version 1.
    stub = await repo.get_identity_by_uuid(stub_id)
    assert stub is not None
    assert stub["uuid"] == stub_id
    assert stub["merged_into_uuid"] is None
    assert stub["schema_version"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_ensure_stub_idempotent(repo, attacker_uuid: str) -> None:
    """Repeated calls keep returning the identity_uuid of the first."""
    results = []
    for _ in range(3):
        results.append(
            await repo.ensure_stub_identity_for_attacker(attacker_uuid)
        )
    first, second, third = results
    assert first == second
    assert second == third
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_ensure_stub_returns_none_for_missing_attacker(repo) -> None:
    """An unknown attacker yields ``None`` rather than an exception —
    the worker treats that as 'defer'."""
    unknown_attacker = "00000000000000000000000000000000"
    result = await repo.ensure_stub_identity_for_attacker(unknown_attacker)
    assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_upsert_and_read_back_state(repo, attacker_uuid: str) -> None:
    """Round-trip: every caller-supplied column on the state row
    survives one insert + read.

    Every key written is compared against what is read back, so a
    column that is silently dropped or coerced fails the test rather
    than going unnoticed.
    """
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    written = {
        "identity_uuid": identity_uuid,
        "primitive": "motor.input_modality",
        "current_value": "pasted",
        "state": "stable",
        "confidence": 0.91,
        "observation_count": 5,
        "last_change_ts": 1714521660.456,
        "last_observation_ts": 1714521660.456,
    }
    await repo.upsert_attribution_state(written)

    out = await repo.get_attribution_state(
        identity_uuid, "motor.input_modality",
    )
    assert out is not None
    # Natural key.
    assert out["identity_uuid"] == identity_uuid
    assert out["primitive"] == "motor.input_modality"
    # Mutable payload columns.
    assert out["state"] == "stable"
    assert out["confidence"] == 0.91
    assert out["current_value"] == "pasted"
    assert out["observation_count"] == 5
    assert out["last_change_ts"] == 1714521660.456
    assert out["last_observation_ts"] == 1714521660.456
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_upsert_idempotent_on_natural_key(
    repo, attacker_uuid: str,
) -> None:
    """Upserting the same (identity_uuid, primitive) twice leaves one
    row, and the second write wins on the mutable fields."""
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    first_write = {
        "identity_uuid": identity_uuid,
        "primitive": "motor.input_modality",
        "current_value": "typed",
        "state": "stable",
        "confidence": 0.7,
        "observation_count": 3,
        "last_change_ts": 1714000000.0,
        "last_observation_ts": 1714000000.0,
    }
    second_write = dict(first_write)
    second_write.update({
        "current_value": "pasted",
        "state": "drifting",
        "confidence": 0.85,
        "observation_count": 8,
        "last_change_ts": 1714000300.0,
        "last_observation_ts": 1714000400.0,
    })
    await repo.upsert_attribution_state(first_write)
    await repo.upsert_attribution_state(second_write)

    stored = await repo.get_attribution_state_for_identity(identity_uuid)
    assert len(stored) == 1
    only_row = stored[0]
    assert only_row["state"] == "drifting"
    assert only_row["confidence"] == 0.85
    assert only_row["current_value"] == "pasted"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_get_state_for_identity_orders_by_primitive(
    repo, attacker_uuid: str,
) -> None:
    """Several primitives produce one row each, returned in primitive
    order so API output is deterministic."""
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    # Deliberately not in sorted order when written.
    primitives = [
        "motor.input_modality",
        "cognitive.feedback_loop_engagement",
        "temporal.weekend_cadence",
    ]
    for offset, name in enumerate(primitives):
        stamp = 1714000000.0 + offset
        await repo.upsert_attribution_state({
            "identity_uuid": identity_uuid,
            "primitive": name,
            "current_value": "x",
            "state": "stable",
            "confidence": 0.8,
            "observation_count": 5,
            "last_change_ts": stamp,
            "last_observation_ts": stamp,
        })

    stored = await repo.get_attribution_state_for_identity(identity_uuid)
    returned_order = [entry["primitive"] for entry in stored]
    assert returned_order == sorted(primitives)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_list_multi_actor_requires_two_primitives(
    repo, attacker_uuid: str,
) -> None:
    """One ``multi_actor`` primitive is too noisy to report; the
    identity surfaces only once a second primitive flags it too."""
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None

    first_flag = {
        "identity_uuid": identity_uuid,
        "primitive": "motor.input_modality",
        "current_value": "conflicted",
        "state": "multi_actor",
        "confidence": 0.55,
        "observation_count": 10,
        "last_change_ts": 1714000000.0,
        "last_observation_ts": 1714000000.0,
    }
    second_flag = {
        "identity_uuid": identity_uuid,
        "primitive": "cognitive.feedback_loop_engagement",
        "current_value": "conflicted",
        "state": "multi_actor",
        "confidence": 0.6,
        "observation_count": 8,
        "last_change_ts": 1714000100.0,
        "last_observation_ts": 1714000100.0,
    }

    # A lone multi_actor row is below the co-flag threshold.
    await repo.upsert_attribution_state(first_flag)
    assert await repo.list_multi_actor_identities() == []

    # The second row tips it over: the identity is listed with both
    # primitives.
    await repo.upsert_attribution_state(second_flag)
    listed = await repo.list_multi_actor_identities()
    assert len(listed) == 1
    entry = listed[0]
    assert entry["identity_uuid"] == identity_uuid
    assert sorted(entry["primitives"]) == [
        "cognitive.feedback_loop_engagement",
        "motor.input_modality",
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_get_state_returns_none_for_unknown_pair(
    repo, attacker_uuid: str,
) -> None:
    """A pair with no stored row reads back as ``None`` — the worker
    takes that to mean 'no prior state, initialise from this
    observation'."""
    identity_uuid = await repo.ensure_stub_identity_for_attacker(attacker_uuid)
    assert identity_uuid is not None
    never_written = "motor.input_modality"
    state = await repo.get_attribution_state(identity_uuid, never_written)
    assert state is None
|
||||||
Reference in New Issue
Block a user