feat(correlation/attribution): substrate + idle handler (Phase 1)
v0 Phase 1 of ATTRIBUTION-ENGINE.md:
* AttributionStateRow SQLModel keyed on (identity_uuid, primitive)
per ANTI direction — re-keying state rows when the v1 clusterer
merges attackers is the migration debt v0 should not bake in.
ATTRIBUTION-ENGINE.md updated with the deviation note.
* AttributionMixin: ensure_stub_identity_for_attacker, idempotent
upsert_attribution_state, get_attribution_state[_for_identity],
list_multi_actor_identities (the Phase 5 correlator's read).
* attribution.profile.{state_changed,multi_actor_suspected} bus
topics + builder; wiki Service-Bus.md updated separately.
* attribution_worker.py: subscribes to attacker.observation.>,
ensures stub identity per event, logs and continues. No merger,
no state writes, no derived events — Phase 4 wires those.
* attribution/{aggregate.py,_thresholds.py} skeletons: Phase 2
fills _aggregate_categorical, Phase 3 adds numeric+hash+dispatcher.
This commit is contained in:
@@ -59,6 +59,9 @@ from .attachments import (
|
||||
from .observations import (
|
||||
ObservationRow,
|
||||
)
|
||||
from .attribution_state import (
|
||||
AttributionStateRow,
|
||||
)
|
||||
from .campaigns import (
|
||||
Campaign,
|
||||
CampaignsResponse,
|
||||
@@ -252,6 +255,7 @@ __all__ = [
|
||||
"AttackerIdentity",
|
||||
"AttackerIntel",
|
||||
"AttackersResponse",
|
||||
"AttributionStateRow",
|
||||
"ObservationRow",
|
||||
"ObservedAttachment",
|
||||
"SmtpTarget",
|
||||
|
||||
78
decnet/web/db/models/attribution_state.py
Normal file
78
decnet/web/db/models/attribution_state.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Per-(identity, primitive) attribution state — v0 of the
|
||||
attribution engine.
|
||||
|
||||
Materialised view of the state machine in
|
||||
``decnet.correlation.attribution.aggregate``. Re-derivable from
|
||||
``observations`` + the DEBT-032 fingerprint-rotation log; this row is
|
||||
a cache for cheap dashboard reads, not a source of truth.
|
||||
|
||||
Keyed on ``identity_uuid``, not ``attacker_uuid``: pre-clusterer,
|
||||
every Attacker maps 1:1 to a stub row in ``attacker_identities``
|
||||
(``merged_into_uuid = NULL``) so the key is stable across the v0 / v1
|
||||
boundary. When v1's clusterer eventually merges identities, the loser
|
||||
row's state is recomputed from the union of observations under the
|
||||
winner — no schema change, no column-rename migration.
|
||||
|
||||
This deviates from ``development/ATTRIBUTION-ENGINE.md`` §"Subject of
|
||||
attribution in v0" (which resolved on ``attacker_uuid``); the doc gets
|
||||
a deviation note in the same commit that ships this file.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import JSON, Column, Index
|
||||
from sqlmodel import Field, SQLModel
|
||||
|
||||
|
||||
class AttributionStateRow(SQLModel, table=True):
|
||||
"""One state row per (identity, primitive). At most one row per
|
||||
pair — composite PK enforces it.
|
||||
"""
|
||||
|
||||
__tablename__ = "attribution_state"
|
||||
__table_args__ = (
|
||||
Index("ix_attribution_state_state", "state"),
|
||||
Index("ix_attribution_state_last_change", "last_change_ts"),
|
||||
Index(
|
||||
"ix_attribution_state_identity_state",
|
||||
"identity_uuid", "state",
|
||||
),
|
||||
)
|
||||
|
||||
# ── key ────────────────────────────────────────────────────────────
|
||||
identity_uuid: str = Field(
|
||||
foreign_key="attacker_identities.uuid", primary_key=True,
|
||||
)
|
||||
primitive: str = Field(primary_key=True)
|
||||
|
||||
# ── derived state ──────────────────────────────────────────────────
|
||||
# Mirrors the BEHAVE Observation ``value`` column shape so the
|
||||
# frontend can render the merger output the same way it renders raw
|
||||
# latest-wins values today (BEHAVE-INTEGRATION.md Q3).
|
||||
current_value: dict[str, Any] | str | int | float | bool | list = Field(
|
||||
sa_column=Column(JSON, nullable=False),
|
||||
)
|
||||
# 'unknown' | 'stable' | 'drifting' | 'conflicted' | 'multi_actor'.
|
||||
# Five states, frozen — see ATTRIBUTION-ENGINE.md §"State machine".
|
||||
state: str
|
||||
# Engine's confidence in the *state assertion*, not in any verdict
|
||||
# about the attacker. ``multi_actor`` is capped at 0.6 by
|
||||
# convention; other states use the merger's per-ValueKind formula.
|
||||
confidence: float
|
||||
# How many observations underlie this row. Used by the API to gate
|
||||
# ``unknown`` (< 3 obs) without re-querying ``observations``.
|
||||
observation_count: int = Field(default=0)
|
||||
# When ``state`` last flipped. Equals ``updated_at`` on insert.
|
||||
last_change_ts: float
|
||||
# Most recent observation that fed this row. Used by the merger to
|
||||
# detect drift windows without a full observation re-scan.
|
||||
last_observation_ts: float
|
||||
|
||||
# ── audit ──────────────────────────────────────────────────────────
|
||||
# Mirrors AttackerIdentity convention (federation gossip in v2).
|
||||
schema_version: int = Field(default=1)
|
||||
updated_at: datetime = Field(
|
||||
default_factory=lambda: datetime.now(timezone.utc),
|
||||
)
|
||||
@@ -1492,3 +1492,86 @@ class BaseRepository(ABC):
|
||||
SQLModel TTP mixin.
|
||||
"""
|
||||
return []
|
||||
|
||||
# ─── Attribution engine (v0 — aggregation only) ────────────────────
|
||||
# See development/ATTRIBUTION-ENGINE.md. The engine consumes
|
||||
# ``attacker.observation.*`` events and writes per-(identity,
|
||||
# primitive) state rows. Pre-clusterer, every Attacker maps 1:1
|
||||
# to a stub AttackerIdentity row so the keying is stable across
|
||||
# the v0 / v1 boundary.
|
||||
|
||||
@abstractmethod
|
||||
async def ensure_stub_identity_for_attacker(
|
||||
self, attacker_uuid: str,
|
||||
) -> Optional[str]:
|
||||
"""Return the ``identity_uuid`` for *attacker_uuid*, creating a
|
||||
degenerate 1:1 stub in ``attacker_identities`` if the attacker
|
||||
does not yet have one.
|
||||
|
||||
Returns ``None`` if the attacker row itself is missing (the
|
||||
worker treats that as "defer" — the profiler tick has not yet
|
||||
materialised the Attacker; same posture as
|
||||
``_handler.handle_session_ended`` in BEHAVE-SHELL).
|
||||
|
||||
Idempotent under concurrent calls: the second caller sees the
|
||||
first caller's stamp and returns the same uuid. Implementations
|
||||
are responsible for serialising the read-then-insert against
|
||||
the bus's at-least-once delivery.
|
||||
|
||||
The third return value (boolean) signalling "newly created" is
|
||||
deliberately omitted — the worker emits ``identity.formed`` on
|
||||
a transition observed via the row's absence on its first call,
|
||||
not via a flag from the repo. Keeps the repo idempotent and
|
||||
flag-free.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
async def upsert_attribution_state(self, data: dict[str, Any]) -> None:
|
||||
"""Insert or update an :class:`AttributionStateRow` keyed on
|
||||
``(identity_uuid, primitive)``.
|
||||
|
||||
``data`` MUST carry: ``identity_uuid``, ``primitive``,
|
||||
``current_value``, ``state``, ``confidence``,
|
||||
``observation_count``, ``last_change_ts``,
|
||||
``last_observation_ts``. ``schema_version`` defaults to 1.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
async def get_attribution_state_for_identity(
|
||||
self, identity_uuid: str,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Return every attribution-state row for *identity_uuid*.
|
||||
|
||||
Empty list when the identity has no derived state yet (e.g.
|
||||
observations have arrived but the engine has not run, or the
|
||||
engine has not produced ≥ 3 observations per primitive). The
|
||||
attribution API surface and AttackerDetail badge renderer both
|
||||
consume this projection.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
async def get_attribution_state(
|
||||
self, identity_uuid: str, primitive: str,
|
||||
) -> Optional[dict[str, Any]]:
|
||||
"""Return one ``(identity_uuid, primitive)`` row, or ``None``.
|
||||
|
||||
Used by the attribution worker on each inbound observation to
|
||||
load the prior state before running the merger. ``None`` means
|
||||
"no prior state — initialise from this observation alone".
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
async def list_multi_actor_identities(
|
||||
self,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""List ``{identity_uuid, primitives}`` for identities that
|
||||
currently have ≥ 2 primitives flagged ``multi_actor``.
|
||||
|
||||
Backs the cross-primitive correlator (Phase 5). Empty list when
|
||||
no identity is co-flagged.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -35,6 +35,7 @@ from decnet.web.db.sqlmodel_repo._helpers import ( # noqa: F401 (re-exported f
|
||||
)
|
||||
from decnet.web.db.sqlmodel_repo.attacker_intel import AttackerIntelMixin
|
||||
from decnet.web.db.sqlmodel_repo.attackers import AttackersMixin
|
||||
from decnet.web.db.sqlmodel_repo.attribution import AttributionMixin
|
||||
from decnet.web.db.sqlmodel_repo.auth import AuthMixin
|
||||
from decnet.web.db.sqlmodel_repo.bounties import BountiesMixin
|
||||
from decnet.web.db.sqlmodel_repo.campaigns import CampaignsMixin
|
||||
@@ -58,6 +59,7 @@ from decnet.web.db.sqlmodel_repo.webhooks import WebhooksMixin
|
||||
class SQLModelRepository(
|
||||
AttackerIntelMixin,
|
||||
AttackersMixin,
|
||||
AttributionMixin,
|
||||
AuthMixin,
|
||||
BountiesMixin,
|
||||
CampaignsMixin,
|
||||
|
||||
215
decnet/web/db/sqlmodel_repo/attribution.py
Normal file
215
decnet/web/db/sqlmodel_repo/attribution.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""Repo mixin for the ``attribution_state`` table + identity stub
|
||||
materialisation.
|
||||
|
||||
Composed onto :class:`SQLModelRepository`. Five public methods, all
|
||||
serving the v0 attribution engine
|
||||
(``decnet.correlation.attribution_worker``):
|
||||
|
||||
* :meth:`ensure_stub_identity_for_attacker` — pre-clusterer 1:1 stub
|
||||
identity creation. Idempotent under concurrent observation bursts.
|
||||
* :meth:`upsert_attribution_state` — keyed on
|
||||
``(identity_uuid, primitive)``.
|
||||
* :meth:`get_attribution_state` / :meth:`get_attribution_state_for_identity`
|
||||
— single-row and per-identity reads.
|
||||
* :meth:`list_multi_actor_identities` — feeds the Phase 5 cross-
|
||||
primitive correlator.
|
||||
|
||||
See ``development/ATTRIBUTION-ENGINE.md`` for the full design.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid as _uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlmodel import col
|
||||
|
||||
from decnet.web.db.models import (
|
||||
Attacker,
|
||||
AttackerIdentity,
|
||||
AttributionStateRow,
|
||||
)
|
||||
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
||||
|
||||
|
||||
class AttributionMixin(_MixinBase):
|
||||
"""Mixin: methods composed onto :class:`SQLModelRepository`."""
|
||||
|
||||
async def ensure_stub_identity_for_attacker(
|
||||
self, attacker_uuid: str,
|
||||
) -> Optional[str]:
|
||||
"""Return the ``identity_uuid`` for *attacker_uuid*, creating a
|
||||
degenerate 1:1 stub in ``attacker_identities`` if absent.
|
||||
|
||||
Returns ``None`` when the Attacker row itself is missing — the
|
||||
attribution worker treats that as "defer" (mirrors the
|
||||
``_handler.handle_session_ended`` posture in BEHAVE-SHELL).
|
||||
|
||||
Idempotent: the second caller for the same attacker reads the
|
||||
``identity_id`` stamped by the first caller and returns it
|
||||
without inserting again. Race: two concurrent first-callers
|
||||
could both see ``identity_id = NULL`` and both create stubs;
|
||||
the loser's commit would leave a dangling AttackerIdentity row
|
||||
with no Attacker referencing it. Acceptable in v0 (rare; rows
|
||||
are tiny; gc'd in v1 when the clusterer runs). The
|
||||
single-writer attribution worker plus the bus's per-identity
|
||||
ordering make even that race vanishingly rare in practice.
|
||||
"""
|
||||
async with self._session() as session:
|
||||
attacker_row = (
|
||||
await session.execute(
|
||||
select(Attacker).where(Attacker.uuid == attacker_uuid)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
if attacker_row is None:
|
||||
return None
|
||||
if attacker_row.identity_id:
|
||||
return attacker_row.identity_id
|
||||
new_uuid = _uuid.uuid4().hex
|
||||
now = datetime.now(timezone.utc)
|
||||
session.add(
|
||||
AttackerIdentity(
|
||||
uuid=new_uuid,
|
||||
schema_version=1,
|
||||
first_seen_at=attacker_row.first_seen,
|
||||
last_seen_at=attacker_row.last_seen,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
observation_count=1,
|
||||
)
|
||||
)
|
||||
attacker_row.identity_id = new_uuid
|
||||
session.add(attacker_row)
|
||||
await session.commit()
|
||||
return new_uuid
|
||||
|
||||
async def upsert_attribution_state(
|
||||
self, data: dict[str, Any],
|
||||
) -> None:
|
||||
"""Insert or update one ``(identity_uuid, primitive)`` row.
|
||||
|
||||
``data`` MUST carry: ``identity_uuid``, ``primitive``,
|
||||
``current_value``, ``state``, ``confidence``,
|
||||
``observation_count``, ``last_change_ts``,
|
||||
``last_observation_ts``. ``schema_version`` and ``updated_at``
|
||||
are managed here.
|
||||
"""
|
||||
identity_uuid = data["identity_uuid"]
|
||||
primitive = data["primitive"]
|
||||
async with self._session() as session:
|
||||
existing = (
|
||||
await session.execute(
|
||||
select(AttributionStateRow).where(
|
||||
AttributionStateRow.identity_uuid == identity_uuid,
|
||||
AttributionStateRow.primitive == primitive,
|
||||
)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
now = datetime.now(timezone.utc)
|
||||
if existing is not None:
|
||||
for k, v in data.items():
|
||||
if k in ("identity_uuid", "primitive"):
|
||||
continue
|
||||
setattr(existing, k, v)
|
||||
existing.updated_at = now
|
||||
session.add(existing)
|
||||
else:
|
||||
session.add(
|
||||
AttributionStateRow(
|
||||
**{**data, "schema_version": 1, "updated_at": now}
|
||||
)
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
async def get_attribution_state(
|
||||
self, identity_uuid: str, primitive: str,
|
||||
) -> Optional[dict[str, Any]]:
|
||||
"""Single-row lookup. ``None`` when the merger has not yet run
|
||||
for this ``(identity_uuid, primitive)`` pair."""
|
||||
async with self._session() as session:
|
||||
row = (
|
||||
await session.execute(
|
||||
select(AttributionStateRow).where(
|
||||
AttributionStateRow.identity_uuid == identity_uuid,
|
||||
AttributionStateRow.primitive == primitive,
|
||||
)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
return None if row is None else row.model_dump(mode="json")
|
||||
|
||||
async def get_attribution_state_for_identity(
|
||||
self, identity_uuid: str,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""All attribution-state rows for one identity, primitive-
|
||||
ordered for deterministic API output."""
|
||||
async with self._session() as session:
|
||||
rows = (
|
||||
await session.execute(
|
||||
select(AttributionStateRow)
|
||||
.where(AttributionStateRow.identity_uuid == identity_uuid)
|
||||
.order_by(AttributionStateRow.primitive)
|
||||
)
|
||||
).scalars().all()
|
||||
return [r.model_dump(mode="json") for r in rows]
|
||||
|
||||
async def list_multi_actor_identities(
|
||||
self,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Identities with ≥ 2 primitives currently in ``multi_actor``.
|
||||
|
||||
Output shape::
|
||||
|
||||
[{"identity_uuid": "...", "primitives": ["motor.input_modality",
|
||||
"cognitive.feedback_loop_engagement"]},
|
||||
...]
|
||||
|
||||
Empty list when no identity is co-flagged. Used by the Phase 5
|
||||
cross-primitive correlator — single-primitive ``multi_actor``
|
||||
is too noisy to alarm on, two independent primitives is the
|
||||
threshold for ``attribution.profile.multi_actor_suspected``.
|
||||
"""
|
||||
async with self._session() as session:
|
||||
# First pass: identities with ≥ 2 multi_actor rows.
|
||||
count_stmt = (
|
||||
select(
|
||||
col(AttributionStateRow.identity_uuid),
|
||||
func.count().label("ct"),
|
||||
)
|
||||
.where(AttributionStateRow.state == "multi_actor")
|
||||
.group_by(col(AttributionStateRow.identity_uuid))
|
||||
.having(func.count() >= 2)
|
||||
)
|
||||
co_flagged = [
|
||||
row.identity_uuid
|
||||
for row in (await session.execute(count_stmt)).all()
|
||||
]
|
||||
if not co_flagged:
|
||||
return []
|
||||
# Second pass: collect the primitive list per co-flagged
|
||||
# identity. Two queries beat one wide one because the
|
||||
# first query's count-having filter prunes the second
|
||||
# query's row set without a self-join.
|
||||
detail_stmt = (
|
||||
select(
|
||||
col(AttributionStateRow.identity_uuid),
|
||||
col(AttributionStateRow.primitive),
|
||||
)
|
||||
.where(
|
||||
AttributionStateRow.state == "multi_actor",
|
||||
col(AttributionStateRow.identity_uuid).in_(co_flagged),
|
||||
)
|
||||
.order_by(
|
||||
col(AttributionStateRow.identity_uuid),
|
||||
col(AttributionStateRow.primitive),
|
||||
)
|
||||
)
|
||||
grouped: dict[str, list[str]] = {}
|
||||
for row in (await session.execute(detail_stmt)).all():
|
||||
grouped.setdefault(row.identity_uuid, []).append(
|
||||
row.primitive,
|
||||
)
|
||||
return [
|
||||
{"identity_uuid": iuuid, "primitives": prims}
|
||||
for iuuid, prims in grouped.items()
|
||||
]
|
||||
Reference in New Issue
Block a user