v0 Phase 1 of ATTRIBUTION-ENGINE.md:
* AttributionStateRow SQLModel keyed on (identity_uuid, primitive)
per ANTI direction — re-keying state rows when the v1 clusterer
merges attackers is the migration debt v0 should not bake in.
ATTRIBUTION-ENGINE.md updated with the deviation note.
* AttributionMixin: ensure_stub_identity_for_attacker, idempotent
upsert_attribution_state, get_attribution_state[_for_identity],
list_multi_actor_identities (the Phase 5 correlator's read).
* attribution.profile.{state_changed,multi_actor_suspected} bus
topics + builder; wiki Service-Bus.md updated separately.
* attribution_worker.py: subscribes to attacker.observation.>,
ensures stub identity per event, logs and continues. No merger,
no state writes, no derived events — Phase 4 wires those.
* attribution/{aggregate.py,_thresholds.py} skeletons: Phase 2
fills _aggregate_categorical, Phase 3 adds numeric+hash+dispatcher.
216 lines
8.4 KiB
Python
216 lines
8.4 KiB
Python
"""Repo mixin for the ``attribution_state`` table + identity stub
|
|
materialisation.
|
|
|
|
Composed onto :class:`SQLModelRepository`. Five public methods, all
|
|
serving the v0 attribution engine
|
|
(``decnet.correlation.attribution_worker``):
|
|
|
|
* :meth:`ensure_stub_identity_for_attacker` — pre-clusterer 1:1 stub
|
|
identity creation. Idempotent under concurrent observation bursts.
|
|
* :meth:`upsert_attribution_state` — keyed on
|
|
``(identity_uuid, primitive)``.
|
|
* :meth:`get_attribution_state` / :meth:`get_attribution_state_for_identity`
|
|
— single-row and per-identity reads.
|
|
* :meth:`list_multi_actor_identities` — feeds the Phase 5 cross-
|
|
primitive correlator.
|
|
|
|
See ``development/ATTRIBUTION-ENGINE.md`` for the full design.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import uuid as _uuid
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Optional
|
|
|
|
from sqlalchemy import func, select
|
|
from sqlmodel import col
|
|
|
|
from decnet.web.db.models import (
|
|
Attacker,
|
|
AttackerIdentity,
|
|
AttributionStateRow,
|
|
)
|
|
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
|
|
|
|
|
class AttributionMixin(_MixinBase):
|
|
"""Mixin: methods composed onto :class:`SQLModelRepository`."""
|
|
|
|
async def ensure_stub_identity_for_attacker(
|
|
self, attacker_uuid: str,
|
|
) -> Optional[str]:
|
|
"""Return the ``identity_uuid`` for *attacker_uuid*, creating a
|
|
degenerate 1:1 stub in ``attacker_identities`` if absent.
|
|
|
|
Returns ``None`` when the Attacker row itself is missing — the
|
|
attribution worker treats that as "defer" (mirrors the
|
|
``_handler.handle_session_ended`` posture in BEHAVE-SHELL).
|
|
|
|
Idempotent: the second caller for the same attacker reads the
|
|
``identity_id`` stamped by the first caller and returns it
|
|
without inserting again. Race: two concurrent first-callers
|
|
could both see ``identity_id = NULL`` and both create stubs;
|
|
the loser's commit would leave a dangling AttackerIdentity row
|
|
with no Attacker referencing it. Acceptable in v0 (rare; rows
|
|
are tiny; gc'd in v1 when the clusterer runs). The
|
|
single-writer attribution worker plus the bus's per-identity
|
|
ordering make even that race vanishingly rare in practice.
|
|
"""
|
|
async with self._session() as session:
|
|
attacker_row = (
|
|
await session.execute(
|
|
select(Attacker).where(Attacker.uuid == attacker_uuid)
|
|
)
|
|
).scalar_one_or_none()
|
|
if attacker_row is None:
|
|
return None
|
|
if attacker_row.identity_id:
|
|
return attacker_row.identity_id
|
|
new_uuid = _uuid.uuid4().hex
|
|
now = datetime.now(timezone.utc)
|
|
session.add(
|
|
AttackerIdentity(
|
|
uuid=new_uuid,
|
|
schema_version=1,
|
|
first_seen_at=attacker_row.first_seen,
|
|
last_seen_at=attacker_row.last_seen,
|
|
created_at=now,
|
|
updated_at=now,
|
|
observation_count=1,
|
|
)
|
|
)
|
|
attacker_row.identity_id = new_uuid
|
|
session.add(attacker_row)
|
|
await session.commit()
|
|
return new_uuid
|
|
|
|
async def upsert_attribution_state(
|
|
self, data: dict[str, Any],
|
|
) -> None:
|
|
"""Insert or update one ``(identity_uuid, primitive)`` row.
|
|
|
|
``data`` MUST carry: ``identity_uuid``, ``primitive``,
|
|
``current_value``, ``state``, ``confidence``,
|
|
``observation_count``, ``last_change_ts``,
|
|
``last_observation_ts``. ``schema_version`` and ``updated_at``
|
|
are managed here.
|
|
"""
|
|
identity_uuid = data["identity_uuid"]
|
|
primitive = data["primitive"]
|
|
async with self._session() as session:
|
|
existing = (
|
|
await session.execute(
|
|
select(AttributionStateRow).where(
|
|
AttributionStateRow.identity_uuid == identity_uuid,
|
|
AttributionStateRow.primitive == primitive,
|
|
)
|
|
)
|
|
).scalar_one_or_none()
|
|
now = datetime.now(timezone.utc)
|
|
if existing is not None:
|
|
for k, v in data.items():
|
|
if k in ("identity_uuid", "primitive"):
|
|
continue
|
|
setattr(existing, k, v)
|
|
existing.updated_at = now
|
|
session.add(existing)
|
|
else:
|
|
session.add(
|
|
AttributionStateRow(
|
|
**{**data, "schema_version": 1, "updated_at": now}
|
|
)
|
|
)
|
|
await session.commit()
|
|
|
|
async def get_attribution_state(
|
|
self, identity_uuid: str, primitive: str,
|
|
) -> Optional[dict[str, Any]]:
|
|
"""Single-row lookup. ``None`` when the merger has not yet run
|
|
for this ``(identity_uuid, primitive)`` pair."""
|
|
async with self._session() as session:
|
|
row = (
|
|
await session.execute(
|
|
select(AttributionStateRow).where(
|
|
AttributionStateRow.identity_uuid == identity_uuid,
|
|
AttributionStateRow.primitive == primitive,
|
|
)
|
|
)
|
|
).scalar_one_or_none()
|
|
return None if row is None else row.model_dump(mode="json")
|
|
|
|
async def get_attribution_state_for_identity(
|
|
self, identity_uuid: str,
|
|
) -> list[dict[str, Any]]:
|
|
"""All attribution-state rows for one identity, primitive-
|
|
ordered for deterministic API output."""
|
|
async with self._session() as session:
|
|
rows = (
|
|
await session.execute(
|
|
select(AttributionStateRow)
|
|
.where(AttributionStateRow.identity_uuid == identity_uuid)
|
|
.order_by(AttributionStateRow.primitive)
|
|
)
|
|
).scalars().all()
|
|
return [r.model_dump(mode="json") for r in rows]
|
|
|
|
async def list_multi_actor_identities(
|
|
self,
|
|
) -> list[dict[str, Any]]:
|
|
"""Identities with ≥ 2 primitives currently in ``multi_actor``.
|
|
|
|
Output shape::
|
|
|
|
[{"identity_uuid": "...", "primitives": ["motor.input_modality",
|
|
"cognitive.feedback_loop_engagement"]},
|
|
...]
|
|
|
|
Empty list when no identity is co-flagged. Used by the Phase 5
|
|
cross-primitive correlator — single-primitive ``multi_actor``
|
|
is too noisy to alarm on, two independent primitives is the
|
|
threshold for ``attribution.profile.multi_actor_suspected``.
|
|
"""
|
|
async with self._session() as session:
|
|
# First pass: identities with ≥ 2 multi_actor rows.
|
|
count_stmt = (
|
|
select(
|
|
col(AttributionStateRow.identity_uuid),
|
|
func.count().label("ct"),
|
|
)
|
|
.where(AttributionStateRow.state == "multi_actor")
|
|
.group_by(col(AttributionStateRow.identity_uuid))
|
|
.having(func.count() >= 2)
|
|
)
|
|
co_flagged = [
|
|
row.identity_uuid
|
|
for row in (await session.execute(count_stmt)).all()
|
|
]
|
|
if not co_flagged:
|
|
return []
|
|
# Second pass: collect the primitive list per co-flagged
|
|
# identity. Two queries beat one wide one because the
|
|
# first query's count-having filter prunes the second
|
|
# query's row set without a self-join.
|
|
detail_stmt = (
|
|
select(
|
|
col(AttributionStateRow.identity_uuid),
|
|
col(AttributionStateRow.primitive),
|
|
)
|
|
.where(
|
|
AttributionStateRow.state == "multi_actor",
|
|
col(AttributionStateRow.identity_uuid).in_(co_flagged),
|
|
)
|
|
.order_by(
|
|
col(AttributionStateRow.identity_uuid),
|
|
col(AttributionStateRow.primitive),
|
|
)
|
|
)
|
|
grouped: dict[str, list[str]] = {}
|
|
for row in (await session.execute(detail_stmt)).all():
|
|
grouped.setdefault(row.identity_uuid, []).append(
|
|
row.primitive,
|
|
)
|
|
return [
|
|
{"identity_uuid": iuuid, "primitives": prims}
|
|
for iuuid, prims in grouped.items()
|
|
]
|