feat(correlation/attribution): substrate + idle handler (Phase 1)
v0 Phase 1 of ATTRIBUTION-ENGINE.md:
* AttributionStateRow SQLModel keyed on (identity_uuid, primitive)
per ANTI direction — re-keying state rows when the v1 clusterer
merges attackers is the migration debt v0 should not bake in.
ATTRIBUTION-ENGINE.md updated with the deviation note.
* AttributionMixin: ensure_stub_identity_for_attacker, idempotent
upsert_attribution_state, get_attribution_state[_for_identity],
list_multi_actor_identities (the Phase 5 correlator's read).
* attribution.profile.{state_changed,multi_actor_suspected} bus
topics + builder; wiki Service-Bus.md updated separately.
* attribution_worker.py: subscribes to attacker.observation.>,
ensures stub identity per event, logs and continues. No merger,
no state writes, no derived events — Phase 4 wires those.
* attribution/{aggregate.py,_thresholds.py} skeletons: Phase 2
fills _aggregate_categorical, Phase 3 adds numeric+hash+dispatcher.
This commit is contained in:
@@ -35,6 +35,7 @@ from decnet.web.db.sqlmodel_repo._helpers import ( # noqa: F401 (re-exported f
|
||||
)
|
||||
from decnet.web.db.sqlmodel_repo.attacker_intel import AttackerIntelMixin
|
||||
from decnet.web.db.sqlmodel_repo.attackers import AttackersMixin
|
||||
from decnet.web.db.sqlmodel_repo.attribution import AttributionMixin
|
||||
from decnet.web.db.sqlmodel_repo.auth import AuthMixin
|
||||
from decnet.web.db.sqlmodel_repo.bounties import BountiesMixin
|
||||
from decnet.web.db.sqlmodel_repo.campaigns import CampaignsMixin
|
||||
@@ -58,6 +59,7 @@ from decnet.web.db.sqlmodel_repo.webhooks import WebhooksMixin
|
||||
class SQLModelRepository(
|
||||
AttackerIntelMixin,
|
||||
AttackersMixin,
|
||||
AttributionMixin,
|
||||
AuthMixin,
|
||||
BountiesMixin,
|
||||
CampaignsMixin,
|
||||
|
||||
215
decnet/web/db/sqlmodel_repo/attribution.py
Normal file
215
decnet/web/db/sqlmodel_repo/attribution.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""Repo mixin for the ``attribution_state`` table + identity stub
|
||||
materialisation.
|
||||
|
||||
Composed onto :class:`SQLModelRepository`. Five public methods, all
|
||||
serving the v0 attribution engine
|
||||
(``decnet.correlation.attribution_worker``):
|
||||
|
||||
* :meth:`ensure_stub_identity_for_attacker` — pre-clusterer 1:1 stub
|
||||
identity creation. Idempotent under concurrent observation bursts.
|
||||
* :meth:`upsert_attribution_state` — keyed on
|
||||
``(identity_uuid, primitive)``.
|
||||
* :meth:`get_attribution_state` / :meth:`get_attribution_state_for_identity`
|
||||
— single-row and per-identity reads.
|
||||
* :meth:`list_multi_actor_identities` — feeds the Phase 5 cross-
|
||||
primitive correlator.
|
||||
|
||||
See ``development/ATTRIBUTION-ENGINE.md`` for the full design.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid as _uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlmodel import col
|
||||
|
||||
from decnet.web.db.models import (
|
||||
Attacker,
|
||||
AttackerIdentity,
|
||||
AttributionStateRow,
|
||||
)
|
||||
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
||||
|
||||
|
||||
class AttributionMixin(_MixinBase):
|
||||
"""Mixin: methods composed onto :class:`SQLModelRepository`."""
|
||||
|
||||
async def ensure_stub_identity_for_attacker(
|
||||
self, attacker_uuid: str,
|
||||
) -> Optional[str]:
|
||||
"""Return the ``identity_uuid`` for *attacker_uuid*, creating a
|
||||
degenerate 1:1 stub in ``attacker_identities`` if absent.
|
||||
|
||||
Returns ``None`` when the Attacker row itself is missing — the
|
||||
attribution worker treats that as "defer" (mirrors the
|
||||
``_handler.handle_session_ended`` posture in BEHAVE-SHELL).
|
||||
|
||||
Idempotent: the second caller for the same attacker reads the
|
||||
``identity_id`` stamped by the first caller and returns it
|
||||
without inserting again. Race: two concurrent first-callers
|
||||
could both see ``identity_id = NULL`` and both create stubs;
|
||||
the loser's commit would leave a dangling AttackerIdentity row
|
||||
with no Attacker referencing it. Acceptable in v0 (rare; rows
|
||||
are tiny; gc'd in v1 when the clusterer runs). The
|
||||
single-writer attribution worker plus the bus's per-identity
|
||||
ordering make even that race vanishingly rare in practice.
|
||||
"""
|
||||
async with self._session() as session:
|
||||
attacker_row = (
|
||||
await session.execute(
|
||||
select(Attacker).where(Attacker.uuid == attacker_uuid)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
if attacker_row is None:
|
||||
return None
|
||||
if attacker_row.identity_id:
|
||||
return attacker_row.identity_id
|
||||
new_uuid = _uuid.uuid4().hex
|
||||
now = datetime.now(timezone.utc)
|
||||
session.add(
|
||||
AttackerIdentity(
|
||||
uuid=new_uuid,
|
||||
schema_version=1,
|
||||
first_seen_at=attacker_row.first_seen,
|
||||
last_seen_at=attacker_row.last_seen,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
observation_count=1,
|
||||
)
|
||||
)
|
||||
attacker_row.identity_id = new_uuid
|
||||
session.add(attacker_row)
|
||||
await session.commit()
|
||||
return new_uuid
|
||||
|
||||
async def upsert_attribution_state(
|
||||
self, data: dict[str, Any],
|
||||
) -> None:
|
||||
"""Insert or update one ``(identity_uuid, primitive)`` row.
|
||||
|
||||
``data`` MUST carry: ``identity_uuid``, ``primitive``,
|
||||
``current_value``, ``state``, ``confidence``,
|
||||
``observation_count``, ``last_change_ts``,
|
||||
``last_observation_ts``. ``schema_version`` and ``updated_at``
|
||||
are managed here.
|
||||
"""
|
||||
identity_uuid = data["identity_uuid"]
|
||||
primitive = data["primitive"]
|
||||
async with self._session() as session:
|
||||
existing = (
|
||||
await session.execute(
|
||||
select(AttributionStateRow).where(
|
||||
AttributionStateRow.identity_uuid == identity_uuid,
|
||||
AttributionStateRow.primitive == primitive,
|
||||
)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
now = datetime.now(timezone.utc)
|
||||
if existing is not None:
|
||||
for k, v in data.items():
|
||||
if k in ("identity_uuid", "primitive"):
|
||||
continue
|
||||
setattr(existing, k, v)
|
||||
existing.updated_at = now
|
||||
session.add(existing)
|
||||
else:
|
||||
session.add(
|
||||
AttributionStateRow(
|
||||
**{**data, "schema_version": 1, "updated_at": now}
|
||||
)
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
async def get_attribution_state(
|
||||
self, identity_uuid: str, primitive: str,
|
||||
) -> Optional[dict[str, Any]]:
|
||||
"""Single-row lookup. ``None`` when the merger has not yet run
|
||||
for this ``(identity_uuid, primitive)`` pair."""
|
||||
async with self._session() as session:
|
||||
row = (
|
||||
await session.execute(
|
||||
select(AttributionStateRow).where(
|
||||
AttributionStateRow.identity_uuid == identity_uuid,
|
||||
AttributionStateRow.primitive == primitive,
|
||||
)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
return None if row is None else row.model_dump(mode="json")
|
||||
|
||||
async def get_attribution_state_for_identity(
|
||||
self, identity_uuid: str,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""All attribution-state rows for one identity, primitive-
|
||||
ordered for deterministic API output."""
|
||||
async with self._session() as session:
|
||||
rows = (
|
||||
await session.execute(
|
||||
select(AttributionStateRow)
|
||||
.where(AttributionStateRow.identity_uuid == identity_uuid)
|
||||
.order_by(AttributionStateRow.primitive)
|
||||
)
|
||||
).scalars().all()
|
||||
return [r.model_dump(mode="json") for r in rows]
|
||||
|
||||
async def list_multi_actor_identities(
|
||||
self,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Identities with ≥ 2 primitives currently in ``multi_actor``.
|
||||
|
||||
Output shape::
|
||||
|
||||
[{"identity_uuid": "...", "primitives": ["motor.input_modality",
|
||||
"cognitive.feedback_loop_engagement"]},
|
||||
...]
|
||||
|
||||
Empty list when no identity is co-flagged. Used by the Phase 5
|
||||
cross-primitive correlator — single-primitive ``multi_actor``
|
||||
is too noisy to alarm on, two independent primitives is the
|
||||
threshold for ``attribution.profile.multi_actor_suspected``.
|
||||
"""
|
||||
async with self._session() as session:
|
||||
# First pass: identities with ≥ 2 multi_actor rows.
|
||||
count_stmt = (
|
||||
select(
|
||||
col(AttributionStateRow.identity_uuid),
|
||||
func.count().label("ct"),
|
||||
)
|
||||
.where(AttributionStateRow.state == "multi_actor")
|
||||
.group_by(col(AttributionStateRow.identity_uuid))
|
||||
.having(func.count() >= 2)
|
||||
)
|
||||
co_flagged = [
|
||||
row.identity_uuid
|
||||
for row in (await session.execute(count_stmt)).all()
|
||||
]
|
||||
if not co_flagged:
|
||||
return []
|
||||
# Second pass: collect the primitive list per co-flagged
|
||||
# identity. Two queries beat one wide one because the
|
||||
# first query's count-having filter prunes the second
|
||||
# query's row set without a self-join.
|
||||
detail_stmt = (
|
||||
select(
|
||||
col(AttributionStateRow.identity_uuid),
|
||||
col(AttributionStateRow.primitive),
|
||||
)
|
||||
.where(
|
||||
AttributionStateRow.state == "multi_actor",
|
||||
col(AttributionStateRow.identity_uuid).in_(co_flagged),
|
||||
)
|
||||
.order_by(
|
||||
col(AttributionStateRow.identity_uuid),
|
||||
col(AttributionStateRow.primitive),
|
||||
)
|
||||
)
|
||||
grouped: dict[str, list[str]] = {}
|
||||
for row in (await session.execute(detail_stmt)).all():
|
||||
grouped.setdefault(row.identity_uuid, []).append(
|
||||
row.primitive,
|
||||
)
|
||||
return [
|
||||
{"identity_uuid": iuuid, "primitives": prims}
|
||||
for iuuid, prims in grouped.items()
|
||||
]
|
||||
Reference in New Issue
Block a user