Files
DECNET/decnet/web/db/sqlmodel_repo/ttp.py
anti 3977f06374 feat(ttp/ipv6_leak): wire Ipv6LeakLifter into composite tagger and worker
- Add "ipv6_leak" to KNOWN_SOURCE_KINDS in ttp/base.py
- Register Ipv6LeakLifter(store) in factory.py get_tagger()
- Subscribe worker to attacker.fingerprinted; route by Event.type
  so JARM/HASSH/ipv6_leak share the topic without source_kind collision
- Add bump_attacker_ipv6_leak() to BaseRepository (abstract) +
  TTPMixin (implementation): increments ipv6_leak_count, sets last_ipv6_*
  denorm fields, appends-with-dedup to AttackerIdentity.ipv6_link_local_iids
- Call bump_attacker_ipv6_leak from _process_event after insert_tags
- Add DummyRepo stub + coverage call in tests/db/test_base_repo.py
2026-05-17 20:41:55 -04:00

538 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""TTP-tagging repository — ``ttp_tag`` reads + idempotent inserts.
Implementation phase E.3.3 of ``development/TTP_TAGGING.md``. The
shape was pinned at E.1.10; this file fills in the bodies.
Dialect-split convention: portable rollup queries live here on the
mixin; the bulk-insert "ignore on duplicate" hook lands in the
per-dialect ``SQLiteRepository`` / ``MySQLRepository`` subclasses
(``decnet/web/db/sqlite/repository.py`` /
``decnet/web/db/mysql/repository.py``) where the actual
``ON CONFLICT DO NOTHING`` vs ``INSERT IGNORE`` SQL diverges.
"""
from __future__ import annotations
import json
from collections.abc import AsyncIterator
from datetime import datetime, timezone
from typing import Any, Optional
from sqlalchemy import func, select
from sqlmodel import col
from decnet.web.db.models import (
Attacker,
AttackerIdentity,
CampaignTechniqueRow,
IdentityTechniqueRow,
TechniqueRollupRow,
TTPTag,
)
from decnet.web.db.models.canary import CanaryTrigger
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
def _technique_name(tid: str | None) -> str | None:
    """Return the display name for technique ID *tid*.

    Pure delegation to ``decnet.ttp.attack_catalog.technique_name``;
    the argument and the result are passed through untouched.
    """
    # Function-local import on purpose: the catalog module is flagged as
    # heavy, so it is only loaded the first time a name is needed.
    from decnet.ttp.attack_catalog import technique_name as _lookup_name

    resolved = _lookup_name(tid)
    return resolved
def _mitre_url_for(tid: str | None) -> str | None:
    """Return the MITRE URL for technique ID *tid*.

    Pure delegation to ``decnet.ttp.attack_stix.mitre_url_for``; the
    argument and the result are passed through untouched.
    """
    # Function-local import on purpose: the STIX module is flagged as
    # heavy, so it is only loaded the first time a URL is needed.
    from decnet.ttp.attack_stix import mitre_url_for as _lookup_url

    resolved = _lookup_url(tid)
    return resolved
# Confidence floor: tags computed below this value are silently dropped
# at insert time. Pinned by tests/ttp/test_confidence.py.
# The bound is inclusive on the keep side: ``TTPMixin.insert_tags``
# retains every row whose ``confidence >= _CONFIDENCE_FLOOR``.
_CONFIDENCE_FLOOR: float = 0.3
class TTPMixin(_MixinBase):
"""Mixin: TTP-tag query + insert methods composed onto
:class:`SQLModelRepository`.
Expects ``self._session()`` from the base mixin and
``self._insert_tags_or_ignore()`` from the per-dialect repo.
Adding a new ``ttp_tag`` query method here requires adding a
contract test in ``tests/web/db/test_ttp_repo.py`` (E.2.13) AND a
parametrized run against both SQLite and MySQL via the existing
``db_backends`` fixture.
"""
async def _insert_tags_or_ignore(
    self, rows: list[TTPTag],
) -> int:
    """Dialect hook: bulk-insert *rows*, skipping rows that already exist.

    The mixin ships NO portable fallback — this body only raises. The
    SQLite and MySQL repositories are expected to override it with
    their native "ignore on duplicate" statement (``ON CONFLICT DO
    NOTHING`` / ``INSERT IGNORE``).

    :param rows: tags to persist; :meth:`insert_tags` has already
        removed rows below the confidence floor before calling this.
    :returns: in overrides, the number of rows actually inserted
        (:meth:`insert_tags` returns this value unchanged).
    :raises NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError(
        "_insert_tags_or_ignore is overridden in per-dialect repos",
    )
async def insert_tags(self, rows: list[TTPTag]) -> int:
    """Persist *rows* idempotently, discarding low-confidence tags.

    Rows whose ``confidence`` is below ``_CONFIDENCE_FLOOR`` are
    filtered out first. What survives is handed to the dialect hook
    :meth:`_insert_tags_or_ignore`, whose return value (the number of
    rows actually written) is returned as-is. An empty input, or an
    input where nothing survives the filter, returns ``0`` without
    calling the hook.
    """
    # Short-circuit on an empty batch so the floor is not even consulted.
    eligible = (
        [tag for tag in rows if tag.confidence >= _CONFIDENCE_FLOOR]
        if rows
        else []
    )
    if eligible:
        return await self._insert_tags_or_ignore(eligible)
    return 0
async def list_techniques_by_identity(
    self,
    uuid: str,
) -> list[IdentityTechniqueRow]:
    """Roll up the techniques attributed to one identity.

    Two kinds of ``ttp_tag`` rows are counted:

    * rows whose ``identity_uuid`` equals *uuid* (this also picks up
      identity-level tags that carry no ``attacker_uuid``), and
    * rows whose ``attacker_uuid`` belongs to an :class:`Attacker`
      with ``identity_id == uuid``.

    Rows are grouped by ``(technique_id, sub_technique_id)``; each
    group becomes one :class:`IdentityTechniqueRow` carrying the tag
    count, the earliest / latest ``created_at`` and the highest
    ``confidence`` in the group.
    """
    # UUIDs of every attacker that projects up to this identity.
    member_attackers = (
        select(col(Attacker.uuid))
        .where(col(Attacker.identity_id) == uuid)
        .scalar_subquery()
    )
    on_identity = col(TTPTag.identity_uuid) == uuid
    on_member_attacker = col(TTPTag.attacker_uuid).in_(member_attackers)

    query: Any = (
        select(
            col(TTPTag.technique_id),
            col(TTPTag.sub_technique_id),
            # tactic is not a grouping key, so it has to be aggregated;
            # the group's MAX() value is the one reported.
            func.max(col(TTPTag.tactic)).label("tactic"),
            func.count().label("count"),
            func.min(col(TTPTag.created_at)).label("first_seen"),
            func.max(col(TTPTag.created_at)).label("last_seen"),
            func.max(col(TTPTag.confidence)).label("confidence_max"),
        )
        .where((on_identity) | (on_member_attacker))
        .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
    )

    async with self._session() as db:
        outcome = await db.execute(query)
        rollup: list[IdentityTechniqueRow] = []
        for rec in outcome.all():
            rollup.append(
                IdentityTechniqueRow(
                    technique_id=rec.technique_id,
                    technique_name=_technique_name(rec.technique_id),
                    sub_technique_id=rec.sub_technique_id,
                    sub_technique_name=_technique_name(rec.sub_technique_id),
                    # Link to the sub-technique when there is one,
                    # otherwise to the parent technique.
                    mitre_url=_mitre_url_for(
                        rec.sub_technique_id or rec.technique_id
                    ),
                    tactic=rec.tactic,
                    count=rec.count,
                    first_seen=rec.first_seen,
                    last_seen=rec.last_seen,
                    confidence_max=rec.confidence_max,
                )
            )
        return rollup
async def list_techniques_by_attacker(
    self,
    uuid: str,
) -> list[IdentityTechniqueRow]:
    """Roll up the techniques tagged on a single attacker (one IP).

    Only rows with ``attacker_uuid == uuid`` match, so identity-level
    tags (those with a NULL ``attacker_uuid``) never appear here —
    they belong to the identity, not to any one IP beneath it.

    One :class:`IdentityTechniqueRow` is produced per
    ``(technique_id, sub_technique_id)`` group.
    """
    aggregates = (
        col(TTPTag.technique_id),
        col(TTPTag.sub_technique_id),
        # Non-key column: reported as the group's MAX() value.
        func.max(col(TTPTag.tactic)).label("tactic"),
        func.count().label("count"),
        func.min(col(TTPTag.created_at)).label("first_seen"),
        func.max(col(TTPTag.created_at)).label("last_seen"),
        func.max(col(TTPTag.confidence)).label("confidence_max"),
    )
    query: Any = (
        select(*aggregates)
        .where(TTPTag.attacker_uuid == uuid)
        .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
    )

    def _to_row(rec: Any) -> IdentityTechniqueRow:
        # Prefer the sub-technique for the MITRE link when present.
        link_target = rec.sub_technique_id or rec.technique_id
        return IdentityTechniqueRow(
            technique_id=rec.technique_id,
            technique_name=_technique_name(rec.technique_id),
            sub_technique_id=rec.sub_technique_id,
            sub_technique_name=_technique_name(rec.sub_technique_id),
            mitre_url=_mitre_url_for(link_target),
            tactic=rec.tactic,
            count=rec.count,
            first_seen=rec.first_seen,
            last_seen=rec.last_seen,
            confidence_max=rec.confidence_max,
        )

    async with self._session() as db:
        outcome = await db.execute(query)
        return [_to_row(rec) for rec in outcome.all()]
async def list_techniques_by_campaign(
    self,
    uuid: str,
) -> list[CampaignTechniqueRow]:
    """Roll up techniques across every identity in one campaign.

    ``ttp_tag`` is joined to :class:`AttackerIdentity` on
    ``identity_uuid`` and filtered by ``AttackerIdentity.campaign_id``
    (the FK column is ``campaign_id``, not ``campaign_uuid``). Because
    of the inner join, only tags that carry an ``identity_uuid`` can
    match.

    Each ``(technique_id, sub_technique_id)`` group becomes one
    :class:`CampaignTechniqueRow` with the tag count, the number of
    distinct identities involved and the latest ``created_at``.
    """
    distinct_identities = func.count(
        func.distinct(col(TTPTag.identity_uuid))
    ).label(
        "identity_count",
    )
    query: Any = (
        select(
            col(TTPTag.technique_id),
            col(TTPTag.sub_technique_id),
            # Non-key column: reported as the group's MAX() value.
            func.max(col(TTPTag.tactic)).label("tactic"),
            func.count().label("count"),
            distinct_identities,
            func.max(col(TTPTag.created_at)).label("last_seen"),
        )
        .join(
            AttackerIdentity,
            AttackerIdentity.uuid == TTPTag.identity_uuid,
        )
        .where(AttackerIdentity.campaign_id == uuid)
        .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
    )

    async with self._session() as db:
        outcome = await db.execute(query)
        rollup: list[CampaignTechniqueRow] = []
        for rec in outcome.all():
            rollup.append(
                CampaignTechniqueRow(
                    technique_id=rec.technique_id,
                    technique_name=_technique_name(rec.technique_id),
                    sub_technique_id=rec.sub_technique_id,
                    sub_technique_name=_technique_name(rec.sub_technique_id),
                    # Sub-technique URL when present, else the parent's.
                    mitre_url=_mitre_url_for(
                        rec.sub_technique_id or rec.technique_id
                    ),
                    tactic=rec.tactic,
                    count=rec.count,
                    identity_count=rec.identity_count,
                    last_seen=rec.last_seen,
                )
            )
        return rollup
async def list_techniques_by_session(
    self,
    sid: str,
) -> list[IdentityTechniqueRow]:
    """Roll up the techniques tagged within one session.

    Matches ``ttp_tag`` rows with ``session_id == sid`` and groups
    them by ``(technique_id, sub_technique_id)``. Each group becomes
    one :class:`IdentityTechniqueRow` with the tag count, first / last
    ``created_at`` and the highest ``confidence``.
    """
    grouping_keys = (TTPTag.technique_id, TTPTag.sub_technique_id)
    query: Any = select(
        col(TTPTag.technique_id),
        col(TTPTag.sub_technique_id),
        # Non-key column: reported as the group's MAX() value.
        func.max(col(TTPTag.tactic)).label("tactic"),
        func.count().label("count"),
        func.min(col(TTPTag.created_at)).label("first_seen"),
        func.max(col(TTPTag.created_at)).label("last_seen"),
        func.max(col(TTPTag.confidence)).label("confidence_max"),
    )
    query = query.where(TTPTag.session_id == sid)
    query = query.group_by(*grouping_keys)

    async with self._session() as db:
        outcome = await db.execute(query)
        timeline: list[IdentityTechniqueRow] = []
        for rec in outcome.all():
            # Sub-technique wins for the MITRE link when it is set.
            link_target = rec.sub_technique_id or rec.technique_id
            timeline.append(
                IdentityTechniqueRow(
                    technique_id=rec.technique_id,
                    technique_name=_technique_name(rec.technique_id),
                    sub_technique_id=rec.sub_technique_id,
                    sub_technique_name=_technique_name(rec.sub_technique_id),
                    mitre_url=_mitre_url_for(link_target),
                    tactic=rec.tactic,
                    count=rec.count,
                    first_seen=rec.first_seen,
                    last_seen=rec.last_seen,
                    confidence_max=rec.confidence_max,
                )
            )
        return timeline
async def list_ttp_decky_phases(
    self, identity_uuid: str,
) -> list[dict[str, Any]]:
    """List per-decky tag observations for one identity (UKC bridge, E.3.15).

    Scope matches :meth:`list_techniques_by_identity`: tags anchored
    directly on the identity plus tags anchored on any
    :class:`Attacker` whose ``identity_id`` is *identity_uuid*. Tags
    without a ``decky_id`` are excluded.

    :returns: one dict per tag, ordered by ``created_at`` ascending,
        with keys ``decky_id``, ``tactic`` and ``created_at_ts``
        (``created_at.timestamp()``, or ``0.0`` when ``created_at``
        is NULL).
    """
    member_attackers = (
        select(col(Attacker.uuid))
        .where(col(Attacker.identity_id) == identity_uuid)
        .scalar_subquery()
    )
    in_scope = (
        (col(TTPTag.identity_uuid) == identity_uuid)
        | (col(TTPTag.attacker_uuid).in_(member_attackers))
    )
    has_decky = col(TTPTag.decky_id).is_not(None)
    query: Any = (
        select(
            col(TTPTag.decky_id),
            col(TTPTag.tactic),
            col(TTPTag.created_at),
        )
        .where((in_scope) & (has_decky))
        .order_by(col(TTPTag.created_at))
    )

    async with self._session() as db:
        outcome = await db.execute(query)
        observations: list[dict[str, Any]] = []
        for rec in outcome.all():
            stamp = rec.created_at
            # NOTE(review): ``.timestamp()`` reads a naive datetime as
            # local time — confirm what tz-awareness the DB driver
            # returns for ``created_at``.
            epoch = 0.0 if stamp is None else stamp.timestamp()
            observations.append(
                {
                    "decky_id": rec.decky_id,
                    "tactic": rec.tactic,
                    "created_at_ts": epoch,
                }
            )
        return observations
async def list_tags_by_scope_and_technique(
    self,
    *,
    scope: str,
    uuid: str,
    technique_id: str,
    sub_technique_id: str | None = None,
    limit: int = 200,
) -> list[dict[str, Any]]:
    """Return raw ``ttp_tag`` rows for a scope + technique pair.

    Powers the operator-facing inspector that explains *why* the rule
    engine flagged a technique. Three scopes are supported:

    * ``scope="identity"`` — tags directly anchored on the identity
      AND tags on Attackers projecting up to the identity.
    * ``scope="attacker"`` — tags anchored on this ``attacker_uuid``.
    * ``scope="session"`` — tags anchored on this ``session_id``
      (*uuid* carries the session id in that case).

    :param scope: one of ``"identity"``, ``"attacker"``, ``"session"``.
    :param uuid: identifier interpreted according to *scope*.
    :param technique_id: required technique filter.
    :param sub_technique_id: optional extra filter; ``None`` means
        "any sub-technique (including none)".
    :param limit: maximum number of rows returned, so a heavily-tagged
        attacker doesn't sink the inspector.
    :returns: newest-first list of ``model_dump(mode="json")`` dicts.
    :raises ValueError: if *scope* is not one of the supported values.
    """
    # Reject a bad scope before a DB session is opened, so a caller
    # bug never costs a connection.
    if scope not in ("identity", "attacker", "session"):
        raise ValueError(f"unknown scope: {scope!r}")

    if scope == "identity":
        attacker_uuids_subq = (
            select(col(Attacker.uuid))
            .where(col(Attacker.identity_id) == uuid)
            .scalar_subquery()
        )
        scope_filter: Any = (
            (col(TTPTag.identity_uuid) == uuid)
            | (col(TTPTag.attacker_uuid).in_(attacker_uuids_subq))
        )
    elif scope == "attacker":
        scope_filter = col(TTPTag.attacker_uuid) == uuid
    else:  # "session" — guaranteed by the validation above
        scope_filter = col(TTPTag.session_id) == uuid

    stmt: Any = (
        select(TTPTag)
        .where(scope_filter)
        .where(col(TTPTag.technique_id) == technique_id)
    )
    if sub_technique_id is not None:
        stmt = stmt.where(
            col(TTPTag.sub_technique_id) == sub_technique_id,
        )
    stmt = stmt.order_by(col(TTPTag.created_at).desc()).limit(limit)

    async with self._session() as session:
        res = await session.execute(stmt)
        return [r.model_dump(mode="json") for r in res.scalars().all()]
async def list_ttp_tags_by_attacker(
    self, uuid: str, limit: int = 2000,
) -> list[dict]:
    """Return the individual ``ttp_tag`` rows of one attacker, newest first.

    Unlike :meth:`list_techniques_by_attacker`, which aggregates, this
    returns one ``model_dump(mode="json")`` dict per tag — the
    granularity the STIX exporter and similar full-row consumers need.
    At most *limit* rows are returned.
    """
    newest_first = col(TTPTag.created_at).desc()
    query: Any = select(TTPTag).where(TTPTag.attacker_uuid == uuid)
    query = query.order_by(newest_first).limit(limit)

    async with self._session() as db:
        outcome = await db.execute(query)
        dumped = []
        for tag in outcome.scalars().all():
            dumped.append(tag.model_dump(mode="json"))
        return dumped
async def get_all_ttp_rollups_for_export(self) -> dict[str, list[dict[str, Any]]]:
    """Build the per-attacker technique rollup for every attacker at once.

    One grouped query over ``ttp_tag`` (rows with a NULL
    ``attacker_uuid`` excluded) replaces N separate
    :meth:`list_techniques_by_attacker` calls during the fleet STIX
    export.

    :returns: ``{attacker_uuid: [rollup_dict, ...]}`` where each rollup
        dict has the keys ``technique_id``, ``sub_technique_id``,
        ``tactic``, ``count`` and ``confidence_max``.
    """
    query: Any = (
        select(
            col(TTPTag.attacker_uuid),
            col(TTPTag.technique_id),
            col(TTPTag.sub_technique_id),
            # Non-key column: reported as the group's MAX() value.
            func.max(col(TTPTag.tactic)).label("tactic"),
            func.count().label("count"),
            func.max(col(TTPTag.confidence)).label("confidence_max"),
        )
        .where(col(TTPTag.attacker_uuid).is_not(None))
        .group_by(
            TTPTag.attacker_uuid,
            TTPTag.technique_id,
            TTPTag.sub_technique_id,
        )
    )

    async with self._session() as db:
        outcome = await db.execute(query)
        by_attacker: dict[str, list[dict[str, Any]]] = {}
        for rec in outcome.all():
            entry = {
                "technique_id": rec.technique_id,
                "sub_technique_id": rec.sub_technique_id,
                "tactic": rec.tactic,
                "count": rec.count,
                "confidence_max": rec.confidence_max,
            }
            # Start a bucket on first sight of an attacker, else extend it.
            bucket = by_attacker.get(rec.attacker_uuid)
            if bucket is None:
                by_attacker[rec.attacker_uuid] = [entry]
            else:
                bucket.append(entry)
        return by_attacker
# ── Backfill iterators (E.4) ────────────────────────────────────
#
# Read-only iterators consumed by ``decnet ttp backfill`` to replay
# historical events through the live :class:`CompositeTagger`. The
# CLI builds :class:`TaggerEvent` objects from these and persists
# results via :meth:`insert_tags` — same idempotent path the bus
# worker uses, no bus publish.
#
# Per TTP_TAGGING.md §"Order of work" / §"Bus topics" the historical
# replay deliberately bypasses bus publish so SIEM/webhook fan-out
# does not re-fire on already-attributed events.
async def iter_attacker_commands_since(
    self, since: datetime,
) -> AsyncIterator[tuple[Attacker, list[dict[str, Any]]]]:
    """Yield ``(Attacker, commands)`` for attackers seen at or after *since*.

    Every :class:`Attacker` with ``last_seen >= since`` is loaded and
    its ``commands`` JSON text decoded (an empty / NULL value decodes
    as ``[]``). Attackers whose payload fails to decode, or decodes to
    something other than a list, are skipped without error; within a
    list, only ``dict`` entries are kept.
    """
    query: Any = select(Attacker).where(col(Attacker.last_seen) >= since)

    async with self._session() as db:
        outcome = await db.execute(query)
        for attacker in outcome.scalars().all():
            try:
                payload = json.loads(attacker.commands or "[]")
            except (ValueError, TypeError):
                # Best-effort column: an undecodable blob is not an error.
                payload = None
            if isinstance(payload, list):
                yield attacker, [
                    item for item in payload if isinstance(item, dict)
                ]
async def iter_canary_triggers_since(
    self, since: datetime,
) -> AsyncIterator[CanaryTrigger]:
    """Yield each :class:`CanaryTrigger` with ``occurred_at >= since``.

    The matching rows are fetched in one query and then yielded one at
    a time while the session is still open.
    """
    fired_since = col(CanaryTrigger.occurred_at) >= since
    query: Any = select(CanaryTrigger).where(fired_since)

    async with self._session() as db:
        outcome = await db.execute(query)
        triggers = outcome.scalars().all()
        for trigger in triggers:
            yield trigger
async def bump_attacker_ipv6_leak(
    self,
    attacker_uuid: str,
    identity_uuid: Optional[str],
    evidence: dict[str, Any],
) -> None:
    """Record one IPv6-leak observation on an attacker and its identity.

    On the :class:`Attacker` row (if it exists): increments
    ``ipv6_leak_count`` and overwrites the ``last_ipv6_*`` denorm
    fields from *evidence*.

    On the :class:`AttackerIdentity` row (if *identity_uuid* is given,
    the row exists and the evidence carries an address): appends an
    entry to the ``ipv6_link_local_iids`` JSON text column unless an
    entry with the same ``"iid"`` value is already present. The
    ``"iid"`` field stores ``evidence["addr"]``.

    Both updates share one session and one commit; missing rows are
    silently skipped.

    :param attacker_uuid: UUID of the attacker to bump.
    :param identity_uuid: UUID of the owning identity, or ``None``.
    :param evidence: lifter evidence; the keys read are ``addr``,
        ``iid_kind`` and ``mac_oui`` (all optional).
    """
    now = datetime.now(timezone.utc)
    addr = evidence.get("addr", "")
    async with self._session() as session:
        res = await session.execute(
            select(Attacker).where(Attacker.uuid == attacker_uuid)
        )
        attacker = res.scalar_one_or_none()
        if attacker is not None:
            # NOTE(review): the increment is computed in Python from
            # the loaded row, so it is a read-modify-write rather than
            # an SQL-side ``count = count + 1``.
            attacker.ipv6_leak_count = (attacker.ipv6_leak_count or 0) + 1
            attacker.last_ipv6_leak_at = now
            # Empty / missing evidence values are stored as NULL.
            attacker.last_ipv6_link_local = addr or None
            attacker.last_ipv6_iid_kind = evidence.get("iid_kind") or None
            attacker.last_ipv6_mac_oui = evidence.get("mac_oui") or None
            session.add(attacker)
        # Without an address there is nothing to append, so the
        # identity lookup is skipped entirely.
        if identity_uuid and addr:
            id_res = await session.execute(
                select(AttackerIdentity).where(
                    AttackerIdentity.uuid == identity_uuid
                )
            )
            identity = id_res.scalar_one_or_none()
            if identity is not None:
                try:
                    decoded = json.loads(
                        identity.ipv6_link_local_iids or "[]"
                    )
                except (json.JSONDecodeError, TypeError):
                    decoded = []
                # Valid JSON that is not a list (dict, string, number)
                # is treated like an unparseable value: start afresh.
                iids: list[Any] = decoded if isinstance(decoded, list) else []
                # Non-dict entries are kept but cannot match, so they
                # never crash the dedup check.
                already_known = any(
                    isinstance(entry, dict) and entry.get("iid") == addr
                    for entry in iids
                )
                if not already_known:
                    iids.append({
                        "iid": addr,
                        "oui": evidence.get("mac_oui", ""),
                        "kind": evidence.get("iid_kind", "unknown"),
                        "first_seen": now.isoformat(),
                    })
                    identity.ipv6_link_local_iids = json.dumps(iids)
                    session.add(identity)
        await session.commit()
async def list_distinct_techniques(self) -> list[TechniqueRollupRow]:
    """Roll up every technique seen anywhere in the fleet.

    Groups all ``ttp_tag`` rows by ``(technique_id, sub_technique_id)``
    with no scope filter. Each group becomes one
    :class:`TechniqueRollupRow` with the tag count and the most recent
    ``created_at``.
    """
    query: Any = select(
        col(TTPTag.technique_id),
        col(TTPTag.sub_technique_id),
        # Non-key column: reported as the group's MAX() value.
        func.max(col(TTPTag.tactic)).label("tactic"),
        func.count().label("count"),
        func.max(col(TTPTag.created_at)).label("last_seen"),
    ).group_by(TTPTag.technique_id, TTPTag.sub_technique_id)

    async with self._session() as db:
        outcome = await db.execute(query)
        fleet_rollup: list[TechniqueRollupRow] = []
        for rec in outcome.all():
            # Sub-technique wins for the MITRE link when it is set.
            link_target = rec.sub_technique_id or rec.technique_id
            fleet_rollup.append(
                TechniqueRollupRow(
                    technique_id=rec.technique_id,
                    technique_name=_technique_name(rec.technique_id),
                    sub_technique_id=rec.sub_technique_id,
                    sub_technique_name=_technique_name(rec.sub_technique_id),
                    mitre_url=_mitre_url_for(link_target),
                    tactic=rec.tactic,
                    count=rec.count,
                    last_seen=rec.last_seen,
                )
            )
        return fleet_rollup