feat(ttp): E.3.3 repository — insert_tags + listing rollups (dual backend)

Dialect-split: portable rollup queries on TTPMixin; bulk insert with
ON CONFLICT DO NOTHING / INSERT IGNORE in the per-dialect repos.
Confidence-floor (< 0.3) drop applied at mixin layer before the
dialect hook. BaseRepository now declares the six TTP methods abstract.

Tests in tests/web/db/test_ttp_repo.py flipped from pytest.fail stubs
to real dual-backend behavioral tests; tests/ttp/test_confidence.py
drop-below-floor xfail removed.
This commit is contained in:
2026-05-01 08:04:46 -04:00
parent 226b3adfa2
commit fee697694d
7 changed files with 452 additions and 98 deletions

View File

@@ -15,10 +15,11 @@ from __future__ import annotations
from typing import Any, List, Optional
from sqlalchemy import func, select, text, literal_column
from sqlalchemy.dialects.mysql import insert as mysql_insert
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from decnet.web.db.models import Log
from decnet.web.db.models import Log, TTPTag
from decnet.web.db.mysql.database import get_async_engine
from decnet.web.db.sqlmodel_repo import SQLModelRepository
@@ -151,6 +152,26 @@ class MySQLRepository(SQLModelRepository):
# TEXT-stored JSON, same behavior we rely on in SQLite.
return text(f"JSON_UNQUOTE(JSON_EXTRACT(fields, '$.{key}')) = :val")
async def _insert_tags_or_ignore(self, rows: list[TTPTag]) -> int:
    """Write ``rows`` to ``ttp_tag`` in one MySQL ``INSERT IGNORE``.

    Rows that collide with an existing key (the ``uuid`` primary key is
    the intended case) are skipped instead of raising, mirroring the
    SQLite ``ON CONFLICT DO NOTHING`` contract.

    Args:
        rows: Tags to persist; an empty list short-circuits without
            opening a session.

    Returns:
        The statement's ``rowcount`` (treated as the number of NEW rows),
        or ``0`` when the driver reports nothing.
    """
    if not rows:
        return 0

    # One multi-row VALUES clause built from each model's field dump.
    values = [tag.model_dump() for tag in rows]
    insert_stmt = mysql_insert(TTPTag.__table__)  # type: ignore[attr-defined]
    # NOTE(review): per the MySQL manual, IGNORE downgrades every
    # "ignorable" error to a warning, not only duplicate-key errors.
    insert_ignore = insert_stmt.values(values).prefix_with("IGNORE")

    async with self._session() as db:
        outcome = await db.execute(insert_ignore)
        await db.commit()
        inserted = outcome.rowcount
        return int(inserted) if inserted else 0
async def get_log_histogram(
self,
search: Optional[str] = None,

View File

@@ -2,6 +2,12 @@ from abc import ABC, abstractmethod
from typing import Any, Optional
from decnet.web.db.models.topology import DeckyRow, EdgeRow, LANRow, TopologySummary
from decnet.web.db.models import (
CampaignTechniqueRow,
IdentityTechniqueRow,
TechniqueRollupRow,
TTPTag,
)
class BaseRepository(ABC):
@@ -1300,3 +1306,49 @@ class BaseRepository(ABC):
async def count_probe_relays(self, attacker_ip: str, decky: str) -> int:
raise NotImplementedError
# -------------------- TTP tagging (E.3.3) --------------------
@abstractmethod
async def insert_tags(self, rows: list[TTPTag]) -> int:
    """Bulk-insert ``ttp_tag`` rows with insert-or-ignore semantics.

    This is NOT an upsert: a row whose key already exists is left
    untouched rather than updated (``ON CONFLICT DO NOTHING`` on SQLite,
    ``INSERT IGNORE`` on MySQL).

    Drops rows with ``confidence < 0.3`` (the floor pinned in
    ``tests/ttp/test_confidence.py``). Returns the count of rows
    actually written. Idempotent — replaying the same source events
    converges to the same tag set without duplicates.

    Args:
        rows: Candidate tags to persist.

    Returns:
        Number of rows newly written (dropped and duplicate rows are
        not counted).
    """
    raise NotImplementedError
@abstractmethod
async def list_techniques_by_identity(
    self, uuid: str,
) -> list[IdentityTechniqueRow]:
    """Per-Identity TTP rollup (joins through ``Attacker.identity_id``).

    Args:
        uuid: UUID of the identity whose tags are rolled up.

    Returns:
        Aggregated technique rows for that identity.
    """
    raise NotImplementedError
@abstractmethod
async def list_techniques_by_attacker(
    self, uuid: str,
) -> list[IdentityTechniqueRow]:
    """Per-Attacker (per-IP) TTP rollup; excludes identity-rollup tags.

    Args:
        uuid: UUID of the attacker whose tags are rolled up.

    Returns:
        Aggregated technique rows; the row type is shared with the
        per-identity rollup.
    """
    raise NotImplementedError
@abstractmethod
async def list_techniques_by_campaign(
    self, uuid: str,
) -> list[CampaignTechniqueRow]:
    """Campaign-wide TTP rollup across member identities.

    Args:
        uuid: UUID of the campaign whose member identities are rolled up.

    Returns:
        Aggregated technique rows for the campaign.
    """
    raise NotImplementedError
@abstractmethod
async def list_techniques_by_session(
    self, sid: str,
) -> list[IdentityTechniqueRow]:
    """Session-scoped TTP timeline.

    Args:
        sid: Session identifier the tags are filtered on.

    Returns:
        Technique rows for that session; the row type is shared with
        the per-identity rollup.
    """
    raise NotImplementedError
@abstractmethod
async def list_distinct_techniques(self) -> list[TechniqueRollupRow]:
    """Fleet-wide distinct-technique rollup.

    Returns:
        One rollup row per distinct technique, unfiltered by identity,
        attacker, campaign, or session.
    """
    raise NotImplementedError

View File

@@ -1,11 +1,12 @@
from typing import Any, List, Optional
from sqlalchemy import func, select, text, literal_column
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from decnet.config import _ROOT
from decnet.web.db.models import Log
from decnet.web.db.models import Log, TTPTag
from decnet.web.db.sqlite.database import get_async_engine
from decnet.web.db.sqlmodel_repo import SQLModelRepository
@@ -83,6 +84,21 @@ class SQLiteRepository(SQLModelRepository):
# SQLite stores JSON as text; json_extract is the canonical accessor.
return text(f"json_extract(fields, '$.{key}') = :val")
async def _insert_tags_or_ignore(self, rows: list[TTPTag]) -> int:
    """Write ``rows`` to ``ttp_tag`` using SQLite ``ON CONFLICT DO NOTHING``.

    The conflict target is the ``uuid`` column, so a row whose ``uuid``
    already exists is skipped rather than raising.

    Args:
        rows: Tags to persist; an empty list short-circuits without
            opening a session.

    Returns:
        The statement's ``rowcount`` (newly inserted rows; skipped
        duplicates do not count), or ``0`` when the driver reports nothing.
    """
    if not rows:
        return 0

    # One multi-row VALUES clause built from each model's field dump.
    values = [tag.model_dump() for tag in rows]
    insert_stmt = (
        sqlite_insert(TTPTag.__table__)  # type: ignore[attr-defined]
        .values(values)
        .on_conflict_do_nothing(index_elements=["uuid"])
    )

    async with self._session() as db:
        outcome = await db.execute(insert_stmt)
        await db.commit()
        inserted = outcome.rowcount
        return int(inserted) if inserted else 0
async def get_log_histogram(
self,
search: Optional[str] = None,

View File

@@ -1,19 +1,25 @@
"""TTP-tagging repository — `ttp_tag` reads + idempotent inserts.
"""TTP-tagging repository — ``ttp_tag`` reads + idempotent inserts.
Implementation phase E.3.3 of ``development/TTP_TAGGING.md``. The
shape (argument types, return types, idempotency semantics on
``insert_tags``) was pinned at contract step E.1.10; this file fills
in the method bodies.
Dialect-split convention: portable rollup queries live here on the
mixin; the bulk-insert "ignore on duplicate" hook is overridden in the
per-dialect ``SQLiteRepository`` / ``MySQLRepository`` subclasses
(``decnet/web/db/sqlite/repository.py`` /
``decnet/web/db/mysql/repository.py``), where the actual
``ON CONFLICT DO NOTHING`` (SQLite) vs ``INSERT IGNORE`` (MySQL) SQL
diverges.
"""
from __future__ import annotations
from typing import Any
from sqlalchemy import func, select
from sqlmodel import col
from decnet.web.db.models import (
Attacker,
AttackerIdentity,
CampaignTechniqueRow,
IdentityTechniqueRow,
TechniqueRollupRow,
@@ -22,85 +28,232 @@ from decnet.web.db.models import (
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
# Confidence floor: ``insert_tags`` silently drops tags whose confidence is
# strictly below this value before the write; a tag at exactly the floor
# is kept. Pinned by tests/ttp/test_confidence.py.
_CONFIDENCE_FLOOR: float = 0.3
class TTPMixin(_MixinBase):
    """Mixin: TTP-tag query + insert methods composed onto
    :class:`SQLModelRepository`.

    Expects ``self._session()`` from the base mixin and an override of
    ``self._insert_tags_or_ignore()`` from the per-dialect repo.
    Adding a new ``ttp_tag`` query method here requires adding a
    contract test in ``tests/web/db/test_ttp_repo.py`` (E.2.13) AND a
    parametrized run against both SQLite and MySQL via the existing
    ``db_backends`` fixture.
    """

    async def _insert_tags_or_ignore(
        self, rows: list[TTPTag],
    ) -> int:
        """Dialect hook: bulk INSERT that skips already-present rows.

        There is no portable fallback at this layer: the default body
        only raises. The SQLite and MySQL repositories override it with
        their native ``ON CONFLICT DO NOTHING`` / ``INSERT IGNORE`` SQL.

        Args:
            rows: Tags that already passed the confidence floor.

        Returns:
            Number of rows newly inserted (in overriding implementations).

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise NotImplementedError(
            "_insert_tags_or_ignore is overridden in per-dialect repos",
        )

    async def insert_tags(self, rows: list[TTPTag]) -> int:
        """Bulk-insert tags with insert-or-ignore semantics.

        Drops rows with ``confidence < _CONFIDENCE_FLOOR`` (= 0.3) before
        the write, then delegates to the dialect hook. The idempotency
        property is the load-bearing contract: replaying the same source
        events must converge to the same tag set without writing
        duplicates and without raising.

        Args:
            rows: Candidate tags.

        Returns:
            Count of rows actually inserted, i.e. that passed the floor
            AND were not already present at their deterministic
            :func:`compute_tag_uuid` PK. ``0`` when nothing survives the
            floor (the dialect hook is not called in that case).
        """
        if not rows:
            return 0
        kept = [r for r in rows if r.confidence >= _CONFIDENCE_FLOOR]
        if not kept:
            return 0
        return await self._insert_tags_or_ignore(kept)

    async def _ttp_rollup_where(
        self, predicate: Any,
    ) -> list[IdentityTechniqueRow]:
        """Aggregate the ``ttp_tag`` rows matching ``predicate``.

        Shared by the identity / attacker / session rollups, which differ
        only in their WHERE clause. Groups by ``(technique_id,
        sub_technique_id)`` and returns one row per group.
        """
        stmt: Any = (
            select(
                col(TTPTag.technique_id),
                col(TTPTag.sub_technique_id),
                # tactic is not part of the grouping key; MAX() picks a
                # single representative value per group.
                func.max(col(TTPTag.tactic)).label("tactic"),
                func.count().label("count"),
                func.min(col(TTPTag.created_at)).label("first_seen"),
                func.max(col(TTPTag.created_at)).label("last_seen"),
                func.max(col(TTPTag.confidence)).label("confidence_max"),
            )
            .where(predicate)
            .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
        )
        async with self._session() as session:
            res = await session.execute(stmt)
            return [
                IdentityTechniqueRow(
                    technique_id=r.technique_id,
                    sub_technique_id=r.sub_technique_id,
                    tactic=r.tactic,
                    count=r.count,
                    first_seen=r.first_seen,
                    last_seen=r.last_seen,
                    confidence_max=r.confidence_max,
                )
                for r in res.all()
            ]

    async def list_techniques_by_identity(
        self,
        uuid: str,
    ) -> list[IdentityTechniqueRow]:
        """Per-Identity TTP rollup.

        Includes (a) tags directly anchored on this identity
        (``identity_uuid == uuid``), which covers identity-rollup tags
        with NULL ``attacker_uuid``, and (b) tags anchored on an Attacker
        whose ``identity_id`` projects up to this identity.

        Args:
            uuid: Identity UUID.

        Returns:
            One row per ``(technique_id, sub_technique_id)``.
        """
        # Plain SELECT (not a scalar subquery): it may return many
        # attacker uuids and is rendered as ``IN (SELECT ...)``.
        attacker_uuids = select(col(Attacker.uuid)).where(
            col(Attacker.identity_id) == uuid,
        )
        return await self._ttp_rollup_where(
            (col(TTPTag.identity_uuid) == uuid)
            | (col(TTPTag.attacker_uuid).in_(attacker_uuids)),
        )

    async def list_techniques_by_attacker(
        self,
        uuid: str,
    ) -> list[IdentityTechniqueRow]:
        """Per-Attacker (per-IP) TTP rollup.

        Filters on ``attacker_uuid == uuid``, so identity-rollup tags
        (``attacker_uuid IS NULL``) are deliberately excluded — those
        belong to the Identity, not any one IP underneath it.

        Args:
            uuid: Attacker UUID.

        Returns:
            One row per ``(technique_id, sub_technique_id)``.
        """
        return await self._ttp_rollup_where(col(TTPTag.attacker_uuid) == uuid)

    async def list_techniques_by_campaign(
        self,
        uuid: str,
    ) -> list[CampaignTechniqueRow]:
        """Campaign-wide TTP rollup.

        Joins ``ttp_tag.identity_uuid`` → :class:`AttackerIdentity` and
        filters on ``AttackerIdentity.campaign_id``. Note: the FK column
        is ``campaign_id``, not ``campaign_uuid``.

        Args:
            uuid: Campaign identifier matched against ``campaign_id``.

        Returns:
            One row per ``(technique_id, sub_technique_id)``, including
            the number of distinct identities that exhibited it.
        """
        # NOTE(review): only tags with a matching ``identity_uuid`` are
        # joined; tags anchored solely on ``attacker_uuid`` are not
        # counted here, unlike list_techniques_by_identity — confirm
        # this is intended.
        stmt: Any = (
            select(
                col(TTPTag.technique_id),
                col(TTPTag.sub_technique_id),
                func.max(col(TTPTag.tactic)).label("tactic"),
                func.count().label("count"),
                func.count(func.distinct(col(TTPTag.identity_uuid))).label(
                    "identity_count",
                ),
                func.max(col(TTPTag.created_at)).label("last_seen"),
            )
            .join(
                AttackerIdentity,
                AttackerIdentity.uuid == TTPTag.identity_uuid,
            )
            .where(AttackerIdentity.campaign_id == uuid)
            .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
        )
        async with self._session() as session:
            res = await session.execute(stmt)
            return [
                CampaignTechniqueRow(
                    technique_id=r.technique_id,
                    sub_technique_id=r.sub_technique_id,
                    tactic=r.tactic,
                    count=r.count,
                    identity_count=r.identity_count,
                    last_seen=r.last_seen,
                )
                for r in res.all()
            ]

    async def list_techniques_by_session(
        self,
        sid: str,
    ) -> list[IdentityTechniqueRow]:
        """Session-scoped TTP rollup, filtered on ``ttp_tag.session_id``.

        Args:
            sid: Session identifier.

        Returns:
            One aggregated row per ``(technique_id, sub_technique_id)``
            (not one row per event).
        """
        return await self._ttp_rollup_where(col(TTPTag.session_id) == sid)

    async def list_distinct_techniques(self) -> list[TechniqueRollupRow]:
        """Fleet-wide distinct-technique rollup with counts +
        most-recent-seen timestamps.

        Returns:
            One row per ``(technique_id, sub_technique_id)`` across every
            ``ttp_tag`` row, with no filter applied.
        """
        stmt: Any = (
            select(
                col(TTPTag.technique_id),
                col(TTPTag.sub_technique_id),
                func.max(col(TTPTag.tactic)).label("tactic"),
                func.count().label("count"),
                func.max(col(TTPTag.created_at)).label("last_seen"),
            )
            .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
        )
        async with self._session() as session:
            res = await session.execute(stmt)
            return [
                TechniqueRollupRow(
                    technique_id=r.technique_id,
                    sub_technique_id=r.sub_technique_id,
                    tactic=r.tactic,
                    count=r.count,
                    last_seen=r.last_seen,
                )
                for r in res.all()
            ]