feat(ttp): E.3.3 repository — insert_tags + listing rollups (dual backend)
Dialect-split: portable rollup queries on TTPMixin; bulk insert with ON CONFLICT DO NOTHING / INSERT IGNORE in the per-dialect repos. Confidence-floor (< 0.3) drop applied at mixin layer before the dialect hook. BaseRepository now declares the six TTP methods abstract. Tests in tests/web/db/test_ttp_repo.py flipped from pytest.fail stubs to real dual-backend behavioral tests; tests/ttp/test_confidence.py drop-below-floor xfail removed.
This commit is contained in:
@@ -15,10 +15,11 @@ from __future__ import annotations
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from sqlalchemy import func, select, text, literal_column
|
||||
from sqlalchemy.dialects.mysql import insert as mysql_insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
|
||||
from decnet.web.db.models import Log
|
||||
from decnet.web.db.models import Log, TTPTag
|
||||
from decnet.web.db.mysql.database import get_async_engine
|
||||
from decnet.web.db.sqlmodel_repo import SQLModelRepository
|
||||
|
||||
@@ -151,6 +152,26 @@ class MySQLRepository(SQLModelRepository):
|
||||
# TEXT-stored JSON, same behavior we rely on in SQLite.
|
||||
return text(f"JSON_UNQUOTE(JSON_EXTRACT(fields, '$.{key}')) = :val")
|
||||
|
||||
async def _insert_tags_or_ignore(self, rows: list[TTPTag]) -> int:
    """Write ``rows`` to ``ttp_tag`` with a single MySQL ``INSERT IGNORE``.

    Rows whose key already exists are skipped instead of raising, which
    is the same contract the SQLite hook provides through
    ``ON CONFLICT DO NOTHING``.

    Returns the driver-reported ``rowcount`` of the statement (``0`` when
    the driver reports nothing, or when ``rows`` is empty); callers treat
    it as the number of newly written rows.

    NOTE(review): per the MySQL manual, ``IGNORE`` downgrades more than
    duplicate-key errors to warnings, so a malformed row can be skipped
    or coerced silently -- confirm that is acceptable for this table.
    """
    # Nothing to write: skip building a statement or opening a session.
    if not rows:
        return 0

    insert_ignore = (
        mysql_insert(TTPTag.__table__)  # type: ignore[attr-defined]
        .values([tag.model_dump() for tag in rows])
        .prefix_with("IGNORE")
    )
    async with self._session() as db:
        outcome = await db.execute(insert_ignore)
        await db.commit()
        return int(outcome.rowcount or 0)
|
||||
|
||||
async def get_log_histogram(
|
||||
self,
|
||||
search: Optional[str] = None,
|
||||
|
||||
@@ -2,6 +2,12 @@ from abc import ABC, abstractmethod
|
||||
from typing import Any, Optional
|
||||
|
||||
from decnet.web.db.models.topology import DeckyRow, EdgeRow, LANRow, TopologySummary
|
||||
from decnet.web.db.models import (
|
||||
CampaignTechniqueRow,
|
||||
IdentityTechniqueRow,
|
||||
TechniqueRollupRow,
|
||||
TTPTag,
|
||||
)
|
||||
|
||||
|
||||
class BaseRepository(ABC):
|
||||
@@ -1300,3 +1306,49 @@ class BaseRepository(ABC):
|
||||
|
||||
async def count_probe_relays(self, attacker_ip: str, decky: str) -> int:
|
||||
raise NotImplementedError
|
||||
|
||||
# -------------------- TTP tagging (E.3.3) --------------------
|
||||
|
||||
@abstractmethod
async def insert_tags(self, rows: list[TTPTag]) -> int:
    """Insert ``ttp_tag`` rows, ignoring rows that already exist.

    Contract for implementations:

    * rows with ``confidence < 0.3`` are dropped before the write (the
      floor is pinned in ``tests/ttp/test_confidence.py``);
    * existing rows are left untouched (insert-or-ignore, not an
      update), so replaying the same source events is idempotent and
      never produces duplicates;
    * the return value is the number of rows actually written.
    """
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def list_techniques_by_identity(self, uuid: str) -> list[IdentityTechniqueRow]:
    """Return the TTP rollup for the Identity identified by ``uuid``.

    The rollup is expected to reach tags through ``Attacker.identity_id``.
    """
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def list_techniques_by_attacker(self, uuid: str) -> list[IdentityTechniqueRow]:
    """Return the TTP rollup for one Attacker (one IP) identified by ``uuid``.

    Identity-rollup tags are excluded from this view.
    """
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def list_techniques_by_campaign(self, uuid: str) -> list[CampaignTechniqueRow]:
    """Return the TTP rollup for the campaign identified by ``uuid``.

    The rollup spans all identities that are members of the campaign.
    """
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def list_techniques_by_session(self, sid: str) -> list[IdentityTechniqueRow]:
    """Return the TTP timeline scoped to the session identified by ``sid``."""
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def list_distinct_techniques(self) -> list[TechniqueRollupRow]:
    """Return one rollup row per distinct technique seen across the fleet."""
    raise NotImplementedError
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from sqlalchemy import func, select, text, literal_column
|
||||
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
|
||||
from decnet.config import _ROOT
|
||||
from decnet.web.db.models import Log
|
||||
from decnet.web.db.models import Log, TTPTag
|
||||
from decnet.web.db.sqlite.database import get_async_engine
|
||||
from decnet.web.db.sqlmodel_repo import SQLModelRepository
|
||||
|
||||
@@ -83,6 +84,21 @@ class SQLiteRepository(SQLModelRepository):
|
||||
# SQLite stores JSON as text; json_extract is the canonical accessor.
|
||||
return text(f"json_extract(fields, '$.{key}') = :val")
|
||||
|
||||
async def _insert_tags_or_ignore(self, rows: list[TTPTag]) -> int:
    """Bulk-insert ``rows`` into ``ttp_tag``, skipping existing ``uuid`` keys.

    Uses SQLite's ``INSERT ... ON CONFLICT DO NOTHING`` keyed on the
    ``uuid`` column, so replaying already-stored rows is a no-op rather
    than an integrity error.

    The batch is written in chunks: a multi-row ``VALUES`` clause binds
    one parameter per column per row, and SQLite rejects statements with
    more bound parameters than its compile-time limit. All chunks run in
    one session and are committed together, so the write stays
    all-or-nothing.

    Returns the sum of the driver-reported ``rowcount`` of each chunk
    (``0`` for an empty ``rows``); skipped duplicates do not count.
    """
    if not rows:
        return 0

    payload = [r.model_dump() for r in rows]

    # 999 is SQLite's default SQLITE_MAX_VARIABLE_NUMBER before 3.32.0;
    # newer builds allow more, so this is the conservative bound.
    max_bound_params = 999
    params_per_row = max(1, len(payload[0]))
    rows_per_stmt = max(1, max_bound_params // params_per_row)

    table = TTPTag.__table__  # type: ignore[attr-defined]
    inserted = 0
    async with self._session() as session:
        for start in range(0, len(payload), rows_per_stmt):
            stmt = (
                sqlite_insert(table)
                .values(payload[start:start + rows_per_stmt])
                .on_conflict_do_nothing(index_elements=["uuid"])
            )
            result = await session.execute(stmt)
            inserted += int(result.rowcount or 0)
        # Single commit after the last chunk keeps the batch atomic.
        await session.commit()
    return inserted
|
||||
|
||||
async def get_log_histogram(
|
||||
self,
|
||||
search: Optional[str] = None,
|
||||
|
||||
@@ -1,19 +1,25 @@
|
||||
"""TTP-tagging repository — `ttp_tag` reads + idempotent inserts.
|
||||
"""TTP-tagging repository — ``ttp_tag`` reads + idempotent inserts.
|
||||
|
||||
Contract step E.1.10 of `development/TTP_TAGGING.md`. Method bodies
|
||||
raise ``NotImplementedError``; the SQL lands at E.3 implementation
|
||||
phase. The shape — argument types, return types, idempotency
|
||||
semantics on ``insert_tags`` — is the public contract from this
|
||||
commit forward.
|
||||
Implementation phase E.3.3 of ``development/TTP_TAGGING.md``. The
|
||||
shape was pinned at E.1.10; this file fills in the bodies.
|
||||
|
||||
Per the dual-DB-backend project convention, dialect-specific behavior
|
||||
(``INSERT OR IGNORE`` on SQLite vs ``INSERT IGNORE`` on MySQL) is
|
||||
overridden in the per-dialect subclasses (``decnet.web.db.sqlite``,
|
||||
``decnet.web.db.mysql``); the shared base lives here.
|
||||
Dialect-split convention: portable rollup queries live here on the
|
||||
mixin; the bulk-insert "ignore on duplicate" hook lands in the
|
||||
per-dialect ``SQLiteRepository`` / ``MySQLRepository`` subclasses
|
||||
(``decnet/web/db/sqlite/repository.py`` /
|
||||
``decnet/web/db/mysql/repository.py``) where the actual
|
||||
``ON CONFLICT DO NOTHING`` vs ``INSERT IGNORE`` SQL diverges.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlmodel import col
|
||||
|
||||
from decnet.web.db.models import (
|
||||
Attacker,
|
||||
AttackerIdentity,
|
||||
CampaignTechniqueRow,
|
||||
IdentityTechniqueRow,
|
||||
TechniqueRollupRow,
|
||||
@@ -22,85 +28,232 @@ from decnet.web.db.models import (
|
||||
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
||||
|
||||
|
||||
# Confidence floor: tags computed below this value are silently dropped
|
||||
# at insert time. Pinned by tests/ttp/test_confidence.py.
|
||||
_CONFIDENCE_FLOOR: float = 0.3
|
||||
|
||||
|
||||
class TTPMixin(_MixinBase):
    """Mixin adding ``ttp_tag`` inserts and rollup reads to
    :class:`SQLModelRepository`.

    Relies on two members supplied by the classes it is mixed into:
    ``self._session()`` from the base mixin and
    ``self._insert_tags_or_ignore()`` from the per-dialect repository.

    Adding a new ``ttp_tag`` query method here requires adding a
    contract test in ``tests/web/db/test_ttp_repo.py`` (E.2.13) AND a
    parametrized run against both SQLite and MySQL via the existing
    ``db_backends`` fixture.

    NOTE(review): none of the rollup queries has an ``ORDER BY``, so the
    order of returned rows is whatever the backend produces.
    """

    async def _insert_tags_or_ignore(
        self, rows: list[TTPTag],
    ) -> int:
        """Dialect hook: bulk-insert ``rows``, ignoring duplicate keys.

        There is no portable default. This base version always raises;
        the SQLite and MySQL repositories override it with their native
        ``ON CONFLICT DO NOTHING`` / ``INSERT IGNORE`` statements.

        Raises:
            NotImplementedError: always, when not overridden.
        """
        raise NotImplementedError(
            "_insert_tags_or_ignore is overridden in per-dialect repos",
        )

    async def insert_tags(self, rows: list[TTPTag]) -> int:
        """Insert tags with insert-or-ignore semantics.

        Rows with ``confidence < _CONFIDENCE_FLOOR`` are dropped before
        the write; the remainder is handed to the dialect hook.

        Returns the number of rows actually inserted, i.e. rows that
        passed the floor AND were not already present at their
        deterministic :func:`compute_tag_uuid` primary key. Returns ``0``
        without touching the database when nothing survives the filter.
        """
        if not rows:
            return 0
        kept = [r for r in rows if r.confidence >= _CONFIDENCE_FLOOR]
        if not kept:
            return 0
        return await self._insert_tags_or_ignore(kept)

    async def _rollup_identity_rows(
        self, where_clause: Any,
    ) -> list[IdentityTechniqueRow]:
        """Run the shared per-technique aggregate restricted by ``where_clause``.

        Groups ``ttp_tag`` by ``(technique_id, sub_technique_id)`` and
        returns one :class:`IdentityTechniqueRow` per group with the tag
        count, first/last ``created_at`` and the maximum confidence.
        """
        stmt: Any = (
            select(
                col(TTPTag.technique_id),
                col(TTPTag.sub_technique_id),
                func.max(col(TTPTag.tactic)).label("tactic"),
                func.count().label("count"),
                func.min(col(TTPTag.created_at)).label("first_seen"),
                func.max(col(TTPTag.created_at)).label("last_seen"),
                func.max(col(TTPTag.confidence)).label("confidence_max"),
            )
            .where(where_clause)
            .group_by(col(TTPTag.technique_id), col(TTPTag.sub_technique_id))
        )
        async with self._session() as session:
            res = await session.execute(stmt)
            # Read by key, not attribute: ``Row.count`` is the inherited
            # sequence method, so ``row.count`` would not yield the
            # labelled "count" column.
            return [
                IdentityTechniqueRow(
                    technique_id=m["technique_id"],
                    sub_technique_id=m["sub_technique_id"],
                    tactic=m["tactic"],
                    count=m["count"],
                    first_seen=m["first_seen"],
                    last_seen=m["last_seen"],
                    confidence_max=m["confidence_max"],
                )
                for m in res.mappings().all()
            ]

    async def list_techniques_by_identity(
        self,
        uuid: str,
    ) -> list[IdentityTechniqueRow]:
        """Per-Identity TTP rollup.

        Includes (a) tags directly anchored on this identity
        (``identity_uuid == uuid``), which covers identity-rollup tags
        with NULL ``attacker_uuid``, and (b) tags anchored on an Attacker
        whose ``identity_id`` equals ``uuid``. A tag matching both
        conditions is counted once.
        """
        attacker_uuids_subq = (
            select(col(Attacker.uuid))
            .where(col(Attacker.identity_id) == uuid)
            .scalar_subquery()
        )
        return await self._rollup_identity_rows(
            (col(TTPTag.identity_uuid) == uuid)
            | (col(TTPTag.attacker_uuid).in_(attacker_uuids_subq)),
        )

    async def list_techniques_by_attacker(
        self,
        uuid: str,
    ) -> list[IdentityTechniqueRow]:
        """Per-Attacker (per-IP) TTP rollup.

        Filters on ``attacker_uuid == uuid``, so identity-rollup tags
        (``attacker_uuid IS NULL``) are excluded: those belong to the
        Identity, not to any one IP underneath it.
        """
        return await self._rollup_identity_rows(
            col(TTPTag.attacker_uuid) == uuid,
        )

    async def list_techniques_by_campaign(
        self,
        uuid: str,
    ) -> list[CampaignTechniqueRow]:
        """Campaign-wide TTP rollup.

        Joins ``ttp_tag.identity_uuid`` to :class:`AttackerIdentity` and
        filters on ``AttackerIdentity.campaign_id`` (the FK column is
        ``campaign_id``, not ``campaign_uuid``).

        NOTE(review): this is an inner join on ``identity_uuid``, so tags
        that carry only an ``attacker_uuid`` are not rolled up here,
        unlike :meth:`list_techniques_by_identity` -- confirm intended.
        """
        stmt: Any = (
            select(
                col(TTPTag.technique_id),
                col(TTPTag.sub_technique_id),
                func.max(col(TTPTag.tactic)).label("tactic"),
                func.count().label("count"),
                func.count(func.distinct(col(TTPTag.identity_uuid))).label(
                    "identity_count",
                ),
                func.max(col(TTPTag.created_at)).label("last_seen"),
            )
            .join(
                AttackerIdentity,
                col(AttackerIdentity.uuid) == col(TTPTag.identity_uuid),
            )
            .where(col(AttackerIdentity.campaign_id) == uuid)
            .group_by(col(TTPTag.technique_id), col(TTPTag.sub_technique_id))
        )
        async with self._session() as session:
            res = await session.execute(stmt)
            # Key access avoids the ``Row.count`` method shadowing the
            # labelled "count" column.
            return [
                CampaignTechniqueRow(
                    technique_id=m["technique_id"],
                    sub_technique_id=m["sub_technique_id"],
                    tactic=m["tactic"],
                    count=m["count"],
                    identity_count=m["identity_count"],
                    last_seen=m["last_seen"],
                )
                for m in res.mappings().all()
            ]

    async def list_techniques_by_session(
        self,
        sid: str,
    ) -> list[IdentityTechniqueRow]:
        """Session-scoped TTP rollup, filtered on ``ttp_tag.session_id``."""
        return await self._rollup_identity_rows(
            col(TTPTag.session_id) == sid,
        )

    async def list_distinct_techniques(self) -> list[TechniqueRollupRow]:
        """Fleet-wide distinct-technique rollup.

        Returns one row per ``(technique_id, sub_technique_id)`` over the
        whole ``ttp_tag`` table, with the tag count and the most recent
        ``created_at``.
        """
        stmt: Any = (
            select(
                col(TTPTag.technique_id),
                col(TTPTag.sub_technique_id),
                func.max(col(TTPTag.tactic)).label("tactic"),
                func.count().label("count"),
                func.max(col(TTPTag.created_at)).label("last_seen"),
            )
            .group_by(col(TTPTag.technique_id), col(TTPTag.sub_technique_id))
        )
        async with self._session() as session:
            res = await session.execute(stmt)
            # Key access avoids the ``Row.count`` method shadowing the
            # labelled "count" column.
            return [
                TechniqueRollupRow(
                    technique_id=m["technique_id"],
                    sub_technique_id=m["sub_technique_id"],
                    tactic=m["tactic"],
                    count=m["count"],
                    last_seen=m["last_seen"],
                )
                for m in res.mappings().all()
            ]
|
||||
|
||||
Reference in New Issue
Block a user