Dialect-split: portable rollup queries on TTPMixin; bulk insert with ON CONFLICT DO NOTHING / INSERT IGNORE in the per-dialect repos. Confidence-floor (< 0.3) drop applied at mixin layer before the dialect hook. BaseRepository now declares the six TTP methods abstract. Tests in tests/web/db/test_ttp_repo.py flipped from pytest.fail stubs to real dual-backend behavioral tests; tests/ttp/test_confidence.py drop-below-floor xfail removed.
260 lines
10 KiB
Python
260 lines
10 KiB
Python
"""TTP-tagging repository — ``ttp_tag`` reads + idempotent inserts.

Implementation phase E.3.3 of ``development/TTP_TAGGING.md``. The
shape was pinned at E.1.10; this file fills in the bodies.

Dialect-split convention: portable rollup queries live here on the
mixin; the bulk-insert "ignore on duplicate" hook lands in the
per-dialect ``SQLiteRepository`` / ``MySQLRepository`` subclasses
(``decnet/web/db/sqlite/repository.py`` /
``decnet/web/db/mysql/repository.py``) where the actual
``ON CONFLICT DO NOTHING`` vs ``INSERT IGNORE`` SQL diverges.
"""
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
from sqlalchemy import func, select
|
|
from sqlmodel import col
|
|
|
|
from decnet.web.db.models import (
|
|
Attacker,
|
|
AttackerIdentity,
|
|
CampaignTechniqueRow,
|
|
IdentityTechniqueRow,
|
|
TechniqueRollupRow,
|
|
TTPTag,
|
|
)
|
|
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
|
|
|
|
|
# Confidence floor: tags whose ``confidence`` is strictly below this value
# are silently dropped by ``TTPMixin.insert_tags`` before the write; a tag
# sitting exactly on the floor is kept (the filter is ``>=``).
# Pinned by tests/ttp/test_confidence.py.
_CONFIDENCE_FLOOR: float = 0.3
|
|
|
|
|
|
class TTPMixin(_MixinBase):
    """Mixin: TTP-tag query + insert methods composed onto
    :class:`SQLModelRepository`.

    Expects ``self._session()`` from the base mixin and an override of
    ``self._insert_tags_or_ignore()`` from the per-dialect repo.
    Adding a new ``ttp_tag`` query method here requires adding a
    contract test in ``tests/web/db/test_ttp_repo.py`` (E.2.13) AND a
    parametrized run against both SQLite and MySQL via the existing
    ``db_backends`` fixture.
    """

    async def _insert_tags_or_ignore(
        self, rows: list[TTPTag],
    ) -> int:
        """Dialect hook: bulk INSERT that ignores already-present rows.

        The mixin ships NO portable fallback: this base body always
        raises. The SQLite + MySQL repositories are expected to
        override it with their native ``ON CONFLICT DO NOTHING`` /
        ``INSERT IGNORE`` SQL.

        Args:
            rows: Tags to write; :meth:`insert_tags` has already
                applied the confidence floor before calling this.

        Returns:
            The number of rows inserted (contract for overrides).

        Raises:
            NotImplementedError: Always, when not overridden.
        """
        raise NotImplementedError(
            "_insert_tags_or_ignore is overridden in per-dialect repos",
        )

    async def insert_tags(self, rows: list[TTPTag]) -> int:
        """Bulk-insert tags with insert-or-ignore semantics.

        Drops rows with ``confidence < _CONFIDENCE_FLOOR`` before the
        write, then delegates to the per-dialect
        ``_insert_tags_or_ignore`` hook.

        Args:
            rows: Candidate tags; may be empty.

        Returns:
            ``0`` when ``rows`` is empty or nothing survives the floor
            (the dialect hook is not called in either case); otherwise
            whatever the hook reports — by contract the count of rows
            that passed the floor AND were not already present at
            their deterministic :func:`compute_tag_uuid` PK.
        """
        if not rows:
            return 0
        kept = [r for r in rows if r.confidence >= _CONFIDENCE_FLOOR]
        if not kept:
            return 0
        return await self._insert_tags_or_ignore(kept)

    @staticmethod
    def _ttp_rollup_stmt(criterion: Any) -> Any:
        """Build the per-technique rollup SELECT filtered by ``criterion``.

        Groups ``ttp_tag`` rows by ``(technique_id, sub_technique_id)``
        and aggregates tactic (max), row count, first/last
        ``created_at`` and max confidence — the column set that
        :class:`IdentityTechniqueRow` is populated from.
        """
        return (
            select(
                col(TTPTag.technique_id),
                col(TTPTag.sub_technique_id),
                func.max(col(TTPTag.tactic)).label("tactic"),
                func.count().label("count"),
                func.min(col(TTPTag.created_at)).label("first_seen"),
                func.max(col(TTPTag.created_at)).label("last_seen"),
                func.max(col(TTPTag.confidence)).label("confidence_max"),
            )
            .where(criterion)
            .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
        )

    async def _ttp_rollup_rows(
        self, criterion: Any,
    ) -> list[IdentityTechniqueRow]:
        """Run the shared rollup query and map each result row to an
        :class:`IdentityTechniqueRow`.

        The mapping happens inside the session context, mirroring the
        way every other query method in this mixin consumes results.
        """
        async with self._session() as session:
            res = await session.execute(self._ttp_rollup_stmt(criterion))
            return [
                IdentityTechniqueRow(
                    technique_id=r.technique_id,
                    sub_technique_id=r.sub_technique_id,
                    tactic=r.tactic,
                    count=r.count,
                    first_seen=r.first_seen,
                    last_seen=r.last_seen,
                    confidence_max=r.confidence_max,
                )
                for r in res.all()
            ]

    async def list_techniques_by_identity(
        self,
        uuid: str,
    ) -> list[IdentityTechniqueRow]:
        """Per-Identity TTP rollup.

        Includes (a) tags directly anchored on this identity
        (``identity_uuid == uuid``) — covers identity-rollup tags with
        NULL ``attacker_uuid`` — and (b) tags anchored on an Attacker
        whose ``identity_id`` projects up to this identity
        (per-Attacker tags rolling up to the Identity).

        Args:
            uuid: Identity UUID to roll up.

        Returns:
            One row per ``(technique_id, sub_technique_id)`` pair; no
            ordering is applied.
        """
        # UUIDs of every Attacker that belongs to this identity.
        attacker_uuids = (
            select(col(Attacker.uuid))
            .where(col(Attacker.identity_id) == uuid)
            .scalar_subquery()
        )
        criterion = (col(TTPTag.identity_uuid) == uuid) | (
            col(TTPTag.attacker_uuid).in_(attacker_uuids)
        )
        return await self._ttp_rollup_rows(criterion)

    async def list_techniques_by_attacker(
        self,
        uuid: str,
    ) -> list[IdentityTechniqueRow]:
        """Per-Attacker (per-IP) TTP rollup.

        Identity-rollup tags (``attacker_uuid IS NULL``) are
        deliberately excluded — those belong to the Identity, not any
        one IP underneath it. The equality filter on ``attacker_uuid``
        is what excludes them.

        Args:
            uuid: Attacker UUID to roll up.

        Returns:
            One row per ``(technique_id, sub_technique_id)`` pair; no
            ordering is applied.
        """
        return await self._ttp_rollup_rows(
            col(TTPTag.attacker_uuid) == uuid,
        )

    async def list_techniques_by_campaign(
        self,
        uuid: str,
    ) -> list[CampaignTechniqueRow]:
        """Campaign-wide TTP rollup.

        Joins ``ttp_tag.identity_uuid`` → :class:`AttackerIdentity`
        and filters on ``AttackerIdentity.campaign_id``. Note: the FK
        column is ``campaign_id``, not ``campaign_uuid``.

        Args:
            uuid: Campaign identifier matched against ``campaign_id``.

        Returns:
            One row per ``(technique_id, sub_technique_id)`` pair with
            the tag count, the number of distinct identities carrying
            the technique, and the latest ``created_at``; no ordering
            is applied.
        """
        async with self._session() as session:
            stmt: Any = (
                select(
                    col(TTPTag.technique_id),
                    col(TTPTag.sub_technique_id),
                    func.max(col(TTPTag.tactic)).label("tactic"),
                    func.count().label("count"),
                    func.count(
                        func.distinct(col(TTPTag.identity_uuid)),
                    ).label("identity_count"),
                    func.max(col(TTPTag.created_at)).label("last_seen"),
                )
                # Inner join: tags without a matching identity row
                # (including NULL ``identity_uuid``) drop out here.
                .join(
                    AttackerIdentity,
                    AttackerIdentity.uuid == TTPTag.identity_uuid,
                )
                .where(AttackerIdentity.campaign_id == uuid)
                .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
            )
            res = await session.execute(stmt)
            return [
                CampaignTechniqueRow(
                    technique_id=r.technique_id,
                    sub_technique_id=r.sub_technique_id,
                    tactic=r.tactic,
                    count=r.count,
                    identity_count=r.identity_count,
                    last_seen=r.last_seen,
                )
                for r in res.all()
            ]

    async def list_techniques_by_session(
        self,
        sid: str,
    ) -> list[IdentityTechniqueRow]:
        """Session-scoped TTP rollup, filtered on ``ttp_tag.session_id``.

        Args:
            sid: Session identifier to match.

        Returns:
            One row per ``(technique_id, sub_technique_id)`` pair; no
            ordering is applied.
        """
        return await self._ttp_rollup_rows(col(TTPTag.session_id) == sid)

    async def list_distinct_techniques(self) -> list[TechniqueRollupRow]:
        """Fleet-wide distinct-technique rollup with counts +
        most-recent-seen timestamps.

        Returns:
            One row per ``(technique_id, sub_technique_id)`` pair over
            the whole ``ttp_tag`` table (no filter); no ordering is
            applied.
        """
        async with self._session() as session:
            stmt: Any = (
                select(
                    col(TTPTag.technique_id),
                    col(TTPTag.sub_technique_id),
                    func.max(col(TTPTag.tactic)).label("tactic"),
                    func.count().label("count"),
                    func.max(col(TTPTag.created_at)).label("last_seen"),
                )
                .group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
            )
            res = await session.execute(stmt)
            return [
                TechniqueRollupRow(
                    technique_id=r.technique_id,
                    sub_technique_id=r.sub_technique_id,
                    tactic=r.tactic,
                    count=r.count,
                    last_seen=r.last_seen,
                )
                for r in res.all()
            ]
|