feat(ttp): E.3.3 repository — insert_tags + listing rollups (dual backend)
Dialect-split: portable rollup queries on TTPMixin; bulk insert with ON CONFLICT DO NOTHING / INSERT IGNORE in the per-dialect repos. Confidence-floor (< 0.3) drop applied at mixin layer before the dialect hook. BaseRepository now declares the six TTP methods abstract. Tests in tests/web/db/test_ttp_repo.py flipped from pytest.fail stubs to real dual-backend behavioral tests; tests/ttp/test_confidence.py drop-below-floor xfail removed.
This commit is contained in:
@@ -1,19 +1,25 @@
|
||||
"""TTP-tagging repository — `ttp_tag` reads + idempotent inserts.
|
||||
"""TTP-tagging repository — ``ttp_tag`` reads + idempotent inserts.
|
||||
|
||||
Contract step E.1.10 of `development/TTP_TAGGING.md`. Method bodies
|
||||
raise ``NotImplementedError``; the SQL lands at E.3 implementation
|
||||
phase. The shape — argument types, return types, idempotency
|
||||
semantics on ``insert_tags`` — is the public contract from this
|
||||
commit forward.
|
||||
Implementation phase E.3.3 of ``development/TTP_TAGGING.md``. The
|
||||
shape was pinned at E.1.10; this file fills in the bodies.
|
||||
|
||||
Per the dual-DB-backend project convention, dialect-specific behavior
|
||||
(``INSERT OR IGNORE`` on SQLite vs ``INSERT IGNORE`` on MySQL) is
|
||||
overridden in the per-dialect subclasses (``decnet.web.db.sqlite``,
|
||||
``decnet.web.db.mysql``); the shared base lives here.
|
||||
Dialect-split convention: portable rollup queries live here on the
|
||||
mixin; the bulk-insert "ignore on duplicate" hook lands in the
|
||||
per-dialect ``SQLiteRepository`` / ``MySQLRepository`` subclasses
|
||||
(``decnet/web/db/sqlite/repository.py`` /
|
||||
``decnet/web/db/mysql/repository.py``) where the actual
|
||||
``ON CONFLICT DO NOTHING`` vs ``INSERT IGNORE`` SQL diverges.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, select
|
||||
from sqlmodel import col
|
||||
|
||||
from decnet.web.db.models import (
|
||||
Attacker,
|
||||
AttackerIdentity,
|
||||
CampaignTechniqueRow,
|
||||
IdentityTechniqueRow,
|
||||
TechniqueRollupRow,
|
||||
@@ -22,85 +28,232 @@ from decnet.web.db.models import (
|
||||
from decnet.web.db.sqlmodel_repo._helpers import _MixinBase
|
||||
|
||||
|
||||
# Confidence floor: tags computed below this value are silently dropped
|
||||
# at insert time. Pinned by tests/ttp/test_confidence.py.
|
||||
_CONFIDENCE_FLOOR: float = 0.3
|
||||
|
||||
|
||||
class TTPMixin(_MixinBase):
|
||||
"""Mixin: TTP-tag query + insert methods composed onto
|
||||
:class:`SQLModelRepository`.
|
||||
|
||||
Expects ``self._session()`` from the base mixin. Adding a new
|
||||
``ttp_tag`` query method here requires adding a contract test in
|
||||
``tests/web/db/test_ttp_repo.py`` (E.2.13) AND a parametrized run
|
||||
against both SQLite and MySQL via the existing ``db_backends``
|
||||
fixture.
|
||||
Expects ``self._session()`` from the base mixin and
|
||||
``self._insert_tags_or_ignore()`` from the per-dialect repo.
|
||||
Adding a new ``ttp_tag`` query method here requires adding a
|
||||
contract test in ``tests/web/db/test_ttp_repo.py`` (E.2.13) AND a
|
||||
parametrized run against both SQLite and MySQL via the existing
|
||||
``db_backends`` fixture.
|
||||
"""
|
||||
|
||||
async def _insert_tags_or_ignore(
|
||||
self, rows: list[TTPTag],
|
||||
) -> int:
|
||||
"""Dialect-specific bulk INSERT … ON CONFLICT DO NOTHING.
|
||||
|
||||
Default body is the portable two-step (SELECT then ``add_all``)
|
||||
used as a safety-net; the SQLite + MySQL repositories override
|
||||
this with their native ``OR IGNORE`` / ``INSERT IGNORE`` SQL.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"_insert_tags_or_ignore is overridden in per-dialect repos",
|
||||
)
|
||||
|
||||
async def insert_tags(self, rows: list[TTPTag]) -> int:
|
||||
"""Bulk-upsert tags with ``INSERT OR IGNORE`` semantics.
|
||||
|
||||
Returns the number of rows actually inserted (i.e. that were
|
||||
not already present at their deterministic
|
||||
:func:`compute_tag_uuid` PK). The idempotency property is the
|
||||
load-bearing contract: replaying the same source events must
|
||||
converge to the same tag set without writing duplicates and
|
||||
without raising. See TTP_TAGGING.md §"Idempotency" + §"Bus
|
||||
topics — Loop-prevention invariant".
|
||||
Drops rows with ``confidence < _CONFIDENCE_FLOOR`` (= 0.3) before
|
||||
the write. Returns the count of rows actually inserted (i.e. that
|
||||
passed the floor AND were not already present at their
|
||||
deterministic :func:`compute_tag_uuid` PK).
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"insert_tags lands at E.3 implementation phase",
|
||||
)
|
||||
if not rows:
|
||||
return 0
|
||||
kept = [r for r in rows if r.confidence >= _CONFIDENCE_FLOOR]
|
||||
if not kept:
|
||||
return 0
|
||||
return await self._insert_tags_or_ignore(kept)
|
||||
|
||||
async def list_techniques_by_identity(
|
||||
self,
|
||||
uuid: str,
|
||||
) -> list[IdentityTechniqueRow]:
|
||||
"""Per-Identity TTP rollup. Joins ``ttp_tag`` on
|
||||
``identity_uuid`` and groups by ``(technique_id,
|
||||
sub_technique_id)``. Includes identity-rollup tags (with NULL
|
||||
``attacker_uuid``) and per-event tags whose denormalised
|
||||
``identity_uuid`` matches.
|
||||
"""Per-Identity TTP rollup. Includes (a) tags directly anchored
|
||||
on this identity (``identity_uuid == uuid``) — covers identity-
|
||||
rollup tags with NULL ``attacker_uuid`` — and (b) tags anchored
|
||||
on an Attacker whose ``identity_id`` projects up to this
|
||||
identity (per-Attacker tags rolling up to the Identity).
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"list_techniques_by_identity lands at E.3",
|
||||
)
|
||||
async with self._session() as session:
|
||||
attacker_uuids_subq = (
|
||||
select(col(Attacker.uuid))
|
||||
.where(col(Attacker.identity_id) == uuid)
|
||||
.scalar_subquery()
|
||||
)
|
||||
stmt: Any = (
|
||||
select(
|
||||
col(TTPTag.technique_id),
|
||||
col(TTPTag.sub_technique_id),
|
||||
func.max(col(TTPTag.tactic)).label("tactic"),
|
||||
func.count().label("count"),
|
||||
func.min(col(TTPTag.created_at)).label("first_seen"),
|
||||
func.max(col(TTPTag.created_at)).label("last_seen"),
|
||||
func.max(col(TTPTag.confidence)).label("confidence_max"),
|
||||
)
|
||||
.where(
|
||||
(col(TTPTag.identity_uuid) == uuid)
|
||||
| (col(TTPTag.attacker_uuid).in_(attacker_uuids_subq))
|
||||
)
|
||||
.group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
|
||||
)
|
||||
res = await session.execute(stmt)
|
||||
return [
|
||||
IdentityTechniqueRow(
|
||||
technique_id=r.technique_id,
|
||||
sub_technique_id=r.sub_technique_id,
|
||||
tactic=r.tactic,
|
||||
count=r.count,
|
||||
first_seen=r.first_seen,
|
||||
last_seen=r.last_seen,
|
||||
confidence_max=r.confidence_max,
|
||||
)
|
||||
for r in res.all()
|
||||
]
|
||||
|
||||
async def list_techniques_by_attacker(
|
||||
self,
|
||||
uuid: str,
|
||||
) -> list[IdentityTechniqueRow]:
|
||||
"""Per-Attacker (per-IP) TTP rollup. Reads ``ttp_tag`` filtered
|
||||
on ``attacker_uuid``. Identity-rollup tags (NULL attacker
|
||||
anchor) are deliberately excluded — those belong to the
|
||||
Identity, not any one IP underneath it.
|
||||
"""Per-Attacker (per-IP) TTP rollup. Identity-rollup tags
|
||||
(``attacker_uuid IS NULL``) are deliberately excluded — those
|
||||
belong to the Identity, not any one IP underneath it.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"list_techniques_by_attacker lands at E.3",
|
||||
)
|
||||
async with self._session() as session:
|
||||
stmt: Any = (
|
||||
select(
|
||||
col(TTPTag.technique_id),
|
||||
col(TTPTag.sub_technique_id),
|
||||
func.max(col(TTPTag.tactic)).label("tactic"),
|
||||
func.count().label("count"),
|
||||
func.min(col(TTPTag.created_at)).label("first_seen"),
|
||||
func.max(col(TTPTag.created_at)).label("last_seen"),
|
||||
func.max(col(TTPTag.confidence)).label("confidence_max"),
|
||||
)
|
||||
.where(TTPTag.attacker_uuid == uuid)
|
||||
.group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
|
||||
)
|
||||
res = await session.execute(stmt)
|
||||
return [
|
||||
IdentityTechniqueRow(
|
||||
technique_id=r.technique_id,
|
||||
sub_technique_id=r.sub_technique_id,
|
||||
tactic=r.tactic,
|
||||
count=r.count,
|
||||
first_seen=r.first_seen,
|
||||
last_seen=r.last_seen,
|
||||
confidence_max=r.confidence_max,
|
||||
)
|
||||
for r in res.all()
|
||||
]
|
||||
|
||||
async def list_techniques_by_campaign(
|
||||
self,
|
||||
uuid: str,
|
||||
) -> list[CampaignTechniqueRow]:
|
||||
"""Campaign-wide TTP rollup. Joins ``ttp_tag`` -> Identity ->
|
||||
``campaign_uuid`` and groups across all member Identities.
|
||||
"""Campaign-wide TTP rollup. Joins ``ttp_tag.identity_uuid`` →
|
||||
:class:`AttackerIdentity` and filters on
|
||||
``AttackerIdentity.campaign_id``. Note: the FK column is
|
||||
``campaign_id``, not ``campaign_uuid``.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"list_techniques_by_campaign lands at E.3",
|
||||
)
|
||||
async with self._session() as session:
|
||||
stmt: Any = (
|
||||
select(
|
||||
col(TTPTag.technique_id),
|
||||
col(TTPTag.sub_technique_id),
|
||||
func.max(col(TTPTag.tactic)).label("tactic"),
|
||||
func.count().label("count"),
|
||||
func.count(func.distinct(col(TTPTag.identity_uuid))).label(
|
||||
"identity_count",
|
||||
),
|
||||
func.max(col(TTPTag.created_at)).label("last_seen"),
|
||||
)
|
||||
.join(
|
||||
AttackerIdentity,
|
||||
AttackerIdentity.uuid == TTPTag.identity_uuid,
|
||||
)
|
||||
.where(AttackerIdentity.campaign_id == uuid)
|
||||
.group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
|
||||
)
|
||||
res = await session.execute(stmt)
|
||||
return [
|
||||
CampaignTechniqueRow(
|
||||
technique_id=r.technique_id,
|
||||
sub_technique_id=r.sub_technique_id,
|
||||
tactic=r.tactic,
|
||||
count=r.count,
|
||||
identity_count=r.identity_count,
|
||||
last_seen=r.last_seen,
|
||||
)
|
||||
for r in res.all()
|
||||
]
|
||||
|
||||
async def list_techniques_by_session(
|
||||
self,
|
||||
sid: str,
|
||||
) -> list[IdentityTechniqueRow]:
|
||||
"""Session-scoped TTP timeline. Filtered on ``ttp_tag.session_id``.
|
||||
Used by the SessionDetail page (post-v0).
|
||||
"""Session-scoped TTP timeline. Filtered on
|
||||
``ttp_tag.session_id``.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"list_techniques_by_session lands at E.3",
|
||||
)
|
||||
async with self._session() as session:
|
||||
stmt: Any = (
|
||||
select(
|
||||
col(TTPTag.technique_id),
|
||||
col(TTPTag.sub_technique_id),
|
||||
func.max(col(TTPTag.tactic)).label("tactic"),
|
||||
func.count().label("count"),
|
||||
func.min(col(TTPTag.created_at)).label("first_seen"),
|
||||
func.max(col(TTPTag.created_at)).label("last_seen"),
|
||||
func.max(col(TTPTag.confidence)).label("confidence_max"),
|
||||
)
|
||||
.where(TTPTag.session_id == sid)
|
||||
.group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
|
||||
)
|
||||
res = await session.execute(stmt)
|
||||
return [
|
||||
IdentityTechniqueRow(
|
||||
technique_id=r.technique_id,
|
||||
sub_technique_id=r.sub_technique_id,
|
||||
tactic=r.tactic,
|
||||
count=r.count,
|
||||
first_seen=r.first_seen,
|
||||
last_seen=r.last_seen,
|
||||
confidence_max=r.confidence_max,
|
||||
)
|
||||
for r in res.all()
|
||||
]
|
||||
|
||||
async def list_distinct_techniques(self) -> list[TechniqueRollupRow]:
|
||||
"""Fleet-wide distinct-technique rollup with counts +
|
||||
most-recent-seen timestamps. Backs ``GET /api/v1/ttp/techniques``.
|
||||
most-recent-seen timestamps.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"list_distinct_techniques lands at E.3",
|
||||
)
|
||||
async with self._session() as session:
|
||||
stmt: Any = (
|
||||
select(
|
||||
col(TTPTag.technique_id),
|
||||
col(TTPTag.sub_technique_id),
|
||||
func.max(col(TTPTag.tactic)).label("tactic"),
|
||||
func.count().label("count"),
|
||||
func.max(col(TTPTag.created_at)).label("last_seen"),
|
||||
)
|
||||
.group_by(TTPTag.technique_id, TTPTag.sub_technique_id)
|
||||
)
|
||||
res = await session.execute(stmt)
|
||||
return [
|
||||
TechniqueRollupRow(
|
||||
technique_id=r.technique_id,
|
||||
sub_technique_id=r.sub_technique_id,
|
||||
tactic=r.tactic,
|
||||
count=r.count,
|
||||
last_seen=r.last_seen,
|
||||
)
|
||||
for r in res.all()
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user