Files
DECNET/decnet/ttp/factory.py
anti 101127247e feat(ttp): E.3.14 worker bootstrap (insert + ttp.tagged publish)
Inner loop drains a per-process asyncio.Queue populated by one pump
task per topic in _TOPICS, dispatches each event through
CompositeTagger, persists via repo.insert_tags(), and publishes
ttp.tagged + per-technique ttp.rule.fired.<id> only when the insert
returned a non-zero rowcount.

CompositeTagger seeded with all six lifters (Behavioral, Intel,
CanaryFingerprint, Email, Identity, Credential).

Loop-prevention invariant from TTP_TAGGING.md §"Bus topics" enforced:
N replays of the same upstream event publish exactly one ttp.tagged
event. test_worker_bus covers both the direct invocation path and
the idempotency replay path.

Intel catch-up via attacker.session.ended is intentionally deferred
to E.3.14b — needs a session→intel join the repo doesn't expose yet.
2026-05-01 20:57:57 -04:00

141 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tagger factory + composite tagger.
Contract step E.1.4 of ``development/TTP_TAGGING.md``. Mirrors the
provider-subpackage convention used by :mod:`decnet.intel.factory` and
:mod:`decnet.clustering.factory`: callers obtain the active tagger via
:func:`get_tagger` rather than instantiating a concrete class directly.
The composite tagger is the only shippable tagger type — per-lifter
classes (E.1.6) are children of the composite, not standalone taggers
selectable via ``DECNET_TTP_TAGGER_TYPE``.
Configuration:
* ``DECNET_TTP_TAGGER_TYPE`` — which tagger to instantiate. Default
``"composite"``. Unknown values raise :class:`ValueError` so a typo
in ``decnet.ini`` surfaces immediately rather than silently falling
back.
"""
from __future__ import annotations
import asyncio
import logging
import os
from typing import Final
from decnet.ttp.base import KNOWN_SOURCE_KINDS, Tagger, TaggerEvent
from decnet.web.db.models.ttp import TTPTag
# Module-level logger, named after this module's import path.
_log = logging.getLogger(__name__)
# Tagger type names this factory recognises; quoted verbatim in the
# ValueError that get_tagger() raises for anything else.
_KNOWN: Final[tuple[str, ...]] = ("composite",)
# Tagger type used when DECNET_TTP_TAGGER_TYPE is absent from the environment.
_DEFAULT: Final[str] = "composite"
class CompositeTagger(Tagger):
    """Dispatch each event to every child lifter claiming its ``source_kind``.

    Children advertise the kinds they accept through their ``HANDLES``
    attribute; the composite builds a kind -> lifters index from those
    at construction time and uses it to fan events out.

    When no child claims an event's ``source_kind`` the event yields no
    tags, and a log line is emitted the first time each such kind is
    seen by this instance, so the drop is observable rather than silent.

    An empty ``lifters`` list is a legal configuration: every event is
    then treated as unclaimed.
    """

    name = "composite"

    # Left empty on purpose: the composite does no per-kind filtering of
    # its own. What it effectively handles is derived from its children
    # through the dispatch index built in ``__init__``.
    HANDLES: frozenset[str] = frozenset()

    def __init__(self, lifters: list[Tagger]) -> None:
        """Index ``lifters`` by each source kind they declare in ``HANDLES``.

        Args:
            lifters: Child taggers to fan events out to. The list is
                copied, so later mutation by the caller has no effect.
        """
        self._lifters: list[Tagger] = list(lifters)

        # Registration order is preserved within each kind's list.
        by_kind: dict[str, list[Tagger]] = {}
        for child in self._lifters:
            for source_kind in child.HANDLES:
                if source_kind not in by_kind:
                    by_kind[source_kind] = []
                by_kind[source_kind].append(child)
        self._by_kind: dict[str, list[Tagger]] = by_kind

        # Kinds already reported by this instance. Plain sets keep a
        # flood of one unclaimed kind down to a single log line; E.1.6
        # may replace them with a real rate limiter once production
        # traffic shapes are known.
        self._warned_known: set[str] = set()
        self._informed_unknown: set[str] = set()

    async def tag(self, event: TaggerEvent) -> list[TTPTag]:
        """Run every lifter registered for ``event.source_kind``.

        The matching lifters are awaited together via ``asyncio.gather``
        (called without ``return_exceptions``, so a lifter failure
        propagates to the caller).

        Args:
            event: The event to tag.

        Returns:
            All tags produced by the matching lifters, concatenated in
            lifter registration order. Empty when no lifter claims the
            event's kind.
        """
        kind = event.source_kind
        claimants = self._by_kind.get(kind)
        if not claimants:
            self._log_unhandled(kind)
            return []

        per_lifter = await asyncio.gather(
            *[child.tag(event) for child in claimants]
        )
        return [tag for batch in per_lifter for tag in batch]

    def _log_unhandled(self, source_kind: str) -> None:
        """Report an unclaimed ``source_kind``, at most once per kind.

        Kinds listed in ``KNOWN_SOURCE_KINDS`` are logged at WARNING;
        any other kind is logged at INFO. Repeat calls for a kind that
        was already reported are no-ops.

        Args:
            source_kind: The kind for which no lifter is registered.
        """
        if source_kind in KNOWN_SOURCE_KINDS:
            if source_kind in self._warned_known:
                return
            self._warned_known.add(source_kind)
            # A known kind with no claimant almost certainly means an
            # E.1.6 lifter update was missed, hence the louder level.
            _log.warning(
                "composite tagger: no lifter claims known "
                "source_kind=%r; events will be dropped until a "
                "lifter is registered",
                source_kind,
            )
            return

        if source_kind in self._informed_unknown:
            return
        self._informed_unknown.add(source_kind)
        # Telemetry from a future feature that has no lifter yet, by
        # design (lines 160-195 of the design doc). Informational only;
        # never an error.
        _log.info(
            "composite tagger: unknown source_kind=%r "
            "(not in KNOWN_SOURCE_KINDS); ignoring",
            source_kind,
        )
def get_tagger() -> Tagger:
    """Build the tagger selected by ``DECNET_TTP_TAGGER_TYPE``.

    The environment value is stripped and lower-cased before matching;
    when the variable is unset, the module default is used. The only
    recognised type is ``"composite"``, which yields a
    :class:`CompositeTagger` whose six lifters share one rule store
    obtained from ``get_rule_store()``.

    Construction is synchronous and only instantiates objects. Per the
    design contract, per-lifter watch loops are started by the worker
    (E.3.14), not by this factory, so a tagger obtained here is idle.

    Returns:
        The configured tagger instance.

    Raises:
        ValueError: If the normalised type name is anything other than
            ``"composite"`` (including an empty string).
    """
    name = os.environ.get("DECNET_TTP_TAGGER_TYPE", _DEFAULT)
    name = name.strip().lower()
    if name != "composite":
        raise ValueError(
            f"Unknown tagger: {name!r}. Known: {_KNOWN}"
        )

    # Lifter modules are imported lazily, only once the composite type
    # has actually been selected.
    from decnet.ttp.impl.behavioral_lifter import BehavioralLifter
    from decnet.ttp.impl.canary_fingerprint_lifter import (
        CanaryFingerprintLifter,
    )
    from decnet.ttp.impl.credential_lifter import CredentialLifter
    from decnet.ttp.impl.email_lifter import EmailLifter
    from decnet.ttp.impl.identity_lifter import IdentityLifter
    from decnet.ttp.impl.intel_lifter import IntelLifter
    from decnet.ttp.store.factory import get_rule_store

    store = get_rule_store()
    # Order here is the registration (and therefore dispatch) order.
    lifter_classes = (
        BehavioralLifter,
        IntelLifter,
        CanaryFingerprintLifter,
        EmailLifter,
        IdentityLifter,
        CredentialLifter,
    )
    return CompositeTagger(
        lifters=[lifter_cls(store) for lifter_cls in lifter_classes]
    )
# Public API of this module: the factory function and the composite class.
__all__ = ["get_tagger", "CompositeTagger"]