Files
DECNET/decnet/ttp/factory.py
anti 44ade3eb63 fix(ttp): E.3.18a worker hydrates per-lifter rule indexes via watch_store
Each per-source lifter holds its own RuleIndex and exposes an
`async watch_store()` that loads the corpus and drains store change
events forever. Until this commit nothing called `watch_store()` in
production — every dispatch index stayed empty and no rule fired.

- Add `WatchableTagger` runtime-checkable Protocol in `decnet.ttp.base`.
- `CompositeTagger.iter_watchables()` yields lifters that satisfy it.
- `run_ttp_worker_loop` fans out one task per watchable, cancelled
  and awaited alongside pump/heartbeat/control in the existing finally.
- Watch failures log and exit the watch task without taking the
  worker down — mirrors the pump-task tolerance contract.
2026-05-02 01:25:15 -04:00

161 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tagger factory + composite tagger.

Contract step E.1.4 of ``development/TTP_TAGGING.md``. Mirrors the
provider-subpackage convention used by :mod:`decnet.intel.factory` and
:mod:`decnet.clustering.factory`: callers obtain the active tagger via
:func:`get_tagger` rather than instantiating a concrete class directly.

The composite tagger is the only shippable tagger type — per-lifter
classes (E.1.6) are children of the composite, not standalone
``DECNET_TTP_TAGGER_TYPE`` values.

Configuration:

* ``DECNET_TTP_TAGGER_TYPE`` — which tagger to instantiate. Default
  ``"composite"``. Unknown values raise :class:`ValueError` so a typo
  in ``decnet.ini`` surfaces immediately rather than silently falling
  back.
"""
from __future__ import annotations
import asyncio
import logging
import os
from typing import Final
from collections.abc import Iterator
from decnet.ttp.base import (
KNOWN_SOURCE_KINDS,
Tagger,
TaggerEvent,
WatchableTagger,
)
from decnet.web.db.models.ttp import TTPTag
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
# Tagger type names this factory recognises; quoted in the ``ValueError``
# message raised for an unrecognised ``DECNET_TTP_TAGGER_TYPE``.
_KNOWN: Final[tuple[str, ...]] = ("composite",)
# Tagger type used when ``DECNET_TTP_TAGGER_TYPE`` is not set.
_DEFAULT: Final[str] = "composite"
class CompositeTagger(Tagger):
    """Dispatch each event to the child lifters registered for its kind.

    Children declare the ``source_kind`` values they accept through
    their ``HANDLES`` attribute; the composite builds a per-kind index
    from those declarations at construction time and uses it to route
    every incoming event.

    When no child claims an event's ``source_kind`` the event is
    dropped, but not silently: the composite logs the kind once per
    process (WARNING when the kind is listed in ``KNOWN_SOURCE_KINDS``,
    INFO otherwise) so the gap is observable.

    An empty ``lifters`` list is legal; every event is then reported
    as unhandled and yields no tags.
    """

    name = "composite"

    # The composite declares no kinds of its own. Routing is driven by
    # the per-kind index assembled from the children in ``__init__``,
    # so this attribute is intentionally empty.
    HANDLES: frozenset[str] = frozenset()

    def __init__(self, lifters: list[Tagger]) -> None:
        """Store the children and build the ``source_kind`` index.

        Args:
            lifters: Child taggers. The list is copied, so later
                mutation by the caller does not affect this instance.
                Registration order is preserved within each kind.
        """
        self._lifters: list[Tagger] = list(lifters)

        by_kind: dict[str, list[Tagger]] = {}
        for child in self._lifters:
            for source_kind in child.HANDLES:
                if source_kind not in by_kind:
                    by_kind[source_kind] = []
                by_kind[source_kind].append(child)
        self._by_kind: dict[str, list[Tagger]] = by_kind

        # Kinds already reported by ``_log_unhandled``. Plain sets give
        # "log once per kind per process" without a rate limiter.
        self._warned_known: set[str] = set()
        self._informed_unknown: set[str] = set()

    def iter_watchables(self) -> Iterator[WatchableTagger]:
        """Yield each child that satisfies :class:`WatchableTagger`.

        Selection is a plain ``isinstance`` check against the protocol,
        so callers need no knowledge of the concrete lifter classes.
        Children are yielded in registration order.
        """
        yield from (
            child
            for child in self._lifters
            if isinstance(child, WatchableTagger)
        )

    async def tag(self, event: TaggerEvent) -> list[TTPTag]:
        """Run every lifter registered for ``event.source_kind``.

        The claiming lifters are awaited together via
        :func:`asyncio.gather` and their tag lists are concatenated in
        lifter registration order.

        Args:
            event: The event to tag.

        Returns:
            All tags produced by the claiming lifters, or an empty
            list when no lifter claims the event's kind (in which case
            the kind is also reported through ``_log_unhandled``).
        """
        claimants = self._by_kind.get(event.source_kind)
        if not claimants:
            self._log_unhandled(event.source_kind)
            return []

        per_lifter = await asyncio.gather(
            *[child.tag(event) for child in claimants]
        )
        return [tag for batch in per_lifter for tag in batch]

    def _log_unhandled(self, source_kind: str) -> None:
        """Report a ``source_kind`` no lifter claims, once per process.

        Args:
            source_kind: The unclaimed kind taken from the event.
        """
        if source_kind in KNOWN_SOURCE_KINDS:
            if source_kind in self._warned_known:
                return
            self._warned_known.add(source_kind)
            # A kind listed as known but with no lifter behind it means
            # events are being dropped, hence WARNING rather than INFO.
            _log.warning(
                "composite tagger: no lifter claims known "
                "source_kind=%r; events will be dropped until a "
                "lifter is registered",
                source_kind,
            )
            return

        if source_kind in self._informed_unknown:
            return
        self._informed_unknown.add(source_kind)
        # A kind outside KNOWN_SOURCE_KINDS is only noted at INFO and
        # otherwise ignored; it is never treated as an error.
        _log.info(
            "composite tagger: unknown source_kind=%r "
            "(not in KNOWN_SOURCE_KINDS); ignoring",
            source_kind,
        )
def get_tagger() -> Tagger:
    """Build and return the tagger selected by configuration.

    The tagger type is read from the ``DECNET_TTP_TAGGER_TYPE``
    environment variable (whitespace-trimmed and lower-cased), falling
    back to the module default when the variable is not set.

    Construction is synchronous. Every lifter receives the same rule
    store instance; this factory does not start any per-lifter watch
    loop — that is left to the worker.

    Returns:
        A :class:`CompositeTagger` wrapping one instance of each
        shipped lifter, in a fixed order.

    Raises:
        ValueError: If the configured tagger type is not
            ``"composite"``.
    """
    tagger_type = os.environ.get("DECNET_TTP_TAGGER_TYPE", _DEFAULT)
    tagger_type = tagger_type.strip().lower()

    if tagger_type != "composite":
        raise ValueError(
            f"Unknown tagger: {tagger_type!r}. Known: {_KNOWN}"
        )

    # Lifter modules and the store factory are imported at call time,
    # only once the configured type is known to need them.
    from decnet.ttp.impl.behavioral_lifter import BehavioralLifter
    from decnet.ttp.impl.canary_fingerprint_lifter import (
        CanaryFingerprintLifter,
    )
    from decnet.ttp.impl.credential_lifter import CredentialLifter
    from decnet.ttp.impl.email_lifter import EmailLifter
    from decnet.ttp.impl.identity_lifter import IdentityLifter
    from decnet.ttp.impl.intel_lifter import IntelLifter
    from decnet.ttp.store.factory import get_rule_store

    shared_store = get_rule_store()

    # Order matters: it fixes the registration order inside the
    # composite and therefore the order of tags for a shared kind.
    lifter_classes = (
        BehavioralLifter,
        IntelLifter,
        CanaryFingerprintLifter,
        EmailLifter,
        IdentityLifter,
        CredentialLifter,
    )
    return CompositeTagger(
        lifters=[lifter_cls(shared_store) for lifter_cls in lifter_classes]
    )
# Names exported by ``from decnet.ttp.factory import *``.
__all__ = ["get_tagger", "CompositeTagger"]