Each per-source lifter holds its own RuleIndex and exposes an `async watch_store()` that loads the corpus and drains store change events forever. Until this commit nothing called `watch_store()` in production — every dispatch index stayed empty and no rule fired. - Add `WatchableTagger` runtime-checkable Protocol in `decnet.ttp.base`. - `CompositeTagger.iter_watchables()` yields lifters that satisfy it. - `run_ttp_worker_loop` fans out one task per watchable, cancelled and awaited alongside pump/heartbeat/control in the existing finally. - Watch failures log and exit the watch task without taking the worker down — mirrors the pump-task tolerance contract.
161 lines
6.1 KiB
Python
161 lines
6.1 KiB
Python
"""Tagger factory + composite tagger.
|
||
|
||
Contract step E.1.4 of ``development/TTP_TAGGING.md``. Mirrors the
|
||
provider-subpackage convention used by :mod:`decnet.intel.factory` and
|
||
:mod:`decnet.clustering.factory`: callers obtain the active tagger via
|
||
:func:`get_tagger` rather than instantiating a concrete class directly.
|
||
|
||
The composite tagger is the only shippable tagger type — per-lifter
|
||
classes (E.1.6) are children of the composite, not standalone tagger
|
||
``DECNET_TTP_TAGGER_TYPE`` values.
|
||
|
||
Configuration:
|
||
|
||
* ``DECNET_TTP_TAGGER_TYPE`` — which tagger to instantiate. Default
|
||
``"composite"``. Unknown values raise :class:`ValueError` so a typo
|
||
in ``decnet.ini`` surfaces immediately rather than silently falling
|
||
back.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import logging
|
||
import os
|
||
from typing import Final
|
||
|
||
from collections.abc import Iterator
|
||
|
||
from decnet.ttp.base import (
|
||
KNOWN_SOURCE_KINDS,
|
||
Tagger,
|
||
TaggerEvent,
|
||
WatchableTagger,
|
||
)
|
||
from decnet.web.db.models.ttp import TTPTag
|
||
|
||
_log = logging.getLogger(__name__)
|
||
|
||
_KNOWN: Final[tuple[str, ...]] = ("composite",)
|
||
_DEFAULT: Final[str] = "composite"
|
||
|
||
|
||
class CompositeTagger(Tagger):
|
||
"""Fans an event out to every lifter that claims its ``source_kind``.
|
||
|
||
The composite is the runtime end of the closed-by-enumeration
|
||
bridge described in :mod:`decnet.ttp.base`: when an event arrives
|
||
with a ``source_kind`` no lifter claims, the composite emits a
|
||
structured log line so the silent-drop trap from the design doc
|
||
becomes observable.
|
||
|
||
During the contract phase (this commit) ``lifters=[]`` is the
|
||
legal state — E.1.6 wires the real per-source lifters in.
|
||
"""
|
||
|
||
name = "composite"
|
||
# The composite itself accepts every event; per-kind dispatch is
|
||
# delegated to children. Empty here is "n/a, computed from
|
||
# children" — the dispatch index below is what actually drives
|
||
# the fan-out.
|
||
HANDLES: frozenset[str] = frozenset()
|
||
|
||
def __init__(self, lifters: list[Tagger]) -> None:
|
||
self._lifters: list[Tagger] = list(lifters)
|
||
index: dict[str, list[Tagger]] = {}
|
||
for lifter in self._lifters:
|
||
for kind in lifter.HANDLES:
|
||
index.setdefault(kind, []).append(lifter)
|
||
self._by_kind: dict[str, list[Tagger]] = index
|
||
# Per-process dedup state so a flood of one unknown kind
|
||
# produces one log line, not one per event. A simple set
|
||
# is fine for the contract; E.1.6 may swap in a proper
|
||
# rate-limiter once production traffic shapes are known.
|
||
self._warned_known: set[str] = set()
|
||
self._informed_unknown: set[str] = set()
|
||
|
||
def iter_watchables(self) -> Iterator[WatchableTagger]:
|
||
"""Yield every child lifter that hot-reloads from a RuleStore.
|
||
|
||
The worker (E.3.14) starts one ``asyncio.Task`` per yielded
|
||
lifter so its dispatch index hydrates at startup; without this
|
||
every index stays empty and no rule fires in production.
|
||
Filtering on the structural :class:`WatchableTagger` protocol
|
||
keeps the worker free of per-lifter type knowledge.
|
||
"""
|
||
for lifter in self._lifters:
|
||
if isinstance(lifter, WatchableTagger):
|
||
yield lifter
|
||
|
||
async def tag(self, event: TaggerEvent) -> list[TTPTag]:
|
||
lifters = self._by_kind.get(event.source_kind, [])
|
||
if not lifters:
|
||
self._log_unhandled(event.source_kind)
|
||
return []
|
||
results = await asyncio.gather(*(t.tag(event) for t in lifters))
|
||
out: list[TTPTag] = []
|
||
for tags in results:
|
||
out.extend(tags)
|
||
return out
|
||
|
||
def _log_unhandled(self, source_kind: str) -> None:
|
||
if source_kind in KNOWN_SOURCE_KINDS:
|
||
if source_kind not in self._warned_known:
|
||
self._warned_known.add(source_kind)
|
||
# Producer ships a kind that *should* be handled but
|
||
# no lifter claims it — almost certainly a missed
|
||
# E.1.6 update. Loud once per kind per process.
|
||
_log.warning(
|
||
"composite tagger: no lifter claims known "
|
||
"source_kind=%r; events will be dropped until a "
|
||
"lifter is registered",
|
||
source_kind,
|
||
)
|
||
else:
|
||
if source_kind not in self._informed_unknown:
|
||
self._informed_unknown.add(source_kind)
|
||
# Telemetry from a future feature, no lifter yet, by
|
||
# design (lines 160–195 of the design doc). INFO once
|
||
# per process; never an error.
|
||
_log.info(
|
||
"composite tagger: unknown source_kind=%r "
|
||
"(not in KNOWN_SOURCE_KINDS); ignoring",
|
||
source_kind,
|
||
)
|
||
|
||
|
||
def get_tagger() -> Tagger:
|
||
"""Return the configured tagger instance.
|
||
|
||
Synchronous construction: each shipped lifter takes the shared
|
||
:class:`RuleStore` reference, but the per-lifter watch loops are
|
||
started by the worker (E.3.14), not by this factory. Tests that
|
||
instantiate via this path get an idle composite — exercising the
|
||
watch loop is the worker's contract.
|
||
"""
|
||
name = os.environ.get("DECNET_TTP_TAGGER_TYPE", _DEFAULT).strip().lower()
|
||
if name == "composite":
|
||
from decnet.ttp.impl.behavioral_lifter import BehavioralLifter
|
||
from decnet.ttp.impl.canary_fingerprint_lifter import (
|
||
CanaryFingerprintLifter,
|
||
)
|
||
from decnet.ttp.impl.credential_lifter import CredentialLifter
|
||
from decnet.ttp.impl.email_lifter import EmailLifter
|
||
from decnet.ttp.impl.identity_lifter import IdentityLifter
|
||
from decnet.ttp.impl.intel_lifter import IntelLifter
|
||
from decnet.ttp.store.factory import get_rule_store
|
||
store = get_rule_store()
|
||
return CompositeTagger(lifters=[
|
||
BehavioralLifter(store),
|
||
IntelLifter(store),
|
||
CanaryFingerprintLifter(store),
|
||
EmailLifter(store),
|
||
IdentityLifter(store),
|
||
CredentialLifter(store),
|
||
])
|
||
raise ValueError(
|
||
f"Unknown tagger: {name!r}. Known: {_KNOWN}"
|
||
)
|
||
|
||
|
||
__all__ = ["get_tagger", "CompositeTagger"]
|