# Text/messaging-domain behavioral observation registry layered on core.
# SPDX: GPL-3.0-or-later (code) / CC-BY-SA-4.0 (attribution-recipes.md).
# Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
# 102 lines · 4.0 KiB · Python
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
"""Registry coverage tests for BEHAVE-TEXT.

Asserts that every primitive listed in scratchpad.md's tables has exactly one
entry in PRIMITIVE_REGISTRY. Drift-detector — failing this test means
scratchpad.md and the registry have diverged.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from decnet_behave_text.spec import PRIMITIVE_REGISTRY, ValueKind
|
|
|
|
# Primitive paths expected by scratchpad.md (hand-extracted; v0 baseline plus
# the later additions marked per section below).
#
# The number in each section header is the count of paths listed in that
# section; keep it in sync when adding or removing entries.
EXPECTED_PRIMITIVES = {
    # stylometric.* (motor analog — 12)
    "stylometric.punctuation_style",
    "stylometric.capitalization_habit",
    "stylometric.emoji_usage",
    "stylometric.emoji_placement",
    "stylometric.message_length_class",
    "stylometric.message_length_variance_class",
    "stylometric.linebreak_style",
    "stylometric.typo_signature",
    "stylometric.function_word_distribution_top50",
    "stylometric.function_word_distribution_top200",
    "stylometric.character_ngram_simhash",
    "stylometric.distinctive_vocabulary_signature",
    # lexical.* (cognitive analog — 8)
    "lexical.vocabulary_richness",
    "lexical.slang_density",
    "lexical.code_switching_rate",
    "lexical.code_switching_matrix_language",
    "lexical.code_switching_embedded_languages",
    "lexical.sentence_complexity_class",
    "lexical.question_formation_style",
    "lexical.imperative_style",
    # temporal_evolution.* (lifecycle/change-over-time — 1, added v0.2)
    "temporal_evolution.lifecycle_phase",
    # network.* (governance/role-shape — 2, added v0.3)
    "network.is_likely_bot",
    "network.governance_role_signal",
    # interaction.* (temporal analog — 6)
    "interaction.response_latency_class",
    "interaction.conversation_initiation_rate",
    "interaction.message_burst_rate",
    "interaction.active_hours_class",
    "interaction.session_duration_class",
    "interaction.attention_pattern",
    # content.* (operational analog — 6, EXPERIMENTAL)
    "content.role_signal",
    "content.transactional_language",
    "content.opsec_awareness",
    "content.targeting_language",
    "content.boasting_pattern",
    "content.conflict_style",
}
|
|
|
|
|
|
def test_registry_covers_expected_primitives_exactly():
    """Check that the registry's keys equal EXPECTED_PRIMITIVES, no more, no less.

    Fails with the sorted list of paths that are expected but not registered,
    or (if none are missing) the sorted list of registered paths that are not
    expected.
    """
    registered = set(PRIMITIVE_REGISTRY)

    # Expected paths with no registry entry.
    absent = EXPECTED_PRIMITIVES.difference(registered)
    assert not absent, f"registry missing: {sorted(absent)}"

    # Registry entries the expected set does not list.
    unexpected = registered.difference(EXPECTED_PRIMITIVES)
    assert not unexpected, f"registry has unexpected entries: {sorted(unexpected)}"
|
|
|
|
|
|
def test_every_primitive_has_a_valid_spec():
    """Check per-kind structural requirements on every registered spec.

    * CATEGORICAL specs must define a non-empty ``allowed`` whose members are
      all strings.
    * ARRAY specs must define ``array_of``, and it must not itself be ARRAY.

    Specs of any other kind are not checked here. Every assertion names the
    offending primitive so a failure can be traced without a debugger.
    """
    for primitive, spec in PRIMITIVE_REGISTRY.items():
        if spec.kind is ValueKind.CATEGORICAL:
            assert spec.allowed, f"{primitive}: categorical must define `allowed`"
            # Collect the offenders instead of asserting on all(...): a bare
            # `assert all(<generator>)` fails without saying which value was bad.
            non_strings = [v for v in spec.allowed if not isinstance(v, str)]
            assert not non_strings, (
                f"{primitive}: `allowed` values must be str, got {non_strings!r}"
            )
        elif spec.kind is ValueKind.ARRAY:
            assert spec.array_of is not None, f"{primitive}: array must define `array_of`"
            assert spec.array_of is not ValueKind.ARRAY, (
                f"{primitive}: nested arrays not supported in v0"
            )
|
|
|
|
|
|
def test_primitive_paths_are_dotted_lowercase():
    """Check that every registry key is a dotted, lowercase path.

    A valid path has two or more dot-separated segments; each segment starts
    with a lowercase ASCII letter and continues with lowercase letters, digits
    or underscores.
    """
    # fullmatch() rather than match() with ``^...$``: ``$`` also matches just
    # before a trailing newline, so "layer.name\n" would otherwise be accepted.
    pattern = re.compile(r"[a-z][a-z0-9_]*(\.[a-z][a-z0-9_]*)+")
    for primitive in PRIMITIVE_REGISTRY:
        assert pattern.fullmatch(primitive), f"malformed primitive path: {primitive!r}"
|
|
|
|
|
|
def test_experimental_primitives_are_in_content_layer_only():
    """Only content.* primitives may carry the EXPERIMENTAL marker in v0.

    A spec counts as experimental here when its ``notes`` is non-empty and
    contains the substring "EXPERIMENTAL".
    """
    for primitive, spec in PRIMITIVE_REGISTRY.items():
        # Specs without notes, or whose notes lack the marker, are out of scope.
        if not spec.notes or "EXPERIMENTAL" not in spec.notes:
            continue
        in_content_layer = primitive.startswith("content.")
        assert in_content_layer, (
            f"{primitive}: EXPERIMENTAL flag should only appear in content.* layer in v0"
        )
|
|
|
|
|
|
def test_topic_namespace_uses_actor_not_attacker():
    """Text-domain event topics live under `actor.*`, never `attacker.*`.

    Checks the prefix constant itself, then one topic built from a primitive
    path via ``event_topic_for``.
    """
    from decnet_behave_text.spec import TOPIC_PREFIX, event_topic_for

    assert TOPIC_PREFIX == "actor.observation.text"

    sample_primitive = "stylometric.emoji_usage"
    expected_topic = "actor.observation.text.stylometric.emoji_usage"
    assert event_topic_for(sample_primitive) == expected_topic
|