docs: per-package READMEs with full primitive catalog and registry notes backfill
- core/README.md: envelope contract, field table, PII discipline, quickstart
- BEHAVE-SHELL/README.md: all 76 primitives documented across 9 categories; TLS/SSH/C2 fingerprint sections with [DRAFT — verify] markers on uncertain entries
- BEHAVE-TEXT/README.md: all 35 primitives across 6 categories; Rutify calibration notes inline; content.* layer marked EXPERIMENTAL throughout
- primitives.py (SHELL): backfilled notes for all previously undocumented primitives
- primitives.py (TEXT): backfilled notes for capitalization_habit, emoji_*, message_length_class, linebreak_style, sentence_complexity_class, question_formation_style, imperative_style, response_latency_class, message_burst_rate

License: CC-BY-SA-4.0 (prose) / GPL-3.0-or-later (code)
This commit is contained in:
@@ -114,10 +114,32 @@ def _array(of: ValueKind, notes: Optional[str] = None) -> ValueTypeSpec:
|
||||
PRIMITIVE_REGISTRY: dict[str, ValueTypeSpec] = {
|
||||
# ── stylometric.* (motor analog — 8) ──────────────────────────────────
|
||||
"stylometric.punctuation_style": _hash(notes="canonical punctuation-pattern fingerprint"),
|
||||
"stylometric.capitalization_habit": _cat("lowercase", "proper", "random_caps", "mixed_i"),
|
||||
"stylometric.emoji_usage": _cat("none", "occasional", "frequent", "exclusive"),
|
||||
"stylometric.emoji_placement": _cat("pre_punctuation", "post_punctuation", "no_punctuation", "mixed"),
|
||||
"stylometric.message_length_class": _cat("short", "medium", "long", "paragraph"),
|
||||
"stylometric.capitalization_habit": _cat(
|
||||
"lowercase", "proper", "random_caps", "mixed_i",
|
||||
notes="Dominant capitalization rule the author applies. lowercase=no capitals except "
|
||||
"after sentence breaks. proper=standard title/sentence case. random_caps=no "
|
||||
"consistent rule. mixed_i=author consistently writes 'i' in lowercase even "
|
||||
"mid-sentence — common in Spanish chat where 'I' is not a standalone word "
|
||||
"but the habit transfers from the native language's lowercase 'yo'.",
|
||||
),
|
||||
"stylometric.emoji_usage": _cat(
|
||||
"none", "occasional", "frequent", "exclusive",
|
||||
notes="Rate of emoji use per message. exclusive=messages rarely contain text without "
|
||||
"emoji. This captures tone and register — heavy emoji use in a criminal-market "
|
||||
"context is a distinct style trait worth preserving.",
|
||||
),
|
||||
"stylometric.emoji_placement": _cat(
|
||||
"pre_punctuation", "post_punctuation", "no_punctuation", "mixed",
|
||||
notes="Where emojis appear relative to sentence-ending punctuation. "
|
||||
"pre_punctuation='Hola 😊.' post_punctuation='Hola. 😊' "
|
||||
"Individual authors are strikingly consistent in this micro-habit.",
|
||||
),
|
||||
"stylometric.message_length_class": _cat(
|
||||
"short", "medium", "long", "paragraph",
|
||||
notes="Median message length bucket: short=1-5 words, medium=6-20 words, "
|
||||
"long=21-50 words, paragraph=>50 words. See also "
|
||||
"stylometric.message_length_variance_class for the distribution shape.",
|
||||
),
|
||||
"stylometric.message_length_variance_class": _cat(
|
||||
"tight", "varied", "bimodal",
|
||||
notes="Coefficient of variation of per-message word counts. Captures "
|
||||
@@ -129,7 +151,14 @@ PRIMITIVE_REGISTRY: dict[str, ValueTypeSpec] = {
|
||||
"rants). Added in v0.2 after Rutify calibration found median-only "
|
||||
"bucketing discarded most of the per-author variance signal.",
|
||||
),
|
||||
"stylometric.linebreak_style": _cat("single_thought", "multi_line", "wall_of_text"),
|
||||
"stylometric.linebreak_style": _cat(
|
||||
"single_thought", "multi_line", "wall_of_text",
|
||||
notes="Whether the author sends one complete thought per message or breaks a single "
|
||||
"statement into multiple sequential short messages. multi_line=habitual "
|
||||
"message-burst style (sends 3-5 short messages in rapid succession instead "
|
||||
"of one composed message). wall_of_text=rarely uses line breaks, sends dense "
|
||||
"blocks. Captures a stylistic rhythm that is hard to consciously alter.",
|
||||
),
|
||||
"stylometric.typo_signature": _hash(notes="sha256 of canonical persistent-typo set"),
|
||||
"stylometric.function_word_distribution_top50": _hash(
|
||||
notes="64-bit simhash over the 50-most-common Spanish function-word frequency "
|
||||
@@ -188,9 +217,31 @@ PRIMITIVE_REGISTRY: dict[str, ValueTypeSpec] = {
|
||||
"lexical.code_switching_matrix_language": _str(notes="BCP-47 of dominant language"),
|
||||
"lexical.code_switching_embedded_languages": _array(ValueKind.FREE_STRING,
|
||||
notes="BCP-47 list of non-matrix languages observed"),
|
||||
"lexical.sentence_complexity_class": _cat("simple", "compound", "complex"),
|
||||
"lexical.question_formation_style": _cat("punctuation_only", "lexical", "formal"),
|
||||
"lexical.imperative_style": _cat("informal_directive", "formal_directive", "polite"),
|
||||
"lexical.sentence_complexity_class": _cat(
|
||||
"simple", "compound", "complex",
|
||||
notes="Dominant clause structure. simple=single-clause messages (no conjunctions "
|
||||
"or subordination). compound=two independent clauses joined by coordinating "
|
||||
"conjunctions (pero, y, o, ni). complex=dependent clauses and subordination "
|
||||
"(aunque, porque, cuando, que + verb). Reflects education level and "
|
||||
"cognitive investment in message composition.",
|
||||
),
|
||||
"lexical.question_formation_style": _cat(
|
||||
"punctuation_only", "lexical", "formal",
|
||||
notes="How questions are formed. punctuation_only=question mark appended without "
|
||||
"interrogative words ('¿Cuánto?' or 'Mañana?') — very common in Spanish "
|
||||
"chat. lexical=explicit interrogatives (¿qué, cómo, cuándo, dónde). "
|
||||
"formal=inverted subject-verb order or formal register ('¿Podría usted...'). "
|
||||
"Captures register and education level.",
|
||||
),
|
||||
"lexical.imperative_style": _cat(
|
||||
"informal_directive", "formal_directive", "polite",
|
||||
notes="How commands and requests are framed. informal_directive=tú/vos imperative "
|
||||
"('dame', 'hazlo', 'mándame'). formal_directive=usted imperative "
|
||||
"('hágame el favor', 'envíeme'). polite=conditional or modal softening "
|
||||
"('¿podría...?', 'me gustaría...'). Stable per-author trait in criminal "
|
||||
"market contexts where hierarchical and peer relationships are expressed "
|
||||
"through register choice.",
|
||||
),
|
||||
|
||||
# ── temporal_evolution.* (lifecycle / change-over-time — 1) ───────────
|
||||
"temporal_evolution.lifecycle_phase": _cat(
|
||||
@@ -247,10 +298,22 @@ PRIMITIVE_REGISTRY: dict[str, ValueTypeSpec] = {
|
||||
),
|
||||
|
||||
# ── interaction.* (temporal analog — 6) ───────────────────────────────
|
||||
"interaction.response_latency_class": _cat("immediate", "fast", "normal", "slow", "sporadic"),
|
||||
"interaction.response_latency_class": _cat(
|
||||
"immediate", "fast", "normal", "slow", "sporadic",
|
||||
notes="How quickly the actor responds to messages directed at them. "
|
||||
"immediate=<30s (suggests active monitoring or automated response). "
|
||||
"fast=30s-5min. normal=5-60min (typical async chat). slow=1-24hr. "
|
||||
"sporadic=no consistent response latency — appears and disappears.",
|
||||
),
|
||||
"interaction.conversation_initiation_rate": _num(min_val=0.0, max_val=1.0,
|
||||
notes="thread-starting messages / total"),
|
||||
"interaction.message_burst_rate": _cat("single", "occasional", "habitual"),
|
||||
"interaction.message_burst_rate": _cat(
|
||||
"single", "occasional", "habitual",
|
||||
notes="Whether the actor sends multiple messages in rapid sequence within a "
|
||||
"conversation turn. habitual=almost always bursts (sends 3+ messages "
|
||||
"before any reply). single=almost always one message per turn. Tied to "
|
||||
"stylometric.linebreak_style multi_line.",
|
||||
),
|
||||
"interaction.active_hours_class": _str(notes="UTC active-hours window summary"),
|
||||
"interaction.session_duration_class": _cat("short", "medium", "long", "marathon",
|
||||
notes="REUSED enum from BEHAVE-SHELL temporal.session_duration"),
|
||||
|
||||
Reference in New Issue
Block a user