From 49da15823fbf09f60da47e4b9f208bc37005e254 Mon Sep 17 00:00:00 2001 From: anti Date: Mon, 27 Apr 2026 17:39:04 -0400 Subject: [PATCH] =?UTF-8?q?refactor(realism):=20single=20source=20of=20tru?= =?UTF-8?q?th=20for=20persona=E2=86=92login?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit decnet/realism/naming._home and decnet/canary/cultivator._persona_login both normalised "John Smith"→"johnsmith" with identical logic. Lift to decnet.realism.personas.login_for(persona) and have both consumers import it. Drift between the two would have left canary placement and realism path naming using different login derivations. --- decnet/canary/cultivator.py | 11 ++--------- decnet/realism/naming.py | 14 +++----------- decnet/realism/personas.py | 15 +++++++++++++++ development/DEVELOPMENT.md | 2 +- pyproject.toml | 5 ++++- tests/realism/test_personas.py | 26 ++++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 22 deletions(-) diff --git a/decnet/canary/cultivator.py b/decnet/canary/cultivator.py index ec0f8e0f..1cc145b1 100644 --- a/decnet/canary/cultivator.py +++ b/decnet/canary/cultivator.py @@ -29,6 +29,7 @@ from typing import Any, Optional from decnet.canary.base import CanaryArtifact, CanaryContext from decnet.canary.factory import get_generator from decnet.logging import get_logger +from decnet.realism.personas import login_for from decnet.realism.taxonomy import ContentClass, Plan log = get_logger("canary.cultivator") @@ -64,14 +65,6 @@ _DEFAULT_PATH: dict[ContentClass, str] = { } -def _persona_login(persona: str) -> str: - """Mirror :func:`decnet.realism.naming._home`'s username conventions.""" - candidate = persona.lower().replace(" ", "") - if candidate.isalnum() and candidate.isascii() and candidate: - return candidate - return "user" - - def _path_for(plan: Plan) -> str: """Produce the canary placement path for *plan*. @@ -84,7 +77,7 @@ def _path_for(plan: Plan) -> str: template = _DEFAULT_PATH.get(plan.content_class) if template is None: return plan.target_path - return template.format(persona=_persona_login(plan.persona)) + return template.format(persona=login_for(plan.persona)) def _new_callback_token() -> str: diff --git a/decnet/realism/naming.py b/decnet/realism/naming.py index 7193581f..e6c9dfae 100644 --- a/decnet/realism/naming.py +++ b/decnet/realism/naming.py @@ -24,6 +24,7 @@ import secrets import string from typing import Callable, Optional +from decnet.realism.personas import login_for from decnet.realism.taxonomy import ContentClass @@ -32,17 +33,8 @@ from decnet.realism.taxonomy import ContentClass # paths (out of scope until per-OS personas land). For now everything # is POSIX. def _home(persona: str) -> str: - """Return the canonical home directory for *persona*. - - The persona's ``name`` is used as the linux username when it's a - plausible login (lowercase, no spaces); otherwise we fall back to - a generic ``user`` so the path doesn't reveal a persona display - name on the decky filesystem. - """ - candidate = persona.lower().replace(" ", "") - if candidate.isalnum() and candidate.isascii() and candidate: - return f"/home/{candidate}" - return "/home/user" + """Return the canonical home directory for *persona*.""" + return f"/home/{login_for(persona)}" def _random_token(rng: secrets.SystemRandom, length: int = 6) -> str: diff --git a/decnet/realism/personas.py b/decnet/realism/personas.py index 6916bb19..adc43da4 100644 --- a/decnet/realism/personas.py +++ b/decnet/realism/personas.py @@ -117,6 +117,21 @@ def parse_personas( return out +def login_for(persona: str) -> str: + """Return the linux login derived from a persona's display name. + + Lowercase, strip spaces; if the result isn't a plausible POSIX + login (alnum ASCII), fall back to ``user`` so the path doesn't + leak the persona's display name onto the decky filesystem. + Shared by realism path naming (``decnet/realism/naming.py``) and + canary cultivation (``decnet/canary/cultivator.py``). + """ + candidate = persona.lower().replace(" ", "") + if candidate.isalnum() and candidate.isascii() and candidate: + return candidate + return "user" + + def in_active_hours(persona: EmailPersona, now_hour: int) -> bool: """Return True if *now_hour* (0–23) falls in the persona's window. diff --git a/development/DEVELOPMENT.md b/development/DEVELOPMENT.md index 0e675f9a..fc9d04ec 100644 --- a/development/DEVELOPMENT.md +++ b/development/DEVELOPMENT.md @@ -57,7 +57,7 @@ - [x] **Real-time alerting via webhooks** — Admin-configurable outbound webhooks (SIEM/SOAR integration: Wazuh/Shuffle/TheHive/n8n) with HMAC-SHA256 signing, topic-pattern filtering, and bounded retry. Slack/Telegram-specific senders remain as per-destination work (they accept generic webhook payloads already). - [x] **Threat intel enrichment** — Auto-lookup IPs against AbuseIPDB, Shodan, and GreyNoise. -> Out-of-band `decnet enrich` worker, woken on `attacker.scored`/`attacker.observed`. v1 ships GreyNoise Community + AbuseIPDB + abuse.ch (Feodo Tracker bulk feed and ThreatFox per-IP). Shodan / Censys / OTX backlogged in DEVELOPMENT_V2.md. -- [ ] **Attack campaign clustering** — Group sessions by signatures and timing patterns. +- [x] **Attack campaign clustering** — Group sessions by signatures and timing patterns. - [x] **GeoIP mapping** — Visualize attacker origin and ASN data on a map. - [ ] **TTPs tagging** — Map observed behaviors to MITRE ATT&CK techniques. diff --git a/pyproject.toml b/pyproject.toml index c62ebbd2..3e7b499a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,10 @@ dependencies = [ "httpx>=0.28.1", "requests>=2.33.1", "slowapi>=0.1.9", - "sqlite_vec>=0.1.9" + "sqlite_vec>=0.1.9", + "Pillow>=12.2.0", + "lxml>=6.1.0", + "pikepdf>=10.5.1", ] [project.optional-dependencies] diff --git a/tests/realism/test_personas.py b/tests/realism/test_personas.py index 089117b0..106da13f 100644 --- a/tests/realism/test_personas.py +++ b/tests/realism/test_personas.py @@ -6,6 +6,7 @@ import json from decnet.realism.personas import ( EmailPersona, in_active_hours, + login_for, parse_personas, ) @@ -99,3 +100,28 @@ def test_active_hours_malformed_treats_as_always_on(): def test_active_hours_equal_window_treated_as_always_on(): p = EmailPersona(**_persona(active_hours="10:00-10:00")) assert in_active_hours(p, 5) is True + + +def test_login_for_normalises_display_name(): + assert login_for("John Smith") == "johnsmith" + assert login_for("alice") == "alice" + + +def test_login_for_falls_back_to_user_on_punctuation(): + # The realism namer and canary cultivator both rely on this so the + # decky filesystem doesn't end up with an unexpected username. + assert login_for("Mr. Robot") == "user" + assert login_for("") == "user" + assert login_for("Renée") == "user" # non-ASCII falls back + + +def test_login_for_shared_by_naming_and_cultivator(): + """Single source of truth: realism naming and canary cultivator + must agree on the persona→login mapping.""" + from decnet.canary import cultivator + from decnet.realism import naming + persona = "John Smith" + expected = login_for(persona) + assert naming._home(persona) == f"/home/{expected}" + # cultivator imports login_for; not duplicated. + assert cultivator.login_for is login_for