# DECNET/tests/clustering/test_campaign_factory.py

"""Determinism + DSL-validation tests for the synthetic campaign factory."""
from __future__ import annotations

import pytest

from decnet.clustering.ukc import UKCPhase
from tests.factories.campaign_factory import (
    DSLValidationError,
    generate,
)


def _minimal_spec() -> dict:
    return {
        "campaign": {
            "id": "c-test",
            "actors": [{"id": "a-1", "asn": 64512}],
            "phases": [{"name": "delivery", "actor": "a-1"}],
            "duration_days": 1,
        }
    }


def test_generation_is_deterministic_given_seed() -> None:
    """Generating twice from the same spec and seed must yield identical IDs."""
    spec = _minimal_spec()
    a = generate(spec, seed=42)
    b = generate(spec, seed=42)

    attacker_ids_a = [att.attacker_id for att in a.attackers]
    attacker_ids_b = [att.attacker_id for att in b.attackers]
    session_ids_a = [s.session_id for s in a.sessions]
    session_ids_b = [s.session_id for s in b.sessions]

    # Guard against a vacuous pass: two empty corpora compare equal below
    # without proving anything about seed-driven determinism.
    assert attacker_ids_a, "minimal spec produced no attackers"
    assert session_ids_a, "minimal spec produced no sessions"

    # IDs are RNG-driven — same seed must produce identical IDs, not
    # merely identical structure. Otherwise federation gossip and
    # fixture diffing both break.
    assert attacker_ids_a == attacker_ids_b
    assert session_ids_a == session_ids_b


def test_different_seeds_produce_different_ids() -> None:
    """Corpora built from seeds 1 and 2 must differ in their first attacker ID."""
    base_spec = _minimal_spec()
    # Build both corpora before inspecting either one.
    corpus_one, corpus_two = (generate(base_spec, seed=s) for s in (1, 2))
    id_one = corpus_one.attackers[0].attacker_id
    id_two = corpus_two.attackers[0].attacker_id
    assert id_one != id_two


def test_truth_labels_match_dsl() -> None:
    """The first attacker's truth fields must echo the IDs declared in the spec."""
    corpus = generate(_minimal_spec(), seed=0)

    first_attacker = corpus.attackers[0]
    assert first_attacker.truth_campaign_id == "c-test"
    assert first_attacker.truth_actor_id == "a-1"

    # truth_labels() returns the dict the metric harness consumes; it is
    # looked up here by attacker_id and must map to the campaign id.
    truth = corpus.truth_labels()
    looked_up = truth[corpus.attackers[0].attacker_id]
    assert looked_up == "c-test"


def test_unobservable_phase_emits_no_events() -> None:
    """A reconnaissance phase must not add sessions alongside delivery."""
    # reconnaissance is pre-target and therefore unobservable.
    recon_phase = {"name": "reconnaissance", "actor": "a-1"}
    delivery_phase = {"name": "delivery", "actor": "a-1"}

    spec = _minimal_spec()
    spec["campaign"]["phases"] = [recon_phase, delivery_phase]

    corpus = generate(spec, seed=0)

    # Only the delivery phase should produce sessions.
    for session in corpus.sessions:
        assert session.phase == UKCPhase.DELIVERY
    session_count = len(corpus.sessions)
    assert session_count == 1


def test_unknown_phase_name_raises() -> None:
    """A phase name outside the UKC vocabulary must be rejected by generate()."""
    bogus_phase = {"name": "make_coffee", "actor": "a-1"}
    spec = _minimal_spec()
    campaign = spec["campaign"]
    campaign["phases"] = [bogus_phase]

    expectation = pytest.raises(DSLValidationError, match="unknown UKC phase")
    with expectation:
        generate(spec, seed=0)


def test_phase_referencing_unknown_actor_raises() -> None:
    """A phase whose actor is not declared in the campaign must be rejected."""
    orphan_phase = {"name": "delivery", "actor": "ghost"}
    spec = _minimal_spec()
    campaign = spec["campaign"]
    campaign["phases"] = [orphan_phase]

    expectation = pytest.raises(DSLValidationError, match="unknown actor")
    with expectation:
        generate(spec, seed=0)


def test_noise_scanners_are_truth_singletons() -> None:
    """One campaign plus five noise scanners must give six truth campaign IDs."""
    corpus_section = {
        "campaigns": [_minimal_spec()],
        "noise": {"scanner_count": 5},
    }
    corpus = generate({"corpus": corpus_section}, seed=0)

    distinct_campaign_ids = set()
    for attacker in corpus.attackers:
        distinct_campaign_ids.add(attacker.truth_campaign_id)

    # 1 campaign actor + 5 noise scanners = 6 distinct truth campaigns.
    assert len(distinct_campaign_ids) == 6


def test_multi_actor_campaign_shares_campaign_id() -> None:
    """Every truth label in a two-actor campaign must be the campaign's ID."""
    actors = [{"id": actor_id, "asn": 14061} for actor_id in ("a-1", "a-2")]
    phases = [
        {"name": "delivery", "actor": "a-1"},
        {"name": "discovery", "actor": "a-2"},
    ]
    campaign = {
        "id": "c-shared",
        "actors": actors,
        "phases": phases,
        "duration_days": 1,
    }

    corpus = generate({"campaign": campaign}, seed=0)
    labels = corpus.truth_labels()

    # Both attacker rows must point to the SAME truth_campaign_id —
    # this is the property fixture 5 (multi_operator) hinges on.
    observed_campaigns = {*labels.values()}
    assert observed_campaigns == {"c-shared"}