Six-part fix for the collection bottleneck that was blocking the dev loop: 1. Lazy mitreattack.stix20 import in attack_stix.py — deferred to first _load() call (TYPE_CHECKING guard at top level) 2. Lazy misp_stix_converter import in both MISP export routers — moved from module level into the route handler body 3. Lazy attack_catalog / attack_stix in ttp.py repo mixin — thin wrapper functions so the import chain never fires at module load time 4. tests/api/conftest.py — `from decnet.web.api import app` moved inside the `client()` fixture; `pytest_ignore_collect` broadened to skip all test_schemathesis*.py variants (not just test_schemathesis.py), which were launching a subprocess server at module-import time 5. pyproject.toml — `norecursedirs` for tests/live, tests/stress, tests/service_testing, tests/docker, tests/perf so these directories are never entered; `-m` filter removed from addopts (now redundant); `--dist loadscope` → `--dist load` to unblock workers immediately 6. behave_core / behave_shell rename — BEHAVE packages dropped the `decnet_` prefix; reinstalled editable installs and updated all 14 import sites across profiler, ttp, bus, and correlation modules
518 lines
18 KiB
Python
518 lines
18 KiB
Python
"""STIX 2.1 bundle builder for a DECNET attacker observation.
|
|
|
|
Pure function — no I/O.  The caller (router) does all DB reads and
passes dicts; this module assembles the STIX bundle.

SDO/SRO mapping
---------------
DECNET data                  → STIX type
-----------                    ---------
Producer (DECNET)            → identity (org, deterministic ID)
attacker.ip                  → ipv4-addr SCO
first/last seen + count      → observed-data SDO
AttackerIdentity or IP       → threat-actor SDO
Per-technique rollup         → attack-pattern SDO + relationship(uses) SRO
Per ttp_tag row              → sighting SRO
ObservedAttachment / Log     → file SCO + observed-data SDO
SmtpTarget                   → domain-name SCO + observed-data SDO
AttackerIntel verdict        → note SDO

Attack-pattern SDOs carry the canonical MITRE STIX IDs pulled from the
loaded enterprise bundle so the objects are deduplicated against the
public ATT&CK bundle by any consumer that already has it.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import json
|
|
import uuid as _uuid
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
import stix2
|
|
|
|
from decnet.ttp import attack_stix
|
|
from decnet.ttp.stix_custom import (
|
|
ACTOR_FINGERPRINT_EXT_ID,
|
|
FINGERPRINT_EXT_DEF,
|
|
DecnetActorFingerprintExt,
|
|
XDecnetBehaveProfile,
|
|
)
|
|
|
|
# Fixed UUIDv5 namespace from which this module derives its deterministic
# STIX IDs.  Changing it would change every ID derived from it.
_NS = _uuid.UUID("b5d2c3a1-8f4e-4d1b-9a6c-0e7f5b3d2c1a")

# ID of the DECNET producer identity.  It is computed from the namespace and
# a fixed name only, so it is identical in every bundle this instance
# produces and consumers can correlate across exports.
_DECNET_ORG_STIX_ID = "identity--" + str(_uuid.uuid5(_NS, "decnet-honeypot"))
|
|
|
|
|
|
def _aware(dt: datetime | str | None) -> datetime | None:
    """Normalise *dt* to a timezone-aware ``datetime``.

    ``None`` passes through unchanged.  Strings are parsed with
    ``datetime.fromisoformat`` after a ``Z`` suffix is rewritten to
    ``+00:00``.  A naive result is stamped as UTC; a value that already
    carries a timezone is returned as is.
    """
    if dt is None:
        return None

    moment = (
        datetime.fromisoformat(dt.replace("Z", "+00:00"))
        if isinstance(dt, str)
        else dt
    )
    if moment.tzinfo is not None:
        return moment
    return moment.replace(tzinfo=timezone.utc)
|
|
|
|
|
|
def _decnet_org() -> stix2.Identity:
    """Return the STIX identity SDO describing the DECNET producer.

    The object always carries ``_DECNET_ORG_STIX_ID``, so every call yields
    an identity with the same STIX ID.
    """
    producer = {
        "id": _DECNET_ORG_STIX_ID,
        "name": "DECNET",
        "identity_class": "organization",
        "description": "DECNET honeypot platform — automated threat observation",
    }
    return stix2.Identity(**producer)
|
|
|
|
|
|
def _parse_json_field(v: Any) -> Any:
    """Decode *v* as JSON when it is a string; otherwise hand it back untouched.

    A string that cannot be decoded is returned unchanged as well, so the
    caller always gets a usable value.
    """
    if not isinstance(v, str):
        return v
    try:
        decoded = json.loads(v)
    except Exception:
        # Best effort: not JSON, keep the raw text.
        return v
    return decoded
|
|
|
|
|
|
def _extract_fingerprint_bounty_data(
    bounties: list[dict[str, Any]],
) -> tuple[list[str], list[dict[str, Any]]]:
    """Return ``(jarm_hashes, http_quirks)`` extracted from fingerprint bounty rows.

    jarm_hashes: JARM hash values in first-seen order, duplicates removed.
    http_quirks: list of ``{order, casing_category, tool_guess}`` dicts, one
                 per distinct client stack; only keys whose payload value is
                 truthy are included.

    Quirk rows are de-duplicated on ``order_hash``.  Rows without an
    ``order_hash`` are de-duplicated on their own content instead, so
    distinct hash-less clients are not folded into a single entry.  A row
    that yields no fields does not claim its key, so it cannot suppress a
    later, informative row with the same ``order_hash``.

    A ``payload`` given as a JSON string is decoded first.  Rows whose
    payload is missing, undecodable, or not a dict are ignored.
    """
    quirk_fields = ("order", "casing_category", "tool_guess")
    jarm_hashes: list[str] = []
    jarm_seen: set[str] = set()
    http_quirks: list[dict[str, Any]] = []
    http_seen: set[tuple[str, Any]] = set()

    for row in bounties:
        payload = row.get("payload") or {}
        if isinstance(payload, str):
            # Tolerate payloads that arrive as raw JSON text.
            try:
                payload = json.loads(payload)
            except (ValueError, RecursionError):
                continue
        if not isinstance(payload, dict):
            continue

        fp_type = payload.get("fingerprint_type")
        if fp_type == "jarm":
            jarm = payload.get("hash")
            if jarm and jarm not in jarm_seen:
                jarm_seen.add(jarm)
                jarm_hashes.append(jarm)
        elif fp_type == "http_quirks":
            entry: dict[str, Any] = {
                field: payload[field]
                for field in quirk_fields
                if payload.get(field)
            }
            if not entry:
                continue
            order_hash = payload.get("order_hash")
            if order_hash:
                dedup_key: tuple[str, Any] = ("hash", order_hash)
            else:
                # No hash to key on: fall back to the entry's own content.
                dedup_key = (
                    "content",
                    json.dumps(entry, sort_keys=True, default=str),
                )
            if dedup_key in http_seen:
                continue
            http_seen.add(dedup_key)
            http_quirks.append(entry)

    return jarm_hashes, http_quirks
|
|
|
|
|
|
def _threat_actor(
    attacker: dict[str, Any],
    identity: dict[str, Any] | None,
    created_by: str,
    behavior: dict[str, Any] | None = None,
    observations: list[dict[str, Any]] | None = None,
    fingerprint_bounties: list[dict[str, Any]] | None = None,
) -> tuple[stix2.ThreatActor, "XDecnetBehaveProfile | None"]:
    """Assemble the threat-actor SDO for *attacker* and, if applicable, its BEHAVE profile.

    The actor is enriched in three tiers:

    1. ``x_decnet_*`` scalars (country code, ASN, AS name) copied from
       *attacker* when truthy.
    2. A ``DecnetActorFingerprintExt`` extension holding ``network_behavior``
       (from *behavior*) and ``protocol_fingerprints`` (from *behavior*,
       *identity* and *fingerprint_bounties*).  It is attached only when at
       least one of the two dicts is non-empty.
    3. An ``XDecnetBehaveProfile`` object, created only when *observations*
       is non-empty or *identity* supplies a ``kd_digraph_simhash``.  The
       actor then references it via ``x_decnet_behave_profile_ref``.

    Both object IDs are UUIDv5 values derived from ``attacker['uuid']``.

    Returns ``(threat_actor, behave_profile_or_None)``.
    """

    def _copy_decoded(
        source: dict[str, Any],
        fields: tuple[str, ...],
        sink: dict[str, Any],
    ) -> None:
        # JSON-decode each listed field and keep it only when non-empty.
        for field in fields:
            decoded = _parse_json_field(source.get(field))
            if decoded:
                sink[field] = decoded

    actor_name = (
        f"DECNET-identity-{identity['uuid'][:8]}"
        if identity
        else f"DECNET-attacker-{attacker['uuid'][:8]}"
    )

    actor_fields: dict[str, Any] = {
        "id": f"threat-actor--{_uuid.uuid5(_NS, attacker['uuid'])}",
        "name": actor_name,
        "threat_actor_types": ["unknown"],
        "created_by_ref": created_by,
        "allow_custom": True,
    }

    # Tier 1 — stable scalars
    for source_key, stix_prop in (
        ("country_code", "x_decnet_country_code"),
        ("asn", "x_decnet_asn"),
        ("as_name", "x_decnet_as_name"),
    ):
        if attacker.get(source_key):
            actor_fields[stix_prop] = attacker[source_key]

    # Tier 2 — DecnetActorFingerprintExt (network_behavior + protocol_fingerprints)
    net_props: dict[str, Any] = {}
    proto_props: dict[str, Any] = {}

    if behavior:
        # Plain scalars: kept whenever present, even if falsy (0, "").
        for scalar_key in (
            "os_guess", "hop_distance", "retransmit_count",
            "behavior_class", "beacon_interval_s", "beacon_jitter_pct",
        ):
            scalar = behavior.get(scalar_key)
            if scalar is not None:
                net_props[scalar_key] = scalar
        _copy_decoded(
            behavior,
            ("tcp_fingerprint", "timing_stats", "phase_sequence", "tool_guesses"),
            net_props,
        )
        _copy_decoded(behavior, ("kex_order_raw", "ssh_client_banners"), proto_props)

    if identity:
        _copy_decoded(
            identity,
            ("ja3_hashes", "hassh_hashes", "tls_cert_sha256", "payload_simhashes"),
            proto_props,
        )
        # Gated on the raw value, so whatever it decodes to is stored as is.
        if identity.get("c2_endpoints"):
            proto_props["c2_endpoints"] = _parse_json_field(identity["c2_endpoints"])

    if fingerprint_bounties:
        jarm, quirks = _extract_fingerprint_bounty_data(fingerprint_bounties)
        if jarm:
            proto_props["jarm_hashes"] = jarm
        if quirks:
            proto_props["http_quirks"] = quirks

    if net_props or proto_props:
        ext_fields: dict[str, Any] = {"extension_type": "property-extension"}
        if net_props:
            ext_fields["network_behavior"] = net_props
        if proto_props:
            ext_fields["protocol_fingerprints"] = proto_props
        actor_fields["extensions"] = {
            ACTOR_FINGERPRINT_EXT_ID: DecnetActorFingerprintExt(**ext_fields),
        }

    # Tier 3 — XDecnetBehaveProfile (BEHAVE observations)
    profile: XDecnetBehaveProfile | None = None
    digraph_hex: str | None = None
    simhash = identity.get("kd_digraph_simhash") if identity else None
    if isinstance(simhash, (bytes, bytearray)):
        digraph_hex = simhash.hex()
    elif isinstance(simhash, str) and simhash:
        # Text is treated as base64; if decoding fails it is kept verbatim.
        try:
            digraph_hex = base64.b64decode(simhash).hex()
        except Exception:
            digraph_hex = simhash

    recorded = observations or []
    if recorded or digraph_hex is not None:
        # Imported lazily so behave_shell is only loaded when a profile is built.
        from behave_shell.spec.envelope import OBSERVATION_SCHEMA_VERSION

        profile_ref = f"x-decnet-behave-profile--{_uuid.uuid5(_NS, attacker['uuid'])}"
        profile = XDecnetBehaveProfile(  # type: ignore[call-arg]
            id=profile_ref,
            created_by_ref=created_by,
            schema_version=OBSERVATION_SCHEMA_VERSION,
            kd_digraph_simhash=digraph_hex,
            observations=recorded,
        )
        actor_fields["x_decnet_behave_profile_ref"] = profile_ref

    return stix2.ThreatActor(**actor_fields), profile
|
|
|
|
|
|
def _attack_pattern_sdo(technique_id: str, created_by: str) -> stix2.AttackPattern | None:
    """Build an attack-pattern SDO for *technique_id*, or ``None`` if it is unknown.

    The technique is looked up with ``attack_stix._attack_pattern_by_id``.
    The returned SDO reuses the looked-up object's own ``id`` and carries
    its ``mitre-attack`` external reference when one exists.

    The reference's ``url`` is included only when the source has one, so no
    empty URL is emitted.  If the source reference lacks ``external_id``,
    *technique_id* is used in its place.
    """
    source = attack_stix._attack_pattern_by_id(technique_id)
    if source is None:
        return None

    mitre_ref = next(
        (
            ref
            for ref in source.get("external_references", [])
            if ref.get("source_name") == "mitre-attack"
        ),
        None,
    )

    references: list[stix2.ExternalReference] = []
    if mitre_ref:
        ref_fields: dict[str, Any] = {
            "source_name": "mitre-attack",
            "external_id": mitre_ref.get("external_id") or technique_id,
        }
        if mitre_ref.get("url"):
            ref_fields["url"] = mitre_ref["url"]
        references.append(stix2.ExternalReference(**ref_fields))

    return stix2.AttackPattern(
        id=source["id"],
        name=source.get("name", technique_id),
        external_references=references,
        created_by_ref=created_by,
    )
|
|
|
|
|
|
def _intel_note(
    intel: dict[str, Any],
    ta_id: str,
    created_by: str,
) -> stix2.Note | None:
    """Summarise threat-intel enrichment for a threat actor as a STIX note.

    The note content is one ``key: value`` line per available signal:
    the aggregate verdict (``unknown`` when absent), the AbuseIPDB score,
    the GreyNoise classification with its tags, and Feodo / ThreatFox
    listings.  The note refers to *ta_id* and is attributed to *created_by*.

    Tag-like fields may be lists or JSON-encoded lists.  Anything else is
    treated as empty, and list items are stringified, so malformed
    enrichment data cannot break the note.
    """

    def _labels(raw: Any) -> list[str]:
        # Accept a list or its JSON encoding; everything else yields no labels.
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except Exception:
                return []
        if not isinstance(raw, (list, tuple)):
            return []
        return [str(item) for item in raw if item is not None]

    verdict = intel.get("aggregate_verdict") or "unknown"
    lines: list[str] = [f"aggregate_verdict: {verdict}"]

    if intel.get("abuseipdb_score") is not None:
        lines.append(f"abuseipdb_score: {intel['abuseipdb_score']}")

    if intel.get("greynoise_classification"):
        tags = _labels(intel.get("greynoise_tags"))
        lines.append(
            f"greynoise: {intel['greynoise_classification']} ({', '.join(tags)})"
        )

    if intel.get("feodo_listed"):
        # ``or`` rather than a .get default: the key may be present but null.
        lines.append(f"feodo: {intel.get('feodo_malware_family') or 'listed'}")

    if intel.get("threatfox_listed"):
        threat_types = _labels(intel.get("threatfox_threat_types"))
        lines.append(
            f"threatfox: {', '.join(threat_types) if threat_types else 'listed'}"
        )

    return stix2.Note(
        abstract="DECNET threat-intel verdict",
        content="\n".join(lines),
        object_refs=[ta_id],
        created_by_ref=created_by,
    )
|
|
|
|
|
|
def _ordered_window(
    first: datetime | None,
    last: datetime | None,
    fallback: datetime,
) -> tuple[datetime, datetime]:
    """Return ``(start, end)`` for an observation window, with ``start <= end``.

    A missing bound is replaced by *fallback*.  If that leaves the window
    reversed, two real bounds are swapped; otherwise the window collapses
    onto the one known timestamp.
    """
    start = first or fallback
    end = last or fallback
    if end < start:
        if first is not None and last is not None:
            start, end = end, start
        else:
            start = end = first if first is not None else last
    return start, end


def build_attacker_bundle(
    attacker: dict[str, Any],
    behavior: dict[str, Any] | None,
    identity: dict[str, Any] | None,
    intel: dict[str, Any] | None,
    technique_rollup: list[dict[str, Any]],
    raw_tags: list[dict[str, Any]],
    artifacts: list[dict[str, Any]],
    smtp_targets: list[dict[str, Any]],
    commands: list[str] | None = None,
    observations: list[dict[str, Any]] | None = None,
    fingerprint_bounties: list[dict[str, Any]] | None = None,
) -> stix2.Bundle:
    """Assemble a STIX 2.1 Bundle for *attacker*.

    All arguments are plain dicts / lists of dicts.  *attacker* must carry
    ``ip`` and ``uuid``; every other piece of data is optional and is left
    out of the bundle when missing.

    The bundle contains, in order:

    * the DECNET identity, the attacker's IP SCO and its observed-data;
    * the threat actor, plus its BEHAVE profile and a ``characterizes``
      relationship when a profile was built;
    * one attack-pattern and ``uses`` relationship per technique in
      *technique_rollup* that resolves to a known technique;
    * one sighting per *raw_tags* row whose technique was emitted above;
    * a file SCO + observed-data per artifact that has a hash;
    * a domain-name SCO + observed-data per SMTP target with a domain;
    * a process SCO, observed-data and actor sighting per distinct command;
    * a note summarising *intel*, when given.

    Observation windows are always emitted with first <= last: a missing
    bound defaults to the current time, and a window that would come out
    reversed is reordered so it does not fail STIX validation.
    """
    objs: list[Any] = []

    org = _decnet_org()
    objs.append(org)

    # ── IP observation ──────────────────────────────────────────────
    ipv4 = stix2.IPv4Address(value=attacker["ip"])
    objs.append(ipv4)

    now = datetime.now(timezone.utc)
    seen_from, seen_until = _ordered_window(
        _aware(attacker.get("first_seen")),
        _aware(attacker.get("last_seen")),
        now,
    )
    ip_obs = stix2.ObservedData(
        first_observed=seen_from,
        last_observed=seen_until,
        number_observed=max(1, attacker.get("event_count") or 1),
        object_refs=[ipv4.id],
        created_by_ref=org.id,
    )
    objs.append(ip_obs)

    # ── Threat actor + BEHAVE profile ────────────────────────────────
    ta, behave_profile = _threat_actor(
        attacker, identity, org.id,
        behavior=behavior,
        observations=observations,
        fingerprint_bounties=fingerprint_bounties,
    )
    objs.append(ta)
    if behave_profile is not None:
        objs.append(behave_profile)
        # NOTE(review): the extension definition is only bundled when a BEHAVE
        # profile exists, although the actor's fingerprint extension can be
        # present without one — confirm whether it should be added whenever
        # the actor carries that extension.
        objs.append(FINGERPRINT_EXT_DEF)
        objs.append(
            stix2.Relationship(
                relationship_type="characterizes",
                source_ref=behave_profile.id,  # type: ignore[attr-defined]
                target_ref=ta.id,
                created_by_ref=org.id,
                allow_custom=True,
            )
        )

    # ── ATT&CK — attack-patterns + uses relationships + sightings ───
    # Build per-technique once; sightings reference the same AP STIX ID.
    ap_stix_ids: dict[str, str] = {}  # technique_id → attack-pattern STIX id
    for row in technique_rollup:
        tid = row.get("sub_technique_id") or row.get("technique_id")
        if not tid or tid in ap_stix_ids:
            continue
        ap = _attack_pattern_sdo(tid, org.id)
        if ap is None:
            continue
        ap_stix_ids[tid] = ap.id
        objs.append(ap)
        objs.append(
            stix2.Relationship(
                source_ref=ta.id,
                target_ref=ap.id,
                relationship_type="uses",
                created_by_ref=org.id,
            )
        )

    for tag in raw_tags:
        tid = tag.get("sub_technique_id") or tag.get("technique_id")
        if not tid or tid not in ap_stix_ids:
            continue
        ts = _aware(tag.get("created_at")) or now
        objs.append(
            stix2.Sighting(
                sighting_of_ref=ap_stix_ids[tid],
                first_seen=ts,
                last_seen=ts,
                count=1,
                where_sighted_refs=[org.id],
                observed_data_refs=[ip_obs.id],
                created_by_ref=org.id,
            )
        )

    # ── Artifacts (file_captured log rows) ──────────────────────────
    for art in artifacts:
        fields = art.get("fields") or {}
        if isinstance(fields, str):
            try:
                fields = json.loads(fields)
            except Exception:
                fields = {}
        if not isinstance(fields, dict):
            # JSON that decodes to something other than an object has no
            # hash to offer; skip it instead of failing on ``.get``.
            continue
        sha = fields.get("sha256") or fields.get("hash")
        if not sha:
            continue
        file_kwargs: dict[str, Any] = {"hashes": {"SHA-256": sha.lower()}}
        name = fields.get("filename") or fields.get("stored_as")
        if name:
            file_kwargs["name"] = name
        f = stix2.File(**file_kwargs)
        objs.append(f)
        fts = _aware(art.get("timestamp")) or now
        objs.append(
            stix2.ObservedData(
                first_observed=fts,
                last_observed=fts,
                number_observed=1,
                object_refs=[f.id],
                created_by_ref=org.id,
            )
        )

    # ── SMTP targets ────────────────────────────────────────────────
    for tgt in smtp_targets:
        domain = tgt.get("domain")
        if not domain:
            continue
        dn = stix2.DomainName(value=domain)
        objs.append(dn)
        tgt_from, tgt_until = _ordered_window(
            _aware(tgt.get("first_seen")),
            _aware(tgt.get("last_seen")),
            now,
        )
        objs.append(
            stix2.ObservedData(
                first_observed=tgt_from,
                last_observed=tgt_until,
                number_observed=max(1, tgt.get("count") or 1),
                object_refs=[dn.id],
                created_by_ref=org.id,
            )
        )

    # ── Shell commands (process SCOs + observed-data) ────────────────
    # Commands carry no timestamps of their own, so they share the
    # attacker's overall observation window.
    seen_cmds: set[str] = set()
    for cmd_line in commands or []:
        if not cmd_line or cmd_line in seen_cmds:
            continue
        seen_cmds.add(cmd_line)
        proc = stix2.Process(command_line=cmd_line, is_hidden=False)
        objs.append(proc)
        proc_obs = stix2.ObservedData(
            first_observed=seen_from,
            last_observed=seen_until,
            number_observed=1,
            object_refs=[proc.id],
            created_by_ref=org.id,
        )
        objs.append(proc_obs)
        objs.append(
            stix2.Sighting(
                sighting_of_ref=ta.id,
                first_seen=seen_from,
                last_seen=seen_until,
                count=1,
                where_sighted_refs=[org.id],
                observed_data_refs=[proc_obs.id],
                created_by_ref=org.id,
            )
        )

    # ── Intel note ───────────────────────────────────────────────────
    if intel:
        note = _intel_note(intel, ta.id, org.id)
        # The helper's signature allows None; never put that in a bundle.
        if note is not None:
            objs.append(note)

    return stix2.Bundle(objects=objs, allow_custom=True)
|
|
|
|
|
|
def build_fleet_bundle(
    rows: list[dict[str, Any]],
    ttp_by_attacker: dict[str, list[dict[str, Any]]],
    observations_by_attacker: dict[str, list[dict[str, Any]]] | None = None,
    fingerprint_bounties_by_ip: dict[str, list[dict[str, Any]]] | None = None,
) -> stix2.Bundle:
    """Build one STIX 2.1 Bundle spanning every attacker in *rows*.

    Each row is run through ``build_attacker_bundle`` and the resulting
    objects are merged by STIX ID, so an object shared between attackers
    (e.g. an attack-pattern with a canonical MITRE ID) appears only once;
    the last occurrence wins.

    Fleet mode passes no behavior, identity, per-tag sightings, artifacts
    or SMTP targets (too verbose; use the per-attacker endpoint for full
    fidelity).  Technique rollups and observations are looked up by the
    row's ``uuid``, fingerprint bounties by its ``ip``.
    """
    observations_lookup = observations_by_attacker or {}
    bounties_lookup = fingerprint_bounties_by_ip or {}
    merged: dict[str, Any] = {}

    for attacker_row in rows:
        # ``commands`` may be a list of dicts or its JSON encoding.
        command_entries = attacker_row.get("commands") or []
        if isinstance(command_entries, str):
            try:
                command_entries = json.loads(command_entries)
            except Exception:
                command_entries = []

        command_lines: list[str] = []
        for entry in command_entries:
            if not isinstance(entry, dict):
                continue
            text = entry.get("command_text") or entry.get("command")
            if text:
                command_lines.append(str(text).strip())

        attacker_uuid = attacker_row["uuid"]
        per_attacker = build_attacker_bundle(
            attacker=attacker_row,
            behavior=None,
            identity=None,
            intel=attacker_row.get("threat_intel"),
            technique_rollup=ttp_by_attacker.get(attacker_uuid, []),
            raw_tags=[],
            artifacts=[],
            smtp_targets=[],
            commands=command_lines,
            observations=observations_lookup.get(attacker_uuid),
            fingerprint_bounties=bounties_lookup.get(attacker_row.get("ip", ""), []),
        )
        merged.update((obj.id, obj) for obj in per_attacker.objects)

    return stix2.Bundle(objects=list(merged.values()), allow_custom=True)
|