Wire fingerprint bounties (JARM hashes, HTTP header quirks) from the bounties table into the DecnetActorFingerprintExt.protocol_fingerprints group so that the HTTP fingerprinting data captured by the sniffer/profiler surfaces in every STIX export. Add a stix2.Relationship(relationship_type="characterizes") SRO linking each x-decnet-behave-profile SDO back to its ThreatActor, so graph-traversal tools can follow the edge instead of relying solely on the custom x_decnet_behave_profile_ref string property. New repo surface: get_fingerprint_bounties_by_ip(ip) -> list[dict] and get_all_fingerprint_bounties_for_export() -> dict[str, list[dict]]. All 4 export endpoints (per-attacker + fleet, STIX + MISP) are extended with the new gather slot. 50/50 tests green, mypy clean.
518 lines
18 KiB
Python
518 lines
18 KiB
Python
"""STIX 2.1 bundle builder for a DECNET attacker observation.
|
|
|
|
Pure function — no I/O. The caller (router) does all DB reads and
|
|
passes dicts; this module assembles the STIX bundle.
|
|
|
|
SDO/SRO mapping
|
|
---------------
|
|
DECNET data → STIX type
|
|
----------- ---------
|
|
Producer (DECNET) → identity (org, deterministic ID)
|
|
attacker.ip → ipv4-addr SCO
|
|
first/last seen + count → observed-data SDO
|
|
AttackerIdentity or IP → threat-actor SDO
|
|
Per-technique rollup → attack-pattern SDO + relationship(uses) SRO
|
|
Per ttp_tag row → sighting SRO
|
|
ObservedAttachment / Log → file SCO + observed-data SDO
|
|
SmtpTarget → domain-name SCO + observed-data SDO
|
|
AttackerIntel verdict → note SDO
|
|
|
|
Attack-pattern SDOs carry the canonical MITRE STIX IDs pulled from the
|
|
loaded enterprise bundle so the objects are deduplicated against the
|
|
public ATT&CK bundle by any consumer that already has it.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import json
|
|
import uuid as _uuid
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
import stix2
|
|
|
|
from decnet.ttp import attack_stix
|
|
from decnet.ttp.stix_custom import (
|
|
ACTOR_FINGERPRINT_EXT_ID,
|
|
FINGERPRINT_EXT_DEF,
|
|
DecnetActorFingerprintExt,
|
|
XDecnetBehaveProfile,
|
|
)
|
|
|
|
# Deterministic DECNET org identity ID — stable across all bundles this
# instance produces.  Consumers can correlate across exports.
#
# _NS is the fixed UUIDv5 namespace for this module: the org identity below,
# and the threat-actor / behave-profile IDs built further down, are all
# uuid5(_NS, <name>) values, so the same input always maps to the same ID.
# Changing either literal changes every derived ID.
_NS = _uuid.UUID("b5d2c3a1-8f4e-4d1b-9a6c-0e7f5b3d2c1a")
_DECNET_ORG_STIX_ID = f"identity--{_uuid.uuid5(_NS, 'decnet-honeypot')}"
|
|
|
|
|
|
def _aware(dt: datetime | str | None) -> datetime | None:
    """Normalise a timestamp to a timezone-aware ``datetime``.

    Accepts a ``datetime``, an ISO-8601 string (a ``Z`` suffix is read as
    UTC), or ``None``.  Naive values are assumed to be UTC.

    Returns ``None`` for ``None`` and for strings that are empty or cannot be
    parsed, so callers can write ``_aware(x) or fallback`` instead of having
    to catch ``ValueError`` for a bad row.
    """
    if dt is None:
        return None
    if isinstance(dt, str):
        text = dt.strip()
        if not text:
            return None
        try:
            # Older fromisoformat() implementations reject the "Z" suffix.
            dt = datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError:
            # Unparsable timestamp: treat it as missing rather than abort
            # the whole export.
            return None
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt
|
|
|
|
|
|
def _decnet_org() -> stix2.Identity:
    """Build the Identity SDO that represents the DECNET producer.

    The object always carries the module-level ``_DECNET_ORG_STIX_ID``, so
    every bundle refers to the same identity ID.
    """
    org_fields: dict[str, str] = {
        "id": _DECNET_ORG_STIX_ID,
        "name": "DECNET",
        "identity_class": "organization",
        "description": "DECNET honeypot platform — automated threat observation",
    }
    return stix2.Identity(**org_fields)
|
|
|
|
|
|
def _parse_json_field(v: Any) -> Any:
    """Decode *v* when it is a JSON string; hand anything else back as-is.

    Strings that fail to decode are returned unchanged.
    """
    if not isinstance(v, str):
        return v
    try:
        decoded = json.loads(v)
    except Exception:
        return v
    return decoded
|
|
|
|
|
|
def _extract_fingerprint_bounty_data(
    bounties: list[dict[str, Any]],
) -> tuple[list[str], list[dict[str, Any]]]:
    """Return ``(jarm_hashes, http_quirks)`` extracted from fingerprint bounty rows.

    Each row's ``payload`` may be a dict or a JSON-encoded string; rows whose
    payload is missing, undecodable, or not a JSON object are skipped.

    jarm_hashes: JARM hash strings, deduplicated, in first-seen order.
    http_quirks: list of ``{order, casing_category, tool_guess}`` dicts (only
                 keys with a truthy value are kept), deduplicated by
                 ``order_hash`` so two requests from the same client stack
                 produce one entry.  Payloads without an ``order_hash`` are
                 deduplicated by their extracted content, so distinct
                 stacks are not collapsed into a single entry.
    """
    jarm_hashes: list[str] = []
    jarm_seen: set[str] = set()
    http_quirks: list[dict[str, Any]] = []
    http_seen: set[str] = set()

    for row in bounties:
        payload = row.get("payload")
        if isinstance(payload, str):
            # The payload column may arrive still JSON-encoded.
            try:
                payload = json.loads(payload)
            except ValueError:
                continue
        if not isinstance(payload, dict):
            continue

        fp_type = payload.get("fingerprint_type")
        if fp_type == "jarm":
            jarm = payload.get("hash")
            # Only non-empty strings qualify; this also keeps unhashable
            # junk away from the dedup set.
            if isinstance(jarm, str) and jarm and jarm not in jarm_seen:
                jarm_seen.add(jarm)
                jarm_hashes.append(jarm)
        elif fp_type == "http_quirks":
            entry: dict[str, Any] = {
                field: payload[field]
                for field in ("order", "casing_category", "tool_guess")
                if payload.get(field)
            }
            if not entry:
                # Nothing worth exporting; do not let it claim a dedup slot.
                continue
            order_hash = payload.get("order_hash")
            if isinstance(order_hash, str) and order_hash:
                key = order_hash
            else:
                # No usable order_hash: fall back to the entry's own content
                # (prefixed so it can never collide with a real hash).
                key = "content:" + json.dumps(entry, sort_keys=True, default=str)
            if key not in http_seen:
                http_seen.add(key)
                http_quirks.append(entry)

    return jarm_hashes, http_quirks
|
|
|
|
|
|
def _threat_actor(
    attacker: dict[str, Any],
    identity: dict[str, Any] | None,
    created_by: str,
    behavior: dict[str, Any] | None = None,
    observations: list[dict[str, Any]] | None = None,
    fingerprint_bounties: list[dict[str, Any]] | None = None,
) -> tuple[stix2.ThreatActor, "XDecnetBehaveProfile | None"]:
    """Build a ThreatActor SDO plus an optional XDecnetBehaveProfile SDO.

    Returns ``(threat_actor, behave_profile_or_None)``.

    The actor is populated in three tiers:

    1. Scalars copied from *attacker* (``country_code``, ``asn``,
       ``as_name``) as ``x_decnet_*`` custom properties.
    2. A ``DecnetActorFingerprintExt`` extension holding ``network_behavior``
       (from *behavior*) and ``protocol_fingerprints`` (from *behavior*,
       *identity* and *fingerprint_bounties*).  Omitted when both are empty.
    3. An ``XDecnetBehaveProfile`` built when *observations* is non-empty or
       *identity* carries a ``kd_digraph_simhash``; the actor then points at
       it through ``x_decnet_behave_profile_ref``.

    Both STIX IDs are UUIDv5 values derived from ``attacker['uuid']``, so the
    same attacker always maps to the same IDs.  ``attacker['uuid']`` is
    required, as is ``identity['uuid']`` when *identity* is given.
    """
    if identity:
        name = f"DECNET-identity-{identity['uuid'][:8]}"
    else:
        name = f"DECNET-attacker-{attacker['uuid'][:8]}"

    kwargs: dict[str, Any] = dict(
        id=f"threat-actor--{_uuid.uuid5(_NS, attacker['uuid'])}",
        name=name,
        threat_actor_types=["unknown"],
        created_by_ref=created_by,
        allow_custom=True,
    )

    # Tier 1 — stable scalars
    if attacker.get("country_code"):
        kwargs["x_decnet_country_code"] = attacker["country_code"]
    if attacker.get("asn"):
        kwargs["x_decnet_asn"] = attacker["asn"]
    if attacker.get("as_name"):
        kwargs["x_decnet_as_name"] = attacker["as_name"]

    # Tier 2 — DecnetActorFingerprintExt (network_behavior + protocol_fingerprints)
    network_behavior: dict[str, Any] = {}
    protocol_fingerprints: dict[str, Any] = {}

    if behavior:
        # Scalars: keep legitimate zero values, drop only None.
        for key in ("os_guess", "hop_distance", "retransmit_count",
                    "behavior_class", "beacon_interval_s", "beacon_jitter_pct"):
            v = behavior.get(key)
            if v is not None:
                network_behavior[key] = v
        # Possibly JSON-encoded containers: drop empty ones.
        for key in ("tcp_fingerprint", "timing_stats", "phase_sequence", "tool_guesses"):
            v = _parse_json_field(behavior.get(key))
            if v:
                network_behavior[key] = v
        for key in ("kex_order_raw", "ssh_client_banners"):
            v = _parse_json_field(behavior.get(key))
            if v:
                protocol_fingerprints[key] = v

    if identity:
        # c2_endpoints goes through the same decode-then-check path as its
        # siblings, so a stored "[]" / "null" is omitted instead of being
        # exported as an empty value.
        for key in ("ja3_hashes", "hassh_hashes", "tls_cert_sha256",
                    "payload_simhashes", "c2_endpoints"):
            v = _parse_json_field(identity.get(key))
            if v:
                protocol_fingerprints[key] = v

    if fingerprint_bounties:
        jarm_hashes, http_quirks = _extract_fingerprint_bounty_data(fingerprint_bounties)
        if jarm_hashes:
            protocol_fingerprints["jarm_hashes"] = jarm_hashes
        if http_quirks:
            protocol_fingerprints["http_quirks"] = http_quirks

    if network_behavior or protocol_fingerprints:
        ext_kwargs: dict[str, Any] = {"extension_type": "property-extension"}
        if network_behavior:
            ext_kwargs["network_behavior"] = network_behavior
        if protocol_fingerprints:
            ext_kwargs["protocol_fingerprints"] = protocol_fingerprints
        kwargs["extensions"] = {
            ACTOR_FINGERPRINT_EXT_ID: DecnetActorFingerprintExt(**ext_kwargs),
        }

    # Tier 3 — XDecnetBehaveProfile (BEHAVE observations)
    behave_profile: XDecnetBehaveProfile | None = None
    kd_hash: str | None = None
    raw_kd = identity.get("kd_digraph_simhash") if identity else None
    if isinstance(raw_kd, (bytes, bytearray)):
        kd_hash = raw_kd.hex()
    elif isinstance(raw_kd, str) and raw_kd:
        # NOTE(review): b64decode is lenient, and a hex digest whose length is
        # a multiple of 4 is also valid base64 — it would be decoded here
        # rather than passed through.  Confirm the stored encoding.
        try:
            kd_hash = base64.b64decode(raw_kd).hex()
        except ValueError:  # binascii.Error is a ValueError subclass
            kd_hash = raw_kd

    obs_list = observations or []
    if obs_list or kd_hash is not None:
        # Imported lazily so the dependency is only needed when a profile
        # is actually produced.
        from decnet_behave_shell.spec.envelope import OBSERVATION_SCHEMA_VERSION
        profile_id = (
            f"x-decnet-behave-profile--{_uuid.uuid5(_NS, attacker['uuid'])}"
        )
        behave_profile = XDecnetBehaveProfile(  # type: ignore[call-arg]
            id=profile_id,
            created_by_ref=created_by,
            schema_version=OBSERVATION_SCHEMA_VERSION,
            kd_digraph_simhash=kd_hash,
            observations=obs_list,
        )
        kwargs["x_decnet_behave_profile_ref"] = profile_id

    return stix2.ThreatActor(**kwargs), behave_profile
|
|
|
|
|
|
def _attack_pattern_sdo(technique_id: str, created_by: str) -> stix2.AttackPattern | None:
    """Build an attack-pattern SDO for *technique_id*, or ``None`` if unknown.

    The technique is looked up with ``attack_stix._attack_pattern_by_id`` and
    its ``id`` is reused verbatim for the returned object.  The first
    ``mitre-attack`` external reference, if any, is carried over; only the
    fields it actually has are emitted, so a reference without a URL no
    longer produces ``url=""`` and one without an ``external_id`` no longer
    raises ``KeyError``.
    """
    obj = attack_stix._attack_pattern_by_id(technique_id)
    if obj is None:
        return None

    mitre_ref = next(
        (
            ref
            for ref in obj.get("external_references", [])
            if ref.get("source_name") == "mitre-attack"
        ),
        None,
    )

    er_args: list[Any] = []
    if mitre_ref:
        ref_kwargs: dict[str, Any] = {"source_name": "mitre-attack"}
        if mitre_ref.get("external_id"):
            ref_kwargs["external_id"] = mitre_ref["external_id"]
        if mitre_ref.get("url"):
            ref_kwargs["url"] = mitre_ref["url"]
        # An external reference needs something besides its source name.
        if len(ref_kwargs) > 1:
            er_args.append(stix2.ExternalReference(**ref_kwargs))

    return stix2.AttackPattern(
        id=obj["id"],
        name=obj.get("name", technique_id),
        external_references=er_args,
        created_by_ref=created_by,
    )
|
|
|
|
|
|
def _intel_labels(raw: Any) -> list[str]:
    """Coerce a tag column (list or JSON-encoded list) into a list of strings."""
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except ValueError:
            return []
    if not isinstance(raw, (list, tuple)):
        return []
    return [str(item) for item in raw if item is not None]


def _intel_note(
    intel: dict[str, Any],
    ta_id: str,
    created_by: str,
) -> stix2.Note | None:
    """Summarise a threat-intel row as a Note SDO attached to *ta_id*.

    The note content is one ``key: value`` line per available source:
    the aggregate verdict (``unknown`` when absent), the AbuseIPDB score,
    the GreyNoise classification with its tags, and Feodo / ThreatFox
    listings.  Tag columns may be lists or JSON-encoded lists; anything else
    is treated as "no tags" rather than breaking the export.
    """
    verdict = intel.get("aggregate_verdict") or "unknown"
    lines: list[str] = [f"aggregate_verdict: {verdict}"]

    # A score of 0 is meaningful, so test against None rather than truthiness.
    if intel.get("abuseipdb_score") is not None:
        lines.append(f"abuseipdb_score: {intel['abuseipdb_score']}")

    if intel.get("greynoise_classification"):
        tags = _intel_labels(intel.get("greynoise_tags"))
        lines.append(f"greynoise: {intel['greynoise_classification']} ({', '.join(tags)})")

    if intel.get("feodo_listed"):
        # ``or`` instead of a .get() default: the key may be present but
        # null, which would otherwise render as "feodo: None".
        family = intel.get("feodo_malware_family") or "listed"
        lines.append(f"feodo: {family}")

    if intel.get("threatfox_listed"):
        tt = _intel_labels(intel.get("threatfox_threat_types"))
        lines.append(f"threatfox: {', '.join(tt) if tt else 'listed'}")

    return stix2.Note(
        abstract="DECNET threat-intel verdict",
        content="\n".join(lines),
        object_refs=[ta_id],
        created_by_ref=created_by,
    )
|
|
|
|
|
|
def _observation_window(
    first: datetime | None,
    last: datetime | None,
    fallback: datetime,
) -> tuple[datetime, datetime]:
    """Return an ordered ``(first, last)`` pair, filling gaps from *fallback*."""
    start = first or last or fallback
    end = last or fallback
    if end < start:
        # stix2 rejects windows whose end precedes their start.
        start, end = end, start
    return start, end


def build_attacker_bundle(
    attacker: dict[str, Any],
    behavior: dict[str, Any] | None,
    identity: dict[str, Any] | None,
    intel: dict[str, Any] | None,
    technique_rollup: list[dict[str, Any]],
    raw_tags: list[dict[str, Any]],
    artifacts: list[dict[str, Any]],
    smtp_targets: list[dict[str, Any]],
    commands: list[str] | None = None,
    observations: list[dict[str, Any]] | None = None,
    fingerprint_bounties: list[dict[str, Any]] | None = None,
) -> stix2.Bundle:
    """Assemble a STIX 2.1 Bundle for *attacker*.

    All arguments are plain dicts (the shape returned by the DECNET repo).
    ``attacker['ip']`` and ``attacker['uuid']`` are required; everything else
    is best-effort — unknown or missing data is omitted from the bundle.

    The bundle contains, in order: the DECNET identity, the attacker's IP
    address SCO with its observed-data, the threat actor (plus BEHAVE profile
    and ``characterizes`` relationship when one exists), attack-patterns with
    ``uses`` relationships, per-tag sightings, captured files, SMTP target
    domains, shell commands as process SCOs, and the threat-intel note.

    Observation windows with a missing or inverted bound are normalised so
    that ``last >= first`` before they are handed to stix2.
    """
    objs: list[Any] = []

    org = _decnet_org()
    objs.append(org)

    # ── IP observation ──────────────────────────────────────────────
    ip_value = attacker["ip"]
    # A colon can only appear in an IPv6 literal; emit the matching SCO type.
    addr_cls = stix2.IPv6Address if ":" in str(ip_value) else stix2.IPv4Address
    ip_sco = addr_cls(value=ip_value)
    objs.append(ip_sco)

    fs = _aware(attacker.get("first_seen"))
    ls = _aware(attacker.get("last_seen"))
    now = datetime.now(timezone.utc)
    first_obs, last_obs = _observation_window(fs, ls, now)
    ip_obs = stix2.ObservedData(
        first_observed=first_obs,
        last_observed=last_obs,
        number_observed=max(1, attacker.get("event_count") or 1),
        object_refs=[ip_sco.id],
        created_by_ref=org.id,
    )
    objs.append(ip_obs)

    # ── Threat actor + BEHAVE profile ────────────────────────────────
    ta, behave_profile = _threat_actor(
        attacker, identity, org.id,
        behavior=behavior,
        observations=observations,
        fingerprint_bounties=fingerprint_bounties,
    )
    objs.append(ta)
    if behave_profile is not None:
        objs.append(behave_profile)
        # NOTE(review): the extension definition is only bundled alongside a
        # BEHAVE profile.  Confirm whether it should also ship when the actor
        # carries the fingerprint extension without a profile.
        objs.append(FINGERPRINT_EXT_DEF)
        objs.append(
            stix2.Relationship(
                relationship_type="characterizes",
                source_ref=behave_profile.id,  # type: ignore[attr-defined]
                target_ref=ta.id,
                created_by_ref=org.id,
                allow_custom=True,
            )
        )

    # ── ATT&CK — attack-patterns + uses relationships + sightings ───
    # Build per-technique once; sightings reference the same AP STIX ID.
    ap_stix_ids: dict[str, str] = {}  # technique_id → attack-pattern STIX id
    for row in technique_rollup:
        tid = row.get("sub_technique_id") or row.get("technique_id")
        if not tid or tid in ap_stix_ids:
            continue
        ap = _attack_pattern_sdo(tid, org.id)
        if ap is None:
            continue
        ap_stix_ids[tid] = ap.id
        objs.append(ap)
        objs.append(
            stix2.Relationship(
                source_ref=ta.id,
                target_ref=ap.id,
                relationship_type="uses",
                created_by_ref=org.id,
            )
        )

    for tag in raw_tags:
        tid = tag.get("sub_technique_id") or tag.get("technique_id")
        # Only tags whose technique made it into the rollup get a sighting.
        if not tid or tid not in ap_stix_ids:
            continue
        ts = _aware(tag.get("created_at")) or now
        objs.append(
            stix2.Sighting(
                sighting_of_ref=ap_stix_ids[tid],
                first_seen=ts,
                last_seen=ts,
                count=1,
                where_sighted_refs=[org.id],
                observed_data_refs=[ip_obs.id],
                created_by_ref=org.id,
            )
        )

    # ── Artifacts (file_captured log rows) ──────────────────────────
    for art in artifacts:
        fields = art.get("fields") or {}
        if isinstance(fields, str):
            try:
                fields = json.loads(fields)
            except Exception:
                fields = {}
        if not isinstance(fields, dict):
            # JSON that decodes to a list/scalar carries no usable fields.
            continue
        sha = fields.get("sha256") or fields.get("hash")
        if not sha:
            continue
        file_kwargs: dict[str, Any] = {"hashes": {"SHA-256": str(sha).lower()}}
        name = fields.get("filename") or fields.get("stored_as")
        if name:
            file_kwargs["name"] = name
        f = stix2.File(**file_kwargs)
        objs.append(f)
        fts = _aware(art.get("timestamp")) or now
        objs.append(
            stix2.ObservedData(
                first_observed=fts,
                last_observed=fts,
                number_observed=1,
                object_refs=[f.id],
                created_by_ref=org.id,
            )
        )

    # ── SMTP targets ────────────────────────────────────────────────
    for tgt in smtp_targets:
        domain = tgt.get("domain")
        if not domain:
            continue
        dn = stix2.DomainName(value=domain)
        objs.append(dn)
        s_first, s_last = _observation_window(
            _aware(tgt.get("first_seen")), _aware(tgt.get("last_seen")), now,
        )
        objs.append(
            stix2.ObservedData(
                first_observed=s_first,
                last_observed=s_last,
                number_observed=max(1, tgt.get("count") or 1),
                object_refs=[dn.id],
                created_by_ref=org.id,
            )
        )

    # ── Shell commands (process SCOs + observed-data) ────────────────
    # Commands carry no timestamp of their own, so they inherit the
    # attacker's overall observation window.
    seen_cmds: set[str] = set()
    for cmd_line in commands or []:
        if not cmd_line or cmd_line in seen_cmds:
            continue
        seen_cmds.add(cmd_line)
        proc = stix2.Process(command_line=cmd_line, is_hidden=False)
        objs.append(proc)
        proc_obs = stix2.ObservedData(
            first_observed=first_obs,
            last_observed=last_obs,
            number_observed=1,
            object_refs=[proc.id],
            created_by_ref=org.id,
        )
        objs.append(proc_obs)
        objs.append(
            stix2.Sighting(
                sighting_of_ref=ta.id,
                first_seen=first_obs,
                last_seen=last_obs,
                count=1,
                where_sighted_refs=[org.id],
                observed_data_refs=[proc_obs.id],
                created_by_ref=org.id,
            )
        )

    # ── Intel note ───────────────────────────────────────────────────
    if intel:
        note = _intel_note(intel, ta.id, org.id)
        if note is not None:
            objs.append(note)

    return stix2.Bundle(objects=objs, allow_custom=True)
|
|
|
|
|
|
def build_fleet_bundle(
    rows: list[dict[str, Any]],
    ttp_by_attacker: dict[str, list[dict[str, Any]]],
    observations_by_attacker: dict[str, list[dict[str, Any]]] | None = None,
    fingerprint_bounties_by_ip: dict[str, list[dict[str, Any]]] | None = None,
) -> stix2.Bundle:
    """Assemble one STIX 2.1 Bundle spanning every attacker in *rows*.

    Each row is run through ``build_attacker_bundle`` and the resulting
    objects are merged keyed on STIX ID, so an attack-pattern SDO with a
    given ID appears once no matter how many attackers used the technique
    (the most recently built object wins).

    Per-tag Sightings, Artifacts, and SMTP targets are left out in fleet
    mode — they are passed as empty lists; use the per-attacker endpoint
    for full fidelity.  Behaviour and identity data are likewise not passed.

    ``observations_by_attacker`` and ``ttp_by_attacker`` are keyed by the
    row's ``uuid``; ``fingerprint_bounties_by_ip`` by the row's ``ip``.
    """
    observations_lookup = observations_by_attacker or {}
    bounty_lookup = fingerprint_bounties_by_ip or {}
    merged: dict[str, Any] = {}

    for row in rows:
        # ``commands`` may be a list of dicts or its JSON encoding.
        command_rows = row.get("commands") or []
        if isinstance(command_rows, str):
            try:
                command_rows = json.loads(command_rows)
            except Exception:
                command_rows = []

        cmds: list[str] = []
        for item in command_rows:
            if not isinstance(item, dict):
                continue
            text = item.get("command_text") or item.get("command")
            if text:
                cmds.append(str(text).strip())

        threat_intel = row.get("threat_intel")
        attacker_uuid = row["uuid"]
        per_attacker = build_attacker_bundle(
            attacker=row,
            behavior=None,
            identity=None,
            intel=threat_intel,
            technique_rollup=ttp_by_attacker.get(attacker_uuid, []),
            raw_tags=[],
            artifacts=[],
            smtp_targets=[],
            commands=cmds,
            observations=observations_lookup.get(attacker_uuid),
            fingerprint_bounties=bounty_lookup.get(row.get("ip", ""), []),
        )
        merged.update((obj.id, obj) for obj in per_attacker.objects)

    return stix2.Bundle(objects=list(merged.values()), allow_custom=True)
|