Files
DECNET/decnet/web/db/models/observations.py
anti f2b3393669 chore: relicense to AGPL-3.0-or-later and add SPDX headers
Replaces LICENSE (GPLv3 -> AGPLv3) and prepends
`SPDX-License-Identifier: AGPL-3.0-or-later` to every source file
across decnet/, decnet_web/, tests/, scripts/, and tools/.

Rationale: closes the GPLv3 ASP loophole so any party operating a
modified DECNET as a network service must offer their modified
source. Personal copyright (Samuel Paschuan) + inbound=outbound
contributions make a future unilateral relicense infeasible.

- LICENSE: full AGPL-3.0 text (gnu.org/licenses/agpl-3.0.txt)
- COPYRIGHT: project copyright notice
- tools/add_spdx_headers.py: idempotent header injector
  (shebang- and PEP 263-aware)

Touches 1565 source files (.py, .ts, .tsx, .js, .jsx, .css, .sh).
No behavior change; comments only.
2026-05-22 21:04:16 -04:00

82 lines
3.3 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""BEHAVE-SHELL observation rows — generic table holding every
emitted Observation envelope.

Mirrors the BEHAVE-SHELL ``Observation`` Pydantic envelope
(``behave_core.spec.envelope.Observation``) field-for-field, plus
one DECNET-side denormalisation (``attacker_uuid``) for cheap joins.

The class is named ``ObservationRow`` to avoid colliding with the
BEHAVE Pydantic class when both are imported into the same module —
the Pydantic envelope is the wire format; this is the storage row.
See ``development/BEHAVE-INTEGRATION.md`` §"Storage" for the full
rationale.

Idempotency is enforced at the schema level by the unique constraint
on ``(evidence_ref, primitive)`` — re-running the extractor on the
same shard+sid produces a DB-side conflict that the repo's upsert
path resolves deterministically. ``evidence_ref`` is NOT NULL for
DECNET-emitted observations even though the BEHAVE envelope makes it
``Optional[str]``: the worker's "have we already profiled this
session?" check keys on it, and the shape
``shard:{decky}/{service}/{date}.jsonl#sid`` is mandatory at the
worker layer.
"""
from __future__ import annotations
from typing import Any
from sqlalchemy import JSON, Column, Index, UniqueConstraint
from sqlmodel import Field, SQLModel
class ObservationRow(SQLModel, table=True):
    """Storage row for a single BEHAVE-SHELL observation (``observations``).

    Every row is a cache entry kept for cheap dashboard reads. It can be
    re-derived from the upstream session shard, so the source of truth
    remains the asciinema shard on disk plus the BEHAVE-SHELL extractor.

    Field types follow BEHAVE: ``id`` is a hex-string UUID (BEHAVE
    declares ``Observation.id: str = Field(default_factory=lambda:
    uuid.uuid4().hex)``) rather than a typed UUID column, and
    ``identity_ref`` is likewise a plain ``str | None``.
    """

    __tablename__ = "observations"

    # Two composite indexes, then the uniqueness rule on
    # (evidence_ref, primitive).
    __table_args__ = (
        Index(
            "ix_observations_attacker_primitive_ts",
            "attacker_uuid",
            "primitive",
            "ts",
        ),
        Index(
            "ix_observations_primitive_ts",
            "primitive",
            "ts",
        ),
        UniqueConstraint(
            "evidence_ref",
            "primitive",
            name="uq_observations_evidence_primitive",
        ),
    )

    # ---- Envelope fields: types are kept identical to BEHAVE's ----------
    id: str = Field(primary_key=True)
    identity_ref: str | None = Field(default=None)
    primitive: str = Field(index=True)
    # Persisted through an explicit JSON column declared NOT NULL.
    value: dict[str, Any] | str | int | float | bool | list = Field(
        sa_column=Column(JSON, nullable=False),
    )
    confidence: float
    window_start_ts: float
    window_end_ts: float
    source: str
    # Declared NOT NULL here.
    evidence_ref: str = Field(nullable=False)
    envelope_v: int
    ts: float = Field(index=True)

    # ---- DECNET-only denormalisation (absent from the BEHAVE envelope) --
    # ``identity_ref`` only names the attacker once attribution exists;
    # before that an observation has no attacker linkage of its own.
    # DECNET therefore resolves the (decky, service, sid, src_ip) tuple to
    # ``attacker_uuid`` at write time, letting AttackerDetail query this
    # table directly instead of joining through the (still-empty)
    # ``attacker_identities`` table.
    attacker_uuid: str = Field(foreign_key="attackers.uuid", index=True)