build(db): add Alembic scaffolding + baseline migration

Introduce Alembic at v1. Migrations live inside the package (decnet/web/db/migrations) so they ship with installs; alembic.ini at the repo root drives the CLI. env.py is async and dual-backend, selecting the engine from DECNET_DB_TYPE (mirroring db/factory.py) and reusing the app's own connection when run programmatically. The baseline captures all 39 tables. _BIG_TEXT round-trips as Text().with_variant(MEDIUMTEXT, 'mysql'), so both backends get the right column type from the migration. kd_digraph_simhash gains a sqlite BLOB variant: BINARY(8) reflects as NUMERIC on SQLite and would otherwise trip 'alembic check' forever.
2026-06-16 16:30:29 -04:00
parent 4f141c1a54
commit ef4d67cbef
8 changed files with 1392 additions and 3 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,8 @@ decnet-topology-*-compose.yml
 .docker/
 decnet-state.json
 *.ini
+# tracked: Alembic CLI config (migrations live in decnet/web/db/migrations)
+!alembic.ini
 decnet.log*
 *.loggy
 *.nmap
--- a/alembic.ini
+++ b/alembic.ini
@@ -0,0 +1,147 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts.
+# this is typically a path given in POSIX (e.g. forward slashes)
+# format, relative to the token %(here)s which refers to the location of this
+# ini file
+script_location = %(here)s/decnet/web/db/migrations
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+# Or organize into date-based subdirectories (requires recursive_version_locations = true)
+# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.  for multiple paths, the path separator
+# is defined by "path_separator" below.
+prepend_sys_path = .
+
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the tzdata library which can be installed by adding
+# `alembic[tz]` to the pip requirements.
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to <script_location>/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "path_separator"
+# below.
+# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
+
+# path_separator; This indicates what character is used to split lists of file
+# paths, including version_locations and prepend_sys_path within configparser
+# files such as alembic.ini.
+# The default rendered in new alembic.ini files is "os", which uses os.pathsep
+# to provide os-dependent path splitting.
+#
+# Note that in order to support legacy alembic.ini files, this default does NOT
+# take place if path_separator is not present in alembic.ini.  If this
+# option is omitted entirely, fallback logic is as follows:
+#
+# 1. Parsing of the version_locations option falls back to using the legacy
+#    "version_path_separator" key, which if absent then falls back to the legacy
+#    behavior of splitting on spaces and/or commas.
+# 2. Parsing of the prepend_sys_path option falls back to the legacy
+#    behavior of splitting on spaces, commas, or colons.
+#
+# Valid values for path_separator are:
+#
+# path_separator = :
+# path_separator = ;
+# path_separator = space
+# path_separator = newline
+#
+# Use os.pathsep. Default configuration used for new projects.
+path_separator = os
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# NOTE: no sqlalchemy.url here on purpose. env.py selects the engine from
+# DECNET_DB_TYPE (sqlite|mysql), mirroring decnet/web/db/factory.py.
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
+# hooks = ruff
+# ruff.type = module
+# ruff.module = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Alternatively, use the exec runner to execute a binary found on your PATH
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration.  This section is consumed only by the
+# user-maintained env.py script (via logging.config.fileConfig).
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
--- a/decnet/web/db/migrations/README
+++ b/decnet/web/db/migrations/README
@@ -0,0 +1 @@
+Generic single-database configuration.
--- a/decnet/web/db/migrations/env.py
+++ b/decnet/web/db/migrations/env.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Alembic environment — async, dual-backend (sqlite | mysql).
+
+Two entry shapes:
+
+* **Programmatic** (app boot): :func:`decnet.web.db.migrate.run_migrations`
+  passes the app's own sync ``Connection`` via ``config.attributes`` so the
+  upgrade rides the existing engine — no second connection, no extra driver.
+* **Standalone** (``alembic`` CLI: autogenerate, upgrade, history): builds its
+  own async engine from ``DECNET_DB_TYPE``, mirroring ``db/factory.py``.
+"""
+from __future__ import annotations
+
+import asyncio
+import os
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy.engine import Connection
+from sqlmodel import SQLModel
+
+# Importing the models package registers every table on SQLModel.metadata,
+# which is what autogenerate diffs against.
+import decnet.web.db.models  # noqa: F401
+
+# Alembic's Config object for this run.
+config = context.config
+
+# Standalone CLI runs configure logging from alembic.ini; the programmatic
+# path builds a Config with no file, so guard on it.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Metadata the models registered their tables on (see the models import
+# above); this is what autogenerate diffs the database against.
+target_metadata = SQLModel.metadata
+
+
+def _build_async_engine():
+    """Return the async engine for the backend named by ``DECNET_DB_TYPE``.
+
+    Standalone (CLI) use only; meant to pick the engine the way
+    db/factory.py does. The variable is read case-insensitively and defaults
+    to ``sqlite``. Backend modules are imported lazily, inside the branch
+    that needs them.
+
+    Raises:
+        ValueError: if ``DECNET_DB_TYPE`` is neither ``sqlite`` nor ``mysql``.
+    """
+    db_type = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
+    if db_type == "mysql":
+        from decnet.web.db.mysql.database import get_async_engine as mysql_engine
+        return mysql_engine()
+    if db_type != "sqlite":
+        raise ValueError(f"Unsupported database type: {db_type}")
+
+    from decnet.config import _ROOT
+    from decnet.web.db.sqlite.database import get_async_engine as sqlite_engine
+    # DECNET_DB_PATH, when set, overrides the default database file under _ROOT.
+    fallback_path = str(_ROOT / "decnet.db")
+    return sqlite_engine(os.environ.get("DECNET_DB_PATH", fallback_path))
+
+
+def _configure_and_run(connection: Connection) -> None:
+    """Configure the Alembic context on ``connection`` and run migrations.
+
+    Args:
+        connection: live sync ``Connection`` the migrations execute on.
+    """
+    # SQLite can't ALTER in place; batch mode rewrites the table so future
+    # migrations (drop/alter column) work on both backends.
+    batch_mode = connection.dialect.name == "sqlite"
+    context.configure(
+        connection=connection,
+        target_metadata=target_metadata,
+        render_as_batch=batch_mode,
+        compare_type=True,
+    )
+    # Run every pending migration inside one Alembic-managed transaction.
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+async def _run_standalone() -> None:
+    """Run migrations on a self-built async engine (``alembic`` CLI path).
+
+    Opens one connection on the engine from :func:`_build_async_engine`,
+    runs :func:`_configure_and_run` on it through ``run_sync``, and always
+    disposes the engine afterwards.
+    """
+    engine = _build_async_engine()
+    try:
+        async with engine.connect() as connection:
+            await connection.run_sync(_configure_and_run)
+    finally:
+        # Dispose even when connecting or migrating raises, so the engine's
+        # connections are released while the event loop is still running.
+        await engine.dispose()
+
+
+def run_migrations_online() -> None:
+    """Run migrations against a live database connection.
+
+    Uses the ``connection`` entry of ``config.attributes`` when one is
+    present; otherwise runs the standalone async path under ``asyncio.run``.
+    """
+    handed_over = config.attributes.get("connection")
+    if handed_over is None:
+        # Standalone (CLI): nothing was handed over, so build our own engine.
+        asyncio.run(_run_standalone())
+        return
+    # Programmatic: app handed us a live sync Connection (via run_sync).
+    _configure_and_run(handed_over)
+
+
+if context.is_offline_mode():
+    # Offline (--sql) mode: emit DDL without a DB. Cheap to support and keeps
+    # `alembic upgrade head --sql` working for operators who want to review SQL.
+    #
+    # DECNET_DB_URL is optional here: rendering SQL only needs a dialect, so
+    # when no URL is set fall back to the backend named by DECNET_DB_TYPE
+    # (default sqlite), the same variable the online path keys on. Passing
+    # neither a url nor a dialect_name makes context.configure() fail.
+    _offline_url = os.environ.get("DECNET_DB_URL") or None
+    _offline_dialect = None
+    if _offline_url is None:
+        _offline_dialect = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
+    context.configure(
+        url=_offline_url,
+        dialect_name=_offline_dialect,
+        target_metadata=target_metadata,
+        literal_binds=True,
+    )
+    with context.begin_transaction():
+        context.run_migrations()
+else:
+    run_migrations_online()
--- a/decnet/web/db/migrations/script.py.mako
+++ b/decnet/web/db/migrations/script.py.mako
@@ -0,0 +1,29 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel  # SQLModel column types (AutoString, …) referenced by autogenerate
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    """Upgrade schema: apply this revision's changes."""
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    """Downgrade schema: revert this revision's changes."""
+    ${downgrades if downgrades else "pass"}
--- a/decnet/web/db/migrations/versions/4a914b1d62a0_baseline_schema.py
+++ b/decnet/web/db/migrations/versions/4a914b1d62a0_baseline_schema.py
--- a/decnet/web/db/models/attackers.py
+++ b/decnet/web/db/models/attackers.py
@@ -13,7 +13,7 @@ from datetime import datetime, timezone
 from typing import Any, List, Optional

 from pydantic import BaseModel
-from sqlalchemy import BINARY, Column, Text, UniqueConstraint
+from sqlalchemy import BINARY, Column, LargeBinary, Text, UniqueConstraint
 from sqlmodel import Field, SQLModel

 from ._base import _BIG_TEXT
@@ -238,10 +238,18 @@ class AttackerIdentity(SQLModel, table=True):
    # registry); this column is the rollup the (future) attribution
    # engine will write into so the federation gossip layer
    # has one identity-level fingerprint to compare across operators.
-    # BINARY(8) so MySQL can index without a prefix length.
+    # BINARY(8) so MySQL can index without a prefix length. SQLite has no
+    # fixed-width binary type (BINARY → NUMERIC affinity, which reflects back
+    # as NUMERIC and trips `alembic check`), so use a BLOB variant there —
+    # bytes round-trip identically and the type matches what SQLite reports.
    kd_digraph_simhash: Optional[bytes] = Field(
        default=None,
-        sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
+        sa_column=Column(
+            "kd_digraph_simhash",
+            BINARY(8).with_variant(LargeBinary(), "sqlite"),
+            nullable=True,
+            index=True,
+        ),
    )
    # Soft-merge audit trail. When the clusterer collapses two
    # identities, the loser's row stays in place with this set to the
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,9 @@ dependencies = [
    "psutil>=5.9.0",
    "python-dotenv>=1.0.0",
    "sqlmodel>=0.0.16",
+    # Schema migrations. Runtime dep (not dev-only): the API runs
+    # `alembic upgrade head` at boot for managed DBs (see db/migrate.py).
+    "alembic>=1.13",
    "scapy>=2.6.1",
    "orjson>=3.10",
    "cryptography>=48.0.1",