build(db): add Alembic scaffolding + baseline migration

Introduce Alembic at v1. Migrations live inside the package
(decnet/web/db/migrations) so they ship with installs; alembic.ini at the
repo root drives the CLI. env.py is async and dual-backend, selecting the
engine from DECNET_DB_TYPE (mirroring db/factory.py) and reusing the app's
own connection when run programmatically.

The baseline captures all 39 tables. _BIG_TEXT round-trips as
Text().with_variant(MEDIUMTEXT, 'mysql'), so both backends get the right
column type from the migration. kd_digraph_simhash gains a sqlite BLOB
variant: BINARY(8) reflects as NUMERIC on SQLite and would otherwise trip
'alembic check' forever.
This commit is contained in:
2026-06-16 16:30:29 -04:00
parent 4f141c1a54
commit ef4d67cbef
8 changed files with 1392 additions and 3 deletions

2
.gitignore vendored
View File

@@ -19,6 +19,8 @@ decnet-topology-*-compose.yml
.docker/
decnet-state.json
*.ini
# tracked: Alembic CLI config (migrations live in decnet/web/db/migrations)
!alembic.ini
decnet.log*
*.loggy
*.nmap

147
alembic.ini Normal file
View File

@@ -0,0 +1,147 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = %(here)s/decnet/web/db/migrations
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# Or organize into date-based subdirectories (requires recursive_version_locations = true)
# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the tzdata library which can be installed by adding
# `alembic[tz]` to the pip requirements.
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to <script_location>/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini. If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
# "version_path_separator" key, which if absent then falls back to the legacy
# behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
# behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
# NOTE: no sqlalchemy.url here on purpose. env.py selects the engine from
# DECNET_DB_TYPE (sqlite|mysql), mirroring decnet/web/db/factory.py.
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
# hooks = ruff
# ruff.type = module
# ruff.module = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Alternatively, use the exec runner to execute a binary found on your PATH
# hooks = ruff
# ruff.type = exec
# ruff.executable = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Logging configuration. The sections below are consumed only by the
# user-maintained env.py script (via logging.config.fileConfig).
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARNING
handlers = console
qualname =
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@@ -0,0 +1 @@
Async, dual-backend (sqlite | mysql) Alembic environment; the engine is selected from DECNET_DB_TYPE (see env.py).

View File

@@ -0,0 +1,90 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Alembic environment — async, dual-backend (sqlite | mysql).
Two entry shapes:
* **Programmatic** (app boot): :func:`decnet.web.db.migrate.run_migrations`
passes the app's own sync ``Connection`` via ``config.attributes`` so the
upgrade rides the existing engine — no second connection, no extra driver.
* **Standalone** (``alembic`` CLI: autogenerate, upgrade, history): builds its
own async engine from ``DECNET_DB_TYPE``, mirroring ``db/factory.py``.
"""
from __future__ import annotations
import asyncio
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy.engine import Connection
from sqlmodel import SQLModel
# Importing the models package registers every table on SQLModel.metadata,
# which is what autogenerate diffs against.
import decnet.web.db.models # noqa: F401
# Alembic's Config object for the current invocation, and the metadata that
# autogenerate diffs against.
config = context.config
target_metadata = SQLModel.metadata

# Only the standalone CLI carries an alembic.ini to configure logging from;
# the programmatic path builds a Config with no file, so skip it there.
_ini_path = config.config_file_name
if _ini_path is not None:
    fileConfig(_ini_path)
def _build_async_engine():
    """Create the async engine used by standalone (CLI) runs.

    The backend is read from the ``DECNET_DB_TYPE`` environment variable
    (case-insensitive, defaulting to ``sqlite``), following the selection
    done in db/factory.py. Backend modules are imported inside the chosen
    branch, so only the selected backend's module is loaded.

    Returns:
        Whatever the selected backend's ``get_async_engine`` returns.

    Raises:
        ValueError: if ``DECNET_DB_TYPE`` is neither ``sqlite`` nor ``mysql``.
    """
    backend = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()

    if backend == "mysql":
        from decnet.web.db.mysql.database import get_async_engine as mysql_engine

        return mysql_engine()

    if backend != "sqlite":
        raise ValueError(f"Unsupported database type: {backend}")

    from decnet.config import _ROOT
    from decnet.web.db.sqlite.database import get_async_engine as sqlite_engine

    # DECNET_DB_PATH overrides the default database file under the project root.
    sqlite_file = os.environ.get("DECNET_DB_PATH", str(_ROOT / "decnet.db"))
    return sqlite_engine(sqlite_file)
def _configure_and_run(connection: Connection) -> None:
    """Configure Alembic on *connection* and run the migrations.

    Args:
        connection: a live synchronous SQLAlchemy connection; its dialect
            name decides whether batch mode is enabled.
    """
    # SQLite can't ALTER in place; batch mode rewrites the table instead, so
    # future drop/alter-column migrations work on both backends.
    on_sqlite = connection.dialect.name == "sqlite"

    context.configure(
        connection=connection,
        target_metadata=target_metadata,
        render_as_batch=on_sqlite,
        compare_type=True,
    )

    with context.begin_transaction():
        context.run_migrations()
async def _run_standalone() -> None:
    """Run migrations on a freshly built async engine (standalone CLI path).

    Opens one connection on the engine returned by ``_build_async_engine``
    and runs the synchronous ``_configure_and_run`` on it via ``run_sync``.
    The engine is disposed even when the migration raises, so a failed
    upgrade does not leave the engine undisposed.
    """
    engine = _build_async_engine()
    try:
        async with engine.connect() as connection:
            await connection.run_sync(_configure_and_run)
    finally:
        # Dispose on both success and failure; the original exception (if
        # any) still propagates to the caller.
        await engine.dispose()
def run_migrations_online() -> None:
    """Run migrations against a live database.

    If the caller placed a connection under ``config.attributes["connection"]``
    it is used directly; otherwise a standalone async engine is built and
    driven to completion with ``asyncio.run``.
    """
    supplied = config.attributes.get("connection")
    if supplied is None:
        # Standalone (CLI): no connection was handed in, so build our own.
        asyncio.run(_run_standalone())
        return

    # Programmatic: app handed us a live sync Connection (via run_sync).
    _configure_and_run(supplied)
if context.is_offline_mode():
    # Offline (--sql) mode: emit DDL without a DB. Cheap to support and keeps
    # `alembic upgrade head --sql` working for operators who want to review SQL.
    _offline_url = os.environ.get("DECNET_DB_URL")
    context.configure(
        url=_offline_url,
        # Alembic needs a connection, a URL, or a dialect name to choose the
        # SQL dialect. When DECNET_DB_URL is unset, fall back to the backend
        # named by DECNET_DB_TYPE (default "sqlite"), matching the online path.
        dialect_name=(
            None
            if _offline_url
            else os.environ.get("DECNET_DB_TYPE", "sqlite").lower()
        ),
        target_metadata=target_metadata,
        literal_binds=True,
    )
    with context.begin_transaction():
        context.run_migrations()
else:
    run_migrations_online()

View File

@@ -0,0 +1,29 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel # SQLModel column types (AutoString, …) referenced by autogenerate
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}

File diff suppressed because it is too large Load Diff

View File

@@ -13,7 +13,7 @@ from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import BINARY, Column, Text, UniqueConstraint
from sqlalchemy import BINARY, Column, LargeBinary, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
@@ -238,10 +238,18 @@ class AttackerIdentity(SQLModel, table=True):
# registry); this column is the rollup the (future) attribution
# engine will write into so the federation gossip layer
# has one identity-level fingerprint to compare across operators.
# BINARY(8) so MySQL can index without a prefix length.
# BINARY(8) so MySQL can index without a prefix length. SQLite has no
# fixed-width binary type (BINARY → NUMERIC affinity, which reflects back
# as NUMERIC and trips `alembic check`), so use a BLOB variant there —
# bytes round-trip identically and the type matches what SQLite reports.
kd_digraph_simhash: Optional[bytes] = Field(
default=None,
sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
sa_column=Column(
"kd_digraph_simhash",
BINARY(8).with_variant(LargeBinary(), "sqlite"),
nullable=True,
index=True,
),
)
# Soft-merge audit trail. When the clusterer collapses two
# identities, the loser's row stays in place with this set to the

View File

@@ -34,6 +34,9 @@ dependencies = [
"psutil>=5.9.0",
"python-dotenv>=1.0.0",
"sqlmodel>=0.0.16",
# Schema migrations. Runtime dep (not dev-only): the API runs
# `alembic upgrade head` at boot for managed DBs (see db/migrate.py).
"alembic>=1.13",
"scapy>=2.6.1",
"orjson>=3.10",
"cryptography>=48.0.1",