build(db): add Alembic scaffolding + baseline migration

Introduce Alembic at v1. Migrations live inside the package
(decnet/web/db/migrations) so they ship with installs; alembic.ini at the
repo root drives the CLI. env.py is async and dual-backend, selecting the
engine from DECNET_DB_TYPE (mirroring db/factory.py) and reusing the app's
own connection when run programmatically.

The baseline captures all 39 tables. _BIG_TEXT round-trips as
Text().with_variant(MEDIUMTEXT, 'mysql'), so both backends get the right
column type from the migration. kd_digraph_simhash gains a sqlite BLOB
variant: BINARY(8) reflects as NUMERIC on SQLite and would otherwise trip
'alembic check' forever.
This commit is contained in:
2026-06-16 16:30:29 -04:00
parent 4f141c1a54
commit ef4d67cbef
8 changed files with 1392 additions and 3 deletions

View File

@@ -0,0 +1 @@
Generic single-database configuration.

View File

@@ -0,0 +1,90 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Alembic environment — async, dual-backend (sqlite | mysql).
Two entry shapes:
* **Programmatic** (app boot): :func:`decnet.web.db.migrate.run_migrations`
passes the app's own sync ``Connection`` via ``config.attributes`` so the
upgrade rides the existing engine — no second connection, no extra driver.
* **Standalone** (``alembic`` CLI: autogenerate, upgrade, history): builds its
own async engine from ``DECNET_DB_TYPE``, mirroring ``db/factory.py``.
"""
from __future__ import annotations
import asyncio
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy.engine import Connection
from sqlmodel import SQLModel
# Importing the models package registers every table on SQLModel.metadata,
# which is what autogenerate diffs against.
import decnet.web.db.models # noqa: F401
# Config object for the current Alembic run.
config = context.config

# Logging is configured from alembic.ini, which only exists for standalone CLI
# runs. The programmatic path builds a Config with no file behind it, so the
# logging setup is skipped there.
_ini_file = config.config_file_name
if _ini_file is not None:
    fileConfig(_ini_file)

# Every table registered on SQLModel.metadata; autogenerate diffs against it.
target_metadata = SQLModel.metadata
def _build_async_engine():
    """Create the async engine for standalone (CLI) runs.

    The backend is read from ``DECNET_DB_TYPE`` (case-insensitive, default
    ``sqlite``); this is meant to mirror the selection in ``db/factory.py``.
    Backend modules are imported lazily so only the selected one is loaded.

    Returns:
        The engine produced by the selected backend's ``get_async_engine``.

    Raises:
        ValueError: if ``DECNET_DB_TYPE`` is neither ``sqlite`` nor ``mysql``.
    """
    backend = os.environ.get("DECNET_DB_TYPE", "sqlite").lower()

    if backend == "mysql":
        from decnet.web.db.mysql.database import get_async_engine as mysql_engine

        return mysql_engine()

    if backend != "sqlite":
        raise ValueError(f"Unsupported database type: {backend}")

    from decnet.config import _ROOT
    from decnet.web.db.sqlite.database import get_async_engine as sqlite_engine

    # DECNET_DB_PATH overrides the default ``decnet.db`` file under ``_ROOT``.
    sqlite_path = os.environ.get("DECNET_DB_PATH", str(_ROOT / "decnet.db"))
    return sqlite_engine(sqlite_path)
def _configure_and_run(connection: Connection) -> None:
    """Bind the Alembic context to *connection* and run the migrations.

    Used by both online entry shapes: called directly with the app's own
    connection, or through ``run_sync`` on the standalone async connection.

    Args:
        connection: live sync connection the migrations execute on.
    """
    # SQLite can't ALTER in place; batch mode rewrites the table so future
    # migrations (drop/alter column) work on both backends.
    use_batch = connection.dialect.name == "sqlite"

    options = {
        "connection": connection,
        "target_metadata": target_metadata,
        "render_as_batch": use_batch,
        # Have autogenerate compare column types as well.
        "compare_type": True,
    }
    context.configure(**options)

    with context.begin_transaction():
        context.run_migrations()
async def _run_standalone() -> None:
    """Run migrations on a self-built async engine (standalone CLI path).

    Builds the engine with ``_build_async_engine``, opens one connection and
    runs ``_configure_and_run`` on it through ``run_sync``.

    Raises:
        ValueError: propagated from ``_build_async_engine`` for an unsupported
            ``DECNET_DB_TYPE``.
    """
    engine = _build_async_engine()
    try:
        async with engine.connect() as connection:
            await connection.run_sync(_configure_and_run)
    finally:
        # Dispose even when connecting or migrating raises, so the engine's
        # pooled connections are released instead of leaking on failure.
        await engine.dispose()
def run_migrations_online() -> None:
    """Run migrations against a live database.

    If the caller stored a connection under ``config.attributes["connection"]``
    the migrations run on it directly; otherwise a standalone async engine is
    built and driven with ``asyncio.run``.
    """
    shared_connection = config.attributes.get("connection")

    if shared_connection is None:
        # Standalone CLI: no connection was handed over, so build our own.
        asyncio.run(_run_standalone())
        return

    # Programmatic: app handed us a live sync Connection (via run_sync).
    _configure_and_run(shared_connection)
if context.is_offline_mode():
    # Offline (--sql) mode: emit DDL without a DB. Cheap to support and keeps
    # `alembic upgrade head --sql` working for operators who want to review SQL.
    context.configure(
        url=os.environ.get("DECNET_DB_URL"),
        # Alembic needs a URL or a dialect name to render SQL. When
        # DECNET_DB_URL is unset or empty, fall back to the backend named by
        # DECNET_DB_TYPE (default sqlite, as in the online path) instead of
        # failing with a generic "url or dialect_name is required" error.
        # A non-empty URL still takes precedence over dialect_name.
        dialect_name=os.environ.get("DECNET_DB_TYPE", "sqlite").lower(),
        target_metadata=target_metadata,
        # Render bound parameters inline so the emitted script is standalone.
        literal_binds=True,
    )
    with context.begin_transaction():
        context.run_migrations()
else:
    run_migrations_online()

View File

@@ -0,0 +1,29 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel # SQLModel column types (AutoString, …) referenced by autogenerate
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
    """Upgrade schema."""
    ## Rendered from the `upgrades` template variable; when it is empty the
    ## generated function body is a bare `pass`.
    ${upgrades if upgrades else "pass"}
def downgrade() -> None:
    """Downgrade schema."""
    ## Rendered from the `downgrades` template variable; when it is empty the
    ## generated function body is a bare `pass`.
    ${downgrades if downgrades else "pass"}

File diff suppressed because it is too large Load Diff

View File

@@ -13,7 +13,7 @@ from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import BINARY, Column, Text, UniqueConstraint
from sqlalchemy import BINARY, Column, LargeBinary, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
@@ -238,10 +238,18 @@ class AttackerIdentity(SQLModel, table=True):
# registry); this column is the rollup the (future) attribution
# engine will write into so the federation gossip layer
# has one identity-level fingerprint to compare across operators.
# BINARY(8) so MySQL can index without a prefix length.
# BINARY(8) so MySQL can index without a prefix length. SQLite has no
# fixed-width binary type (BINARY → NUMERIC affinity, which reflects back
# as NUMERIC and trips `alembic check`), so use a BLOB variant there —
# bytes round-trip identically and the type matches what SQLite reports.
kd_digraph_simhash: Optional[bytes] = Field(
default=None,
sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
sa_column=Column(
"kd_digraph_simhash",
BINARY(8).with_variant(LargeBinary(), "sqlite"),
nullable=True,
index=True,
),
)
# Soft-merge audit trail. When the clusterer collapses two
# identities, the loser's row stays in place with this set to the