Files
DECNET/decnet/web/db/mysql/repository.py
anti 12aa98a83c fix: migrate TEXT→MEDIUMTEXT for attacker/state columns on MySQL
Existing MySQL databases hit a DataError when the commands/fingerprints
JSON blobs exceed 64 KiB (TEXT limit). _BIG_TEXT emits MEDIUMTEXT only
at CREATE TABLE time; create_all() is a no-op on existing columns.

Add MySQLRepository._migrate_column_types() that queries
information_schema and issues ALTER TABLE … MODIFY COLUMN … MEDIUMTEXT
for the five affected columns (commands, fingerprints, services, deckies,
state.value) whenever they are still TEXT. Called from an overridden
initialize() after _migrate_attackers_table() and before create_all().

Add tests/test_mysql_migration.py covering: ALTER issued for TEXT columns,
no-op for already-MEDIUMTEXT, idempotency, DEFAULT clause correctness,
and initialize() call order.
2026-04-15 12:59:54 -04:00

131 lines
5.6 KiB
Python

"""
MySQL implementation of :class:`BaseRepository`.
Inherits the portable SQLModel query code from :class:`SQLModelRepository`
and only overrides the two places where MySQL's SQL dialect differs from
SQLite's:
* :meth:`_migrate_attackers_table` — uses ``information_schema`` (MySQL
has no ``PRAGMA``).
* :meth:`get_log_histogram` — uses ``FROM_UNIXTIME`` /
``UNIX_TIMESTAMP`` + integer division for bucketing.
"""
from __future__ import annotations
from typing import List, Optional
from sqlalchemy import func, select, text, literal_column
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from sqlmodel.sql.expression import SelectOfScalar
from decnet.web.db.models import Log
from decnet.web.db.mysql.database import get_async_engine
from decnet.web.db.sqlmodel_repo import SQLModelRepository
class MySQLRepository(SQLModelRepository):
"""MySQL backend — uses ``aiomysql``."""
def __init__(self, url: Optional[str] = None, **engine_kwargs) -> None:
self.engine = get_async_engine(url=url, **engine_kwargs)
self.session_factory = async_sessionmaker(
self.engine, class_=AsyncSession, expire_on_commit=False
)
async def _migrate_attackers_table(self) -> None:
"""Drop the legacy (pre-UUID) ``attackers`` table if it exists without a ``uuid`` column.
MySQL exposes column metadata via ``information_schema.COLUMNS``.
``DATABASE()`` scopes the lookup to the currently connected schema.
"""
async with self.engine.begin() as conn:
rows = (await conn.execute(text(
"SELECT COLUMN_NAME FROM information_schema.COLUMNS "
"WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'attackers'"
))).fetchall()
if rows and not any(r[0] == "uuid" for r in rows):
await conn.execute(text("DROP TABLE attackers"))
async def _migrate_column_types(self) -> None:
"""Upgrade TEXT → MEDIUMTEXT for columns that accumulate large JSON blobs.
``create_all()`` never alters existing columns, so tables created before
``_BIG_TEXT`` was introduced keep their 64 KiB ``TEXT`` cap. This method
inspects ``information_schema`` and issues ``ALTER TABLE … MODIFY COLUMN``
for each offending column found.
"""
targets: dict[str, dict[str, str]] = {
"attackers": {
"commands": "MEDIUMTEXT NOT NULL DEFAULT '[]'",
"fingerprints": "MEDIUMTEXT NOT NULL DEFAULT '[]'",
"services": "MEDIUMTEXT NOT NULL DEFAULT '[]'",
"deckies": "MEDIUMTEXT NOT NULL DEFAULT '[]'",
},
"state": {
"value": "MEDIUMTEXT NOT NULL",
},
}
async with self.engine.begin() as conn:
rows = (await conn.execute(text(
"SELECT TABLE_NAME, COLUMN_NAME FROM information_schema.COLUMNS "
"WHERE TABLE_SCHEMA = DATABASE() "
" AND TABLE_NAME IN ('attackers', 'state') "
" AND COLUMN_NAME IN ('commands','fingerprints','services','deckies','value') "
" AND DATA_TYPE = 'text'"
))).fetchall()
for table_name, col_name in rows:
spec = targets.get(table_name, {}).get(col_name)
if spec:
await conn.execute(text(
f"ALTER TABLE `{table_name}` MODIFY COLUMN `{col_name}` {spec}"
))
async def initialize(self) -> None:
"""Create tables and run all MySQL-specific migrations."""
from sqlmodel import SQLModel
await self._migrate_attackers_table()
await self._migrate_column_types()
async with self.engine.begin() as conn:
await conn.run_sync(SQLModel.metadata.create_all)
await self._ensure_admin_user()
def _json_field_equals(self, key: str):
# MySQL 5.7+ exposes JSON_EXTRACT; quoted string result returned for
# TEXT-stored JSON, same behavior we rely on in SQLite.
return text(f"JSON_UNQUOTE(JSON_EXTRACT(fields, '$.{key}')) = :val")
async def get_log_histogram(
self,
search: Optional[str] = None,
start_time: Optional[str] = None,
end_time: Optional[str] = None,
interval_minutes: int = 15,
) -> List[dict]:
bucket_seconds = max(interval_minutes, 1) * 60
# Truncate each timestamp to the start of its bucket:
# FROM_UNIXTIME( (UNIX_TIMESTAMP(timestamp) DIV N) * N )
# DIV is MySQL's integer division operator.
bucket_expr = literal_column(
f"FROM_UNIXTIME((UNIX_TIMESTAMP(timestamp) DIV {bucket_seconds}) * {bucket_seconds})"
).label("bucket_time")
statement: SelectOfScalar = select(bucket_expr, func.count().label("count")).select_from(Log)
statement = self._apply_filters(statement, search, start_time, end_time)
statement = statement.group_by(literal_column("bucket_time")).order_by(
literal_column("bucket_time")
)
async with self.session_factory() as session:
results = await session.execute(statement)
# Normalize to ISO string for API parity with the SQLite backend
# (SQLite's datetime() returns a string already; FROM_UNIXTIME
# returns a datetime).
out: List[dict] = []
for r in results.all():
ts = r[0]
out.append({
"time": ts.isoformat(sep=" ") if hasattr(ts, "isoformat") else ts,
"count": r[1],
})
return out