fix(db): SessionProfile.kd_digraph_simhash must be BINARY(8), not BLOB

MySQL can't index a BLOB/TEXT column without a prefix length, so
create_all() on a fresh MySQL schema blew up with "BLOB/TEXT column
'kd_digraph_simhash' used in key specification without a key length".

SimHashes are a fixed 8 bytes — the variable-length type was a
SQLAlchemy-side auto-mapping from 'Optional[bytes]', not an actual
schema requirement. Switch to BINARY(8), which is portable: MySQL gets
a fixed-width indexable BINARY, SQLite treats it as BLOB and doesn't
care about key length.
This commit is contained in:
2026-04-23 22:06:38 -04:00
parent f0b0967b16
commit 26d04d5eb8

View File

@@ -3,7 +3,7 @@ from datetime import datetime, timezone
from typing import Any, List, Optional
from pydantic import BaseModel
from sqlalchemy import Column, Text, UniqueConstraint
from sqlalchemy import BINARY, Column, Text, UniqueConstraint
from sqlmodel import Field, SQLModel
from ._base import _BIG_TEXT
@@ -138,7 +138,13 @@ class SessionProfile(SQLModel, table=True):
kd_arrow_rate: Optional[float] = None
kd_tab_rate: Optional[float] = None
# 8-byte SimHash over keystroke digraphs — Hamming-comparable across sessions.
kd_digraph_simhash: Optional[bytes] = Field(default=None, index=True)
# Fixed-width BINARY(8) rather than BLOB: MySQL can't index BLOB/TEXT
# columns without a prefix length, and SimHashes are always exactly 8
# bytes so a variable-length type gains nothing here.
kd_digraph_simhash: Optional[bytes] = Field(
default=None,
sa_column=Column("kd_digraph_simhash", BINARY(8), nullable=True, index=True),
)
# Derived totals.
total_keystrokes: Optional[int] = None
session_duration_s: Optional[float] = None