Files
stealergram/utils/database.py
anti 4c104cddd2 Add web frontend with JWT auth, RBAC, SSE dashboard, and config editor
- FastAPI + htmx + Jinja2 web frontend, started with --web flag
- JWT HS256 auth (WEB_SECRET_KEY) with httpOnly cookies; access (15 min) +
  refresh (7 day) tokens; refresh rotation + JTI revocation in data/web.db
- RBAC: superadmin > admin > reader enforced per route
- Live SSE dashboard fed by tui/events broadcast queue
- Config editor: keyword groups and channel list saved to data/runtime_config.json
  and hot-reloaded in-process (scorer.reload_from_config, signal_channel_changed)
- config.py migrated to load groups/channels from runtime_config.json;
  falls back to hardcoded defaults when file absent
- tui/events.py: subscribe/unsubscribe broadcast, set_bot_context/signal_channel_changed
- utils/scorer.py: import config as _config (fixes local binding); reload_from_config()
- utils/database.py: count_by_severity, recent_for_domains, count_by_severity_for_domains
- 53 new tests (events bus, JWT lifecycle, web DB CRUD, RBAC enforcement,
  config round-trip); total 141 passing
2026-04-02 11:41:46 -03:00

217 lines
7.8 KiB
Python

"""
database.py — SQLite storage for credential hits.

Schema:
    hits table:
        - id           auto-increment primary key
        - url          the target URL from the credential line
        - username     extracted username/email
        - password     extracted password
        - raw          the full original line
        - source       channel/bot it came from
        - filename     the file it was found in
        - timestamp    UTC time of discovery
        - severity     CRITICAL / HIGH / MEDIUM / LOW
        - score        numeric score (higher = worse)
        - reasons      pipe-separated list of scoring reasons
        - seen_before  whether this was a duplicate (for stats)
"""
import sqlite3
import logging
from datetime import datetime, timezone
from pathlib import Path
from contextlib import contextmanager
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# SQLite database file; a relative path, so it resolves against the
# process's current working directory.
DB_FILE = Path("./data/hits.db")
# ─── Setup ────────────────────────────────────────────────────────────────────
@contextmanager
def _connect():
    """
    Yield a connection to DB_FILE that wraps the `with` body in one transaction.

    Rows come back as sqlite3.Row. The transaction is committed when the body
    finishes normally; if the body (or the commit) raises an Exception the
    transaction is rolled back and the error re-raised. The connection is
    closed in every case.
    """
    connection = sqlite3.connect(DB_FILE)
    connection.row_factory = sqlite3.Row
    try:
        try:
            yield connection
            connection.commit()
        except Exception:
            # Discard partial writes, then let the caller see the error.
            connection.rollback()
            raise
    finally:
        connection.close()
def init_db() -> None:
    """
    Ensure the `hits` table and its lookup indexes exist.

    Every statement uses IF NOT EXISTS, so calling this on an already
    initialised database changes nothing.
    """
    create_table = """
        CREATE TABLE IF NOT EXISTS hits (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT,
            username TEXT,
            password TEXT,
            raw TEXT NOT NULL,
            source TEXT,
            filename TEXT,
            timestamp TEXT NOT NULL,
            severity TEXT NOT NULL DEFAULT 'LOW',
            score INTEGER NOT NULL DEFAULT 10,
            reasons TEXT,
            seen_before INTEGER NOT NULL DEFAULT 0
        )
    """
    # One index per column that the query helpers filter or sort on.
    create_indexes = (
        "CREATE INDEX IF NOT EXISTS idx_url ON hits(url)",
        "CREATE INDEX IF NOT EXISTS idx_username ON hits(username)",
        "CREATE INDEX IF NOT EXISTS idx_source ON hits(source)",
        "CREATE INDEX IF NOT EXISTS idx_timestamp ON hits(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_severity ON hits(severity)",
    )
    with _connect() as db:
        db.execute(create_table)
        for statement in create_indexes:
            db.execute(statement)
    log.info(f"Database ready: {DB_FILE}")
# ─── Writing ─────────────────────────────────────────────────────────────────
def insert_hits(
    scored_hits: list,
    source: str,
    filename: str,
    seen_before: bool = False,
) -> int:
    """
    Store a batch of ScoredHit objects in the `hits` table.

    Every row of the batch shares one UTC timestamp (taken when the call
    starts) together with the given source, filename and seen_before flag.
    Each hit's reasons are joined with " | " into a single text column.
    Returns the number of rows inserted.
    """
    discovered_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    # SQLite has no boolean type; the flag is stored as 0/1.
    duplicate_flag = 1 if seen_before else 0
    rows = [
        (
            hit.url,
            hit.username,
            hit.password,
            hit.raw,
            source,
            filename,
            discovered_at,
            hit.severity,
            hit.score,
            " | ".join(hit.reasons),
            duplicate_flag,
        )
        for hit in scored_hits
    ]
    insert_sql = """
        INSERT INTO hits
        (url, username, password, raw, source, filename, timestamp,
        severity, score, reasons, seen_before)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    with _connect() as db:
        db.executemany(insert_sql, rows)
    inserted = len(rows)
    log.info(f" DB: inserted {inserted} row(s) from {filename}")
    return inserted
# ─── Querying ─────────────────────────────────────────────────────────────────
def search(keyword: str) -> list[sqlite3.Row]:
    """
    Search hits for a literal keyword in the url, username or raw fields.

    The keyword is matched as a plain substring. LIKE wildcards inside it
    (`%` and `_`) are escaped so that, for example, searching for "john_doe"
    does not also match "johnXdoe". An empty keyword matches every row.

    Returns the matching rows ordered by score, then timestamp, both
    descending.
    """
    # Escape the escape character first so the backslashes added for % and _
    # are not themselves doubled.
    literal = (
        keyword.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    needle = f"%{literal}%"
    with _connect() as conn:
        # '\\' in this Python literal is a single backslash in the SQL text,
        # i.e. ESCAPE '\'.
        return conn.execute("""
            SELECT * FROM hits
            WHERE url LIKE ? ESCAPE '\\'
               OR username LIKE ? ESCAPE '\\'
               OR raw LIKE ? ESCAPE '\\'
            ORDER BY score DESC, timestamp DESC
        """, (needle,) * 3).fetchall()
def recent(limit: int = 50) -> list[sqlite3.Row]:
    """Fetch up to `limit` hits, newest timestamp first."""
    query = """
        SELECT * FROM hits
        ORDER BY timestamp DESC
        LIMIT ?
    """
    with _connect() as db:
        cursor = db.execute(query, (limit,))
        return cursor.fetchall()
def by_severity(severity: str) -> list[sqlite3.Row]:
    """
    Fetch the hits stored with the given severity, newest timestamp first.

    Rows flagged as seen before (seen_before != 0) are excluded.
    """
    query = """
        SELECT * FROM hits
        WHERE severity = ? AND seen_before = 0
        ORDER BY timestamp DESC
    """
    with _connect() as db:
        cursor = db.execute(query, (severity,))
        return cursor.fetchall()
def _like_args_for_patterns(patterns: list[str]) -> list[str]:
    """Turn regex-like domain patterns into `%substring%` LIKE arguments."""
    like_args = []
    for pattern in patterns:
        # Rough regex -> substring conversion: unescape dots and drop the
        # '@', '^' and '$' characters. Done outside an f-string because a
        # backslash inside an f-string replacement field is a SyntaxError
        # before Python 3.12.
        needle = (
            pattern.replace(r"\.", ".")
            .replace("@", "")
            .replace("^", "")
            .replace("$", "")
        )
        # NOTE(review): any '%' or '_' left in the pattern still acts as a
        # LIKE wildcard here — confirm whether that is intended.
        like_args.append(f"%{needle}%")
    return like_args


def recent_for_domains(patterns: list[str], limit: int = 100) -> list[sqlite3.Row]:
    """
    Return recent hits whose `raw` field matches any of the given patterns.

    Each regex-like pattern is reduced to a plain substring and matched with
    LIKE; a row qualifies when any pattern matches. Returns at most `limit`
    rows, newest timestamp first, or an empty list when `patterns` is empty.
    """
    if not patterns:
        return []
    conditions = " OR ".join("raw LIKE ?" for _ in patterns)
    args = _like_args_for_patterns(patterns)
    args.append(limit)
    with _connect() as conn:
        # Only the "raw LIKE ?" placeholders are interpolated into the SQL
        # text; every value is bound as a parameter.
        return conn.execute(
            f"SELECT * FROM hits WHERE ({conditions}) ORDER BY timestamp DESC LIMIT ?",
            args,
        ).fetchall()


def count_by_severity_for_domains(patterns: list[str]) -> dict:
    """
    Severity counts filtered to hits matching any of the given patterns.

    Patterns are converted exactly as in recent_for_domains. The result
    always has the keys CRITICAL, HIGH, MEDIUM and LOW; severities outside
    that set are ignored. All counts are zero when `patterns` is empty.
    """
    counts = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    if not patterns:
        return counts
    conditions = " OR ".join("raw LIKE ?" for _ in patterns)
    args = _like_args_for_patterns(patterns)
    with _connect() as conn:
        rows = conn.execute(
            f"SELECT severity, COUNT(*) FROM hits WHERE ({conditions}) GROUP BY severity",
            args,
        ).fetchall()
    for row in rows:
        if row[0] in counts:
            counts[row[0]] = row[1]
    return counts
def count_by_severity() -> dict:
    """
    Tally hits per severity level, skipping rows flagged as seen before.

    The result always has the keys CRITICAL, HIGH, MEDIUM and LOW (zero when
    no row has that level); any other severity stored in the table is left
    out.
    """
    tally = dict.fromkeys(("CRITICAL", "HIGH", "MEDIUM", "LOW"), 0)
    with _connect() as db:
        grouped = db.execute(
            "SELECT severity, COUNT(*) FROM hits WHERE seen_before=0 GROUP BY severity"
        ).fetchall()
    for level, total in grouped:
        if level in tally:
            tally[level] = total
    return tally
def stats() -> dict:
    """
    Build a summary of the `hits` table.

    Keys: total, unique (seen_before=0), duplicates (total - unique),
    critical / high / medium / low (unique rows at that severity), sources
    (number of distinct sources) and top_source (a dict with `source` and
    `cnt` for the source with the most rows, or None when the table is empty).
    """
    with _connect() as db:

        def scalar(sql: str):
            # Run a single-value query and unwrap its only column.
            return db.execute(sql).fetchone()[0]

        total = scalar("SELECT COUNT(*) FROM hits")
        unique = scalar("SELECT COUNT(*) FROM hits WHERE seen_before=0")
        per_level = {
            "critical": scalar("SELECT COUNT(*) FROM hits WHERE severity='CRITICAL' AND seen_before=0"),
            "high": scalar("SELECT COUNT(*) FROM hits WHERE severity='HIGH' AND seen_before=0"),
            "medium": scalar("SELECT COUNT(*) FROM hits WHERE severity='MEDIUM' AND seen_before=0"),
            "low": scalar("SELECT COUNT(*) FROM hits WHERE severity='LOW' AND seen_before=0"),
        }
        source_count = scalar("SELECT COUNT(DISTINCT source) FROM hits")
        busiest = db.execute("""
            SELECT source, COUNT(*) as cnt FROM hits
            GROUP BY source ORDER BY cnt DESC LIMIT 1
        """).fetchone()

    summary = {"total": total, "unique": unique, "duplicates": total - unique}
    summary.update(per_level)
    summary["sources"] = source_count
    summary["top_source"] = dict(busiest) if busiest else None
    return summary