Files
stealergram/utils/database.py
anti 741e6bb0d3 Rename to stealergram, add pyproject.toml, purge em-dashes
- Rename project to stealergram throughout
- Add pyproject.toml (replaces requirements.txt split, folds pytest.ini)
- Replace all em-dashes with hyphens across all source files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 10:06:30 -04:00

217 lines
7.8 KiB
Python

"""
database.py - SQLite storage for credential hits.
Schema:
    hits table:
        - id           auto-increment primary key
        - url          the target URL from the credential line
        - username     extracted username/email
        - password     extracted password
        - raw          the full original line
        - source       channel/bot it came from
        - filename     the file it was found in
        - timestamp    UTC time of discovery
        - severity     CRITICAL / HIGH / MEDIUM / LOW
        - score        numeric score (higher = worse)
        - reasons      pipe-separated list of scoring reasons
        - seen_before  whether this was a duplicate (for stats)
"""
import sqlite3
import logging
from datetime import datetime, timezone
from pathlib import Path
from contextlib import contextmanager
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Location of the SQLite database file. The path is relative, so it is
# resolved against the working directory of the running process.
DB_FILE = Path("./data/hits.db")
# ─── Setup ────────────────────────────────────────────────────────────────────
@contextmanager
def _connect():
    """
    Yield a connection to DB_FILE for the duration of a `with` block.

    Rows are returned as sqlite3.Row. The transaction is committed when the
    block finishes normally; if an Exception escapes (from the block or from
    the commit itself) it is rolled back and the exception is re-raised.
    The connection is closed in every case.
    """
    db = sqlite3.connect(DB_FILE)
    db.row_factory = sqlite3.Row
    try:
        try:
            yield db
            # Only reached when the caller's block completed without raising.
            db.commit()
        except Exception:
            db.rollback()
            raise
    finally:
        db.close()
def init_db() -> None:
    """
    Create the hits table and its indexes if they don't exist yet.

    The directory that holds DB_FILE is created first when it is missing:
    SQLite creates the database file on demand, but not its parent directory.
    Safe to call repeatedly, since every statement uses IF NOT EXISTS.

    Raises:
        OSError: if the data directory cannot be created.
        sqlite3.Error: if the database cannot be opened or a statement fails.
    """
    # Without this, sqlite3.connect() fails with "unable to open database
    # file" on a fresh checkout where ./data does not exist yet.
    Path(DB_FILE).parent.mkdir(parents=True, exist_ok=True)

    # Columns that the query helpers filter or sort on.
    indexed_columns = ("url", "username", "source", "timestamp", "severity")

    with _connect() as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS hits (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                url TEXT,
                username TEXT,
                password TEXT,
                raw TEXT NOT NULL,
                source TEXT,
                filename TEXT,
                timestamp TEXT NOT NULL,
                severity TEXT NOT NULL DEFAULT 'LOW',
                score INTEGER NOT NULL DEFAULT 10,
                reasons TEXT,
                seen_before INTEGER NOT NULL DEFAULT 0
            )
        """)
        for column in indexed_columns:
            conn.execute(f"CREATE INDEX IF NOT EXISTS idx_{column} ON hits({column})")

    log.info(f"Database ready: {DB_FILE}")
# ─── Writing ─────────────────────────────────────────────────────────────────
def insert_hits(
    scored_hits: list,
    source: str,
    filename: str,
    seen_before: bool = False,
) -> int:
    """
    Store a batch of ScoredHit objects in the hits table.

    Every row written by one call shares the same source, filename,
    seen_before flag and a single UTC timestamp taken at the start of the call.
    Returns the number of rows inserted.
    """
    stamped_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    # SQLite has no boolean type; the flag is stored as 0/1.
    duplicate_flag = 1 if seen_before else 0

    records = [
        (
            hit.url,
            hit.username,
            hit.password,
            hit.raw,
            source,
            filename,
            stamped_at,
            hit.severity,
            hit.score,
            " | ".join(hit.reasons),
            duplicate_flag,
        )
        for hit in scored_hits
    ]

    with _connect() as conn:
        conn.executemany("""
            INSERT INTO hits
            (url, username, password, raw, source, filename, timestamp,
            severity, score, reasons, seen_before)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, records)

    inserted = len(records)
    log.info(f" DB: inserted {inserted} row(s) from {filename}")
    return inserted
# ─── Querying ─────────────────────────────────────────────────────────────────
def search(keyword: str) -> list[sqlite3.Row]:
    """
    Find hits whose url, username or raw line contains `keyword`.

    Matching uses SQL LIKE with the keyword wrapped in % wildcards, so any
    % or _ inside the keyword also acts as a wildcard. Results are ordered
    by score, then timestamp, both descending.
    """
    needle = f"%{keyword}%"
    query = """
        SELECT * FROM hits
        WHERE url LIKE ? OR username LIKE ? OR raw LIKE ?
        ORDER BY score DESC, timestamp DESC
    """
    with _connect() as conn:
        cursor = conn.execute(query, (needle, needle, needle))
        return cursor.fetchall()
def recent(limit: int = 50) -> list[sqlite3.Row]:
    """Return up to `limit` hits, newest timestamp first."""
    query = """
        SELECT * FROM hits
        ORDER BY timestamp DESC
        LIMIT ?
    """
    with _connect() as conn:
        cursor = conn.execute(query, (limit,))
        return cursor.fetchall()
def by_severity(severity: str) -> list[sqlite3.Row]:
    """
    Return the hits of one severity level, newest first.

    Only rows with seen_before = 0 are included, i.e. hits that were not
    flagged as duplicates when they were inserted.
    """
    query = """
        SELECT * FROM hits
        WHERE severity = ? AND seen_before = 0
        ORDER BY timestamp DESC
    """
    with _connect() as conn:
        cursor = conn.execute(query, (severity,))
        return cursor.fetchall()
def recent_for_domains(patterns: list[str], limit: int = 100) -> list[sqlite3.Row]:
    """
    Return recent hits whose `raw` field matches any of the given regex-like patterns.

    Patterns are not evaluated as regular expressions. Each one is reduced to
    a plain substring (a backslash-escaped dot becomes a dot; `@`, `^` and `$`
    are removed) and matched with SQL LIKE against `raw`.

    Args:
        patterns: regex-like domain patterns; an empty list yields no hits.
        limit: maximum number of rows to return.

    Returns:
        Matching rows, newest timestamp first.
    """
    if not patterns:
        return []

    # Build the LIKE arguments outside of any f-string replacement field:
    # a backslash inside one is a SyntaxError before Python 3.12.
    params: list = []
    for pattern in patterns:
        needle = pattern.replace(r"\.", ".")
        for anchor in ("@", "^", "$"):
            needle = needle.replace(anchor, "")
        params.append(f"%{needle}%")

    # One "raw LIKE ?" per pattern; computed before the LIMIT value is appended.
    conditions = " OR ".join(["raw LIKE ?"] * len(params))
    params.append(limit)

    with _connect() as conn:
        return conn.execute(
            f"SELECT * FROM hits WHERE ({conditions}) ORDER BY timestamp DESC LIMIT ?",
            params,
        ).fetchall()
def count_by_severity_for_domains(patterns: list[str]) -> dict:
    """
    Severity counts filtered to hits matching any of the given patterns.

    Patterns are not evaluated as regular expressions. Each one is reduced to
    a plain substring (a backslash-escaped dot becomes a dot; `@`, `^` and `$`
    are removed) and matched with SQL LIKE against `raw`.

    Args:
        patterns: regex-like domain patterns; an empty list yields all zeros.

    Returns:
        A dict with the keys CRITICAL, HIGH, MEDIUM and LOW. Severities
        outside those four are ignored.
    """
    counts = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    if not patterns:
        return counts

    # Build the LIKE arguments outside of any f-string replacement field:
    # a backslash inside one is a SyntaxError before Python 3.12.
    params = []
    for pattern in patterns:
        needle = pattern.replace(r"\.", ".")
        for anchor in ("@", "^", "$"):
            needle = needle.replace(anchor, "")
        params.append(f"%{needle}%")

    conditions = " OR ".join(["raw LIKE ?"] * len(params))

    with _connect() as conn:
        rows = conn.execute(
            f"SELECT severity, COUNT(*) FROM hits WHERE ({conditions}) GROUP BY severity",
            params,
        ).fetchall()

    for row in rows:
        if row[0] in counts:
            counts[row[0]] = row[1]
    return counts
def count_by_severity() -> dict:
    """
    Count hits per severity level, ignoring rows flagged as seen before.

    The result always has the keys CRITICAL, HIGH, MEDIUM and LOW; levels
    with no rows stay at 0 and any other severity value is skipped.
    """
    with _connect() as conn:
        grouped = conn.execute(
            "SELECT severity, COUNT(*) FROM hits WHERE seen_before=0 GROUP BY severity"
        ).fetchall()

    tally = dict.fromkeys(("CRITICAL", "HIGH", "MEDIUM", "LOW"), 0)
    for level, occurrences in grouped:
        if level in tally:
            tally[level] = occurrences
    return tally
def stats() -> dict:
    """
    Return summary statistics for the hits table.

    Keys: total, unique (seen_before = 0), duplicates (total - unique),
    critical / high / medium / low (unique hits per severity), sources
    (number of distinct sources) and top_source (a dict with `source` and
    `cnt` for the source with the most rows, or None when the table is empty).
    """
    with _connect() as conn:

        def scalar(sql):
            # Run a query that yields one row and return its first column.
            return conn.execute(sql).fetchone()[0]

        total = scalar("SELECT COUNT(*) FROM hits")
        unique = scalar("SELECT COUNT(*) FROM hits WHERE seen_before=0")
        critical = scalar("SELECT COUNT(*) FROM hits WHERE severity='CRITICAL' AND seen_before=0")
        high = scalar("SELECT COUNT(*) FROM hits WHERE severity='HIGH' AND seen_before=0")
        medium = scalar("SELECT COUNT(*) FROM hits WHERE severity='MEDIUM' AND seen_before=0")
        low = scalar("SELECT COUNT(*) FROM hits WHERE severity='LOW' AND seen_before=0")
        sources = scalar("SELECT COUNT(DISTINCT source) FROM hits")
        busiest = conn.execute("""
            SELECT source, COUNT(*) as cnt FROM hits
            GROUP BY source ORDER BY cnt DESC LIMIT 1
        """).fetchone()

    summary = {
        "total": total,
        "unique": unique,
        "duplicates": total - unique,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "sources": sources,
        "top_source": None if not busiest else dict(busiest),
    }
    return summary