- Core Telegram monitoring pipeline (scraper, processor, notifier, downloaders)
- Textual TUI frontend with thread-safe event bus
- SQLite persistence, severity scoring, dedup cache
- Fixed ULP parser: handles https:// truncation, port+path URLs, semicolon separator
- Test suite: 88 tests across scorer, cache, database, processor
172 lines
6.1 KiB
Python
172 lines
6.1 KiB
Python
"""
database.py — SQLite storage for credential hits.

Schema:
    hits table:
        - id           auto-increment primary key
        - url          the target URL from the credential line
        - username     extracted username/email
        - password     extracted password
        - raw          the full original line
        - source       channel/bot it came from
        - filename     the file it was found in
        - timestamp    UTC time of discovery
        - severity     CRITICAL / HIGH / MEDIUM / LOW
        - score        numeric score (higher = worse)
        - reasons      pipe-separated list of scoring reasons
        - seen_before  whether this was a duplicate (for stats)
"""
|
|
|
|
import sqlite3
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from contextlib import contextmanager
|
|
|
|
# Module-level logger; records are emitted under this module's import name.
log = logging.getLogger(__name__)
|
|
|
|
# Path of the SQLite database file. It is relative, so it resolves against the
# current working directory of the running process.
DB_FILE = Path("./data/hits.db")
|
|
|
|
|
|
# ─── Setup ────────────────────────────────────────────────────────────────────
|
|
|
|
@contextmanager
def _connect():
    """Yield a SQLite connection to ``DB_FILE`` wrapped in a transaction.

    Rows come back as ``sqlite3.Row`` objects, so columns can be read by
    name as well as by position.

    On a clean exit from the ``with`` body the work is committed. If the
    body (or the commit itself) raises an ``Exception``, the transaction is
    rolled back and the exception is re-raised. The connection is closed in
    every case.
    """
    db = sqlite3.connect(DB_FILE)
    db.row_factory = sqlite3.Row
    try:
        try:
            yield db
            # Only reached when the caller's block finished without raising.
            db.commit()
        except Exception:
            db.rollback()
            raise
    finally:
        db.close()
|
|
|
|
|
|
def init_db() -> None:
    """Create the database directory, the ``hits`` table and its indexes.

    Safe to call repeatedly: the directory is created with
    ``exist_ok=True`` and every DDL statement uses ``IF NOT EXISTS``.

    Raises:
        OSError: if the parent directory of ``DB_FILE`` cannot be created.
        sqlite3.Error: if the database cannot be opened or a statement fails.
    """
    # sqlite3.connect() creates a missing database *file* but not a missing
    # parent directory, so make sure the directory exists before connecting.
    Path(DB_FILE).parent.mkdir(parents=True, exist_ok=True)

    with _connect() as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS hits (
                id          INTEGER PRIMARY KEY AUTOINCREMENT,
                url         TEXT,
                username    TEXT,
                password    TEXT,
                raw         TEXT NOT NULL,
                source      TEXT,
                filename    TEXT,
                timestamp   TEXT NOT NULL,
                severity    TEXT NOT NULL DEFAULT 'LOW',
                score       INTEGER NOT NULL DEFAULT 10,
                reasons     TEXT,
                seen_before INTEGER NOT NULL DEFAULT 0
            )
        """)
        # One single-column index per field the query helpers filter or sort on.
        for column in ("url", "username", "source", "timestamp", "severity"):
            conn.execute(
                f"CREATE INDEX IF NOT EXISTS idx_{column} ON hits({column})"
            )

    # Logged after the with-block so "ready" is only reported once the
    # schema changes have actually been committed.
    log.info(f"Database ready: {DB_FILE}")
|
|
|
|
|
|
# ─── Writing ─────────────────────────────────────────────────────────────────
|
|
|
|
def insert_hits(
    scored_hits: list,
    source: str,
    filename: str,
    seen_before: bool = False,
) -> int:
    """Persist a batch of scored hits and report how many rows were written.

    Every hit in the batch is stored with the same ``source``, ``filename``,
    ``seen_before`` flag and a single UTC timestamp taken when this function
    is called. Each hit's ``reasons`` are joined with " | " into one string.

    Args:
        scored_hits: objects exposing ``url``, ``username``, ``password``,
            ``raw``, ``severity``, ``score`` and ``reasons`` attributes.
        source: value stored in the ``source`` column.
        filename: value stored in the ``filename`` column.
        seen_before: stored as 1 when true, 0 otherwise.

    Returns:
        The number of rows inserted.
    """
    discovered_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    duplicate_flag = 1 if seen_before else 0

    records = [
        (
            hit.url,
            hit.username,
            hit.password,
            hit.raw,
            source,
            filename,
            discovered_at,
            hit.severity,
            hit.score,
            " | ".join(hit.reasons),
            duplicate_flag,
        )
        for hit in scored_hits
    ]

    insert_sql = """
        INSERT INTO hits
        (url, username, password, raw, source, filename, timestamp,
        severity, score, reasons, seen_before)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    with _connect() as conn:
        conn.executemany(insert_sql, records)

    inserted = len(records)
    log.info(f" DB: inserted {inserted} row(s) from {filename}")
    return inserted
|
|
|
|
|
|
# ─── Querying ─────────────────────────────────────────────────────────────────
|
|
|
|
def search(keyword: str) -> list[sqlite3.Row]:
    """Return hits whose url, username or raw line contains *keyword*.

    The keyword is matched literally as a substring: the LIKE wildcards
    ``%`` and ``_`` inside it are escaped, so searching for ``john_doe``
    does not also match ``johnXdoe``. Matching uses SQLite's LIKE operator.

    Args:
        keyword: text to look for; an empty string matches every row whose
            url, username or raw value is not NULL.

    Returns:
        Matching rows, highest score first, then newest timestamp first.
    """
    # Escape the escape character first, then the two LIKE metacharacters,
    # so user input can never widen the pattern.
    escaped = (
        keyword.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    pattern = f"%{escaped}%"

    with _connect() as conn:
        return conn.execute("""
            SELECT * FROM hits
            WHERE url LIKE ? ESCAPE '\\'
               OR username LIKE ? ESCAPE '\\'
               OR raw LIKE ? ESCAPE '\\'
            ORDER BY score DESC, timestamp DESC
        """, (pattern,) * 3).fetchall()
|
|
|
|
|
|
def recent(limit: int = 50) -> list[sqlite3.Row]:
    """Fetch at most *limit* hits, ordered by timestamp from newest to oldest."""
    query = """
        SELECT * FROM hits
        ORDER BY timestamp DESC
        LIMIT ?
    """
    with _connect() as conn:
        cursor = conn.execute(query, (limit,))
        return cursor.fetchall()
|
|
|
|
|
|
def by_severity(severity: str) -> list[sqlite3.Row]:
    """Fetch the non-duplicate hits of one severity level, newest first.

    Only rows with ``seen_before = 0`` are returned; the severity is
    compared for exact equality with the stored value.
    """
    query = """
        SELECT * FROM hits
        WHERE severity = ? AND seen_before = 0
        ORDER BY timestamp DESC
    """
    with _connect() as conn:
        cursor = conn.execute(query, (severity,))
        return cursor.fetchall()
|
|
|
|
|
|
def stats() -> dict:
    """Return summary statistics about the ``hits`` table.

    All counters come from a single aggregate statement, so they are
    mutually consistent even if rows are inserted while this runs, and the
    table is read once instead of once per counter. The most frequent
    source is fetched by a second statement.

    Returns:
        A dict with the keys:
            total       number of rows
            unique      rows with ``seen_before = 0``
            duplicates  ``total - unique``
            critical / high / medium / low
                        non-duplicate rows per severity level
            sources     number of distinct ``source`` values
            top_source  ``{"source": ..., "cnt": ...}`` for the source with
                        the most rows, or ``None`` when the table is empty
    """
    with _connect() as conn:
        # COUNT(expr) ignores NULLs and a CASE without ELSE yields NULL, so
        # each COUNT(CASE ...) counts exactly the rows matching its condition
        # and is 0 (not NULL) on an empty table.
        counts = conn.execute("""
            SELECT
                COUNT(*) AS total,
                COUNT(CASE WHEN seen_before = 0 THEN 1 END) AS unique_hits,
                COUNT(CASE WHEN severity = 'CRITICAL' AND seen_before = 0 THEN 1 END) AS critical,
                COUNT(CASE WHEN severity = 'HIGH' AND seen_before = 0 THEN 1 END) AS high,
                COUNT(CASE WHEN severity = 'MEDIUM' AND seen_before = 0 THEN 1 END) AS medium,
                COUNT(CASE WHEN severity = 'LOW' AND seen_before = 0 THEN 1 END) AS low,
                COUNT(DISTINCT source) AS sources
            FROM hits
        """).fetchone()
        top_source = conn.execute("""
            SELECT source, COUNT(*) as cnt FROM hits
            GROUP BY source ORDER BY cnt DESC LIMIT 1
        """).fetchone()

        total = counts["total"]
        unique = counts["unique_hits"]
        return {
            "total": total,
            "unique": unique,
            "duplicates": total - unique,
            "critical": counts["critical"],
            "high": counts["high"],
            "medium": counts["medium"],
            "low": counts["low"],
            "sources": counts["sources"],
            "top_source": dict(top_source) if top_source else None,
        }
|