Initial commit: ULPgrammer
- Core Telegram monitoring pipeline (scraper, processor, notifier, downloaders) - Textual TUI frontend with thread-safe event bus - SQLite persistence, severity scoring, dedup cache - Fixed ULP parser: handles https:// truncation, port+path URLs, semicolon separator - Test suite: 88 tests across scorer, cache, database, processor
This commit is contained in:
171
utils/database.py
Normal file
171
utils/database.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""
|
||||
database.py — SQLite storage for credential hits.
|
||||
|
||||
Schema:
|
||||
hits table:
|
||||
- id auto-increment primary key
|
||||
- url the target URL from the credential line
|
||||
- username extracted username/email
|
||||
- password extracted password
|
||||
- raw the full original line
|
||||
- source channel/bot it came from
|
||||
- filename the file it was found in
|
||||
- timestamp UTC time of discovery (set at insert time, stored as "YYYY-MM-DD HH:MM:SS UTC")
|
||||
- severity CRITICAL / HIGH / MEDIUM / LOW
|
||||
- score numeric score (higher = worse)
|
||||
- reasons pipe-separated list of scoring reasons
|
||||
- seen_before whether this was a duplicate (for stats)
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from contextlib import contextmanager
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
DB_FILE = Path("./data/hits.db")
|
||||
|
||||
|
||||
# ─── Setup ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@contextmanager
def _connect():
    """Yield a connection to ``DB_FILE`` wrapped in one transaction.

    Rows are returned as ``sqlite3.Row``. When the ``with`` body finishes
    normally the transaction is committed; if the body (or the commit itself)
    raises an ``Exception`` the transaction is rolled back and the exception
    is re-raised. The connection is closed in every case.
    """
    db = sqlite3.connect(DB_FILE)
    db.row_factory = sqlite3.Row
    try:
        try:
            yield db
            # Commit stays inside the inner try so a failed commit is rolled back too.
            db.commit()
        except Exception:
            db.rollback()
            raise
    finally:
        db.close()
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create the ``hits`` table and its indexes if they don't exist yet.

    Safe to call repeatedly: every statement uses ``IF NOT EXISTS``. The
    directory that holds ``DB_FILE`` is created first, because SQLite creates
    a missing database file but not a missing parent directory.
    """
    # Path(...) also tolerates DB_FILE having been overridden with a plain string.
    Path(DB_FILE).parent.mkdir(parents=True, exist_ok=True)

    with _connect() as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS hits (
                id          INTEGER PRIMARY KEY AUTOINCREMENT,
                url         TEXT,
                username    TEXT,
                password    TEXT,
                raw         TEXT NOT NULL,
                source      TEXT,
                filename    TEXT,
                timestamp   TEXT NOT NULL,
                severity    TEXT NOT NULL DEFAULT 'LOW',
                score       INTEGER NOT NULL DEFAULT 10,
                reasons     TEXT,
                seen_before INTEGER NOT NULL DEFAULT 0
            )
        """)
        # One single-column index per column used for filtering or ordering.
        for column in ("url", "username", "source", "timestamp", "severity"):
            conn.execute(
                f"CREATE INDEX IF NOT EXISTS idx_{column} ON hits({column})"
            )

    log.info(f"Database ready: {DB_FILE}")
|
||||
|
||||
|
||||
# ─── Writing ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def insert_hits(
    scored_hits: list,
    source: str,
    filename: str,
    seen_before: bool = False,
) -> int:
    """
    Store a batch of ScoredHit objects in the ``hits`` table.

    Every row in the batch shares the same ``source``, ``filename``,
    ``seen_before`` flag and a single UTC timestamp taken when this function
    is called. Each hit supplies ``url``, ``username``, ``password``, ``raw``,
    ``severity``, ``score`` and ``reasons`` (joined with " | ").

    Returns the number of rows inserted.
    """
    now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    duplicate_flag = 1 if seen_before else 0

    batch = [
        (
            hit.url,
            hit.username,
            hit.password,
            hit.raw,
            source,
            filename,
            now_utc,
            hit.severity,
            hit.score,
            " | ".join(hit.reasons),
            duplicate_flag,
        )
        for hit in scored_hits
    ]

    insert_sql = """
        INSERT INTO hits
            (url, username, password, raw, source, filename, timestamp,
             severity, score, reasons, seen_before)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    with _connect() as conn:
        conn.executemany(insert_sql, batch)

    inserted = len(batch)
    log.info(f"  DB: inserted {inserted} row(s) from {filename}")
    return inserted
|
||||
|
||||
|
||||
# ─── Querying ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def search(keyword: str) -> list[sqlite3.Row]:
    """Search hits for a literal keyword in the url, username or raw fields.

    The keyword is matched as a substring using SQL ``LIKE``. The characters
    ``%`` and ``_`` in the keyword are escaped so they match themselves
    instead of acting as wildcards (e.g. ``john_doe`` no longer matches
    ``johnXdoe``).

    Returns matching rows ordered by score (highest first), then by
    timestamp (newest first).
    """
    # '!' is the ESCAPE character; it must be doubled first so the escapes
    # added for '%' and '_' are not themselves re-escaped.
    literal = (
        keyword.replace("!", "!!").replace("%", "!%").replace("_", "!_")
    )
    pattern = f"%{literal}%"

    with _connect() as conn:
        return conn.execute("""
            SELECT * FROM hits
            WHERE url LIKE ? ESCAPE '!'
               OR username LIKE ? ESCAPE '!'
               OR raw LIKE ? ESCAPE '!'
            ORDER BY score DESC, timestamp DESC
        """, (pattern,) * 3).fetchall()
|
||||
|
||||
|
||||
def recent(limit: int = 50) -> list[sqlite3.Row]:
    """Fetch up to ``limit`` hits, newest timestamp first."""
    query = """
        SELECT * FROM hits
        ORDER BY timestamp DESC
        LIMIT ?
    """
    with _connect() as conn:
        cursor = conn.execute(query, (limit,))
        # Materialize the rows before the connection is closed on exit.
        rows = cursor.fetchall()
        return rows
|
||||
|
||||
|
||||
def by_severity(severity: str) -> list[sqlite3.Row]:
    """Fetch every non-duplicate hit with the given severity, newest first.

    Rows flagged ``seen_before`` are excluded.
    """
    query = """
        SELECT * FROM hits
        WHERE severity = ? AND seen_before = 0
        ORDER BY timestamp DESC
    """
    with _connect() as conn:
        cursor = conn.execute(query, (severity,))
        # Materialize the rows before the connection is closed on exit.
        matches = cursor.fetchall()
        return matches
|
||||
|
||||
|
||||
def stats() -> dict:
    """Return summary statistics about the ``hits`` table.

    Keys of the returned dict:
        total       number of rows, duplicates included
        unique      rows with ``seen_before = 0``
        duplicates  ``total - unique``
        critical / high / medium / low
                    non-duplicate rows per severity level
        sources     number of distinct (non-NULL) sources
        top_source  ``{"source": ..., "cnt": ...}`` for the source with the
                    most rows, or ``None`` when the table is empty
    """
    with _connect() as conn:
        total = conn.execute("SELECT COUNT(*) FROM hits").fetchone()[0]

        # One grouped pass over the non-duplicate rows replaces a separate
        # COUNT(*) query for each severity plus one for the unique total.
        per_severity = {
            row[0]: row[1]
            for row in conn.execute("""
                SELECT severity, COUNT(*) FROM hits
                WHERE seen_before = 0
                GROUP BY severity
            """)
        }
        # Every non-duplicate row falls in exactly one group, so the group
        # sizes add up to the count of rows with seen_before = 0.
        unique = sum(per_severity.values())

        sources = conn.execute(
            "SELECT COUNT(DISTINCT source) FROM hits"
        ).fetchone()[0]
        top_source = conn.execute("""
            SELECT source, COUNT(*) as cnt FROM hits
            GROUP BY source ORDER BY cnt DESC LIMIT 1
        """).fetchone()

    return {
        "total": total,
        "unique": unique,
        "duplicates": total - unique,
        "critical": per_severity.get("CRITICAL", 0),
        "high": per_severity.get("HIGH", 0),
        "medium": per_severity.get("MEDIUM", 0),
        "low": per_severity.get("LOW", 0),
        "sources": sources,
        "top_source": dict(top_source) if top_source else None,
    }
|
||||
Reference in New Issue
Block a user