- FastAPI + htmx + Jinja2 web frontend, started with --web flag
- JWT HS256 auth (WEB_SECRET_KEY) with httpOnly cookies; access (15 min) + refresh (7 day) tokens; refresh rotation + JTI revocation in data/web.db
- RBAC: superadmin > admin > reader enforced per route
- Live SSE dashboard fed by tui/events broadcast queue
- Config editor: keyword groups and channel list saved to data/runtime_config.json and hot-reloaded in-process (scorer.reload_from_config, signal_channel_changed)
- config.py migrated to load groups/channels from runtime_config.json; falls back to hardcoded defaults when file absent
- tui/events.py: subscribe/unsubscribe broadcast, set_bot_context/signal_channel_changed
- utils/scorer.py: import config as _config (fixes local binding); reload_from_config()
- utils/database.py: count_by_severity, recent_for_domains, count_by_severity_for_domains
- 53 new tests (events bus, JWT lifecycle, web DB CRUD, RBAC enforcement, config round-trip); total 141 passing
284 lines
9.7 KiB
Python
284 lines
9.7 KiB
Python
"""
|
|
scorer.py — Severity scoring for credential hits.

Scoring logic (highest match wins):

  CRITICAL — Employee credentials (internal email domain)
               e.g. jdoe@yourclinic.cl:password
           — Admin/privileged service URLs
               e.g. admin., vpn., ssh., rdp., gitlab., jira.

  HIGH     — Internal-facing services
               e.g. intranet., erp., crm., portal., citrix.
           — Password manager or SSO hits
           — Any credential where username looks like an employee email

  MEDIUM   — Client-facing portals
               e.g. app., patient., client., booking.
           — Domain match on a non-privileged service

  LOW      — Generic domain keyword match
           — No URL parsed, just a raw domain mention

Each scored hit is returned as a ScoredHit with:
  - severity: CRITICAL / HIGH / MEDIUM / LOW
  - score:    int (higher = worse)
  - reasons:  list of human-readable reasons
  - raw:      original line
|
|
"""
|
|
|
|
import re
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
import config as _config
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# ─── Severity levels ─────────────────────────────────────────────────────────

# Severity labels, listed from most to least severe.
CRITICAL = "CRITICAL"
HIGH = "HIGH"
MEDIUM = "MEDIUM"
LOW = "LOW"

# Numeric weight of each severity; a higher number means a worse hit.
SEVERITY_SCORES = {
    CRITICAL: 40,
    HIGH: 30,
    MEDIUM: 20,
    LOW: 10,
}

# Emoji marker shown next to each severity when a hit is rendered as text.
SEVERITY_EMOJI = {
    CRITICAL: "🔴",
    HIGH: "🟠",
    MEDIUM: "🟡",
    LOW: "🟢",
}
|
|
|
|
|
|
# ─── Pattern banks ───────────────────────────────────────────────────────────

# Subdomains/services that indicate privileged access.
# The service name must sit at the start of the string, right after an
# http(s):// scheme, or right after a dot. Only 'ad' and 'mail' require a
# trailing dot; for every other alternative nothing constrains what follows,
# so e.g. 'administrator.' also matches via 'admin'.
# NOTE(review): the same looseness applies to HIGH_SERVICES and
# MEDIUM_SERVICES below (e.g. 'app' matches 'apple.'); confirm this is intended.
CRITICAL_SERVICES = re.compile(
    r"(?:^|https?://|\.)"
    r"(admin|vpn|ssh|rdp|ftp|sftp|gitlab|github|bitbucket|jenkins|"
    r"jira|confluence|grafana|kibana|sentry|vault|bastion|jump|"
    r"firewall|router|switch|proxy|ldap|ad\.|activedirectory|"
    r"exchange|mail\.)",
    re.IGNORECASE
)

# Internal-facing / high-value services (intranet, SSO, HR, finance, ...).
# Same leading anchor as CRITICAL_SERVICES.
HIGH_SERVICES = re.compile(
    r"(?:^|https?://|\.)"
    r"(intranet|erp|crm|portal|citrix|workspace|webmail|owa|"
    r"sharepoint|teams|slack|zoom|meet|sso|login|auth|oauth|"
    r"accounts?|dashboard|internal|corp|staff|hr|payroll|"
    r"finance|accounting)",
    re.IGNORECASE
)

# Client-facing portals (booking, patient, support, ...).
# Same leading anchor as CRITICAL_SERVICES.
MEDIUM_SERVICES = re.compile(
    r"(?:^|https?://|\.)"
    r"(app|patient|client|customer|booking|appointment|"
    r"reserva|cita|paciente|user|member|registro|signup|"
    r"support|helpdesk|ticket)",
    re.IGNORECASE
)

# Looks like a corporate email (user@domain); group 1 captures the domain part.
# NOTE(review): not referenced by any code visible in this file — confirm it
# is still needed.
EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9._%+\-]+@([a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})")

# ULP line parser
# Separator set: colon, semicolon, comma, pipe, tab.
# URL field: optional scheme (http/https/ftp) consumed first so '://' is never
# mistaken for a separator; then an optional port group ':\d+/' absorbs port+path
# (port is digits immediately followed by '/') so 'http://host:88/path:user:pass'
# yields url='http://host:88/path', not url='http'.
# The password group is '.+', so it takes the rest of the line, including any
# further separator characters.
ULP_PATTERN = re.compile(
    r"^(?P<url>"
    r"(?:(?:https?|ftp)://)?[^\s:;,|\t]+"   # optional scheme + host/path
    r"(?::\d+/[^\s:;,|\t]*)?"               # optional :port/path (port = digits then /)
    r")"
    r"(?:[:;,|\t])"
    r"(?P<username>[^\s:;,|\t]+)"
    r"(?:[:;,|\t])"
    r"(?P<password>.+)$"
)
|
|
|
|
|
|
# ─── Derived from config ──────────────────────────────────────────────────────
|
|
|
|
def _kw_to_domain(kw: str) -> str:
|
|
"""Strip regex syntax from a keyword to get a plain domain string."""
|
|
return kw.replace(r"@", "").replace(r"\.", ".").strip("^$").lstrip(".")
|
|
|
|
|
|
def _build_employee_domains() -> list[tuple[str, re.Pattern]]:
    """
    Compile one matcher per employee e-mail domain found in the config.

    Only keywords containing '@' are treated as employee e-mail domains.
    Each compiled pattern requires a literal '@' directly before the domain
    and, directly after it, either end of string or a character outside
    letters, digits, '.' and '-'. That way the org domain appearing in a
    URL never produces a false CRITICAL for an unrelated address such as
    @gmail.com.

    Returns a list of (domain_str, compiled_pattern) tuples in keyword order.
    """
    compiled = []
    for keyword in _config.TARGET_KEYWORDS:
        if "@" not in keyword:
            continue
        domain = _kw_to_domain(keyword)
        if not domain:
            # Nothing left once the regex syntax is stripped — nothing to match.
            continue
        boundary_after = r"(?:[^a-zA-Z0-9.\-]|$)"
        matcher = re.compile(
            r"@" + re.escape(domain) + boundary_after,
            re.IGNORECASE,
        )
        compiled.append((domain, matcher))
    return compiled


# Built once at import time; reload_from_config() rebinds it.
EMPLOYEE_DOMAINS = _build_employee_domains()
|
|
|
|
|
|
def _build_org_domains() -> list[re.Pattern]:
    """
    Compile every configured keyword into a plain, case-insensitive matcher.

    These patterns back the LOW baseline check: they match when the org
    domain text appears anywhere in a line. Keywords that reduce to an
    empty string are skipped.
    """
    return [
        re.compile(re.escape(domain), re.IGNORECASE)
        for keyword in _config.TARGET_KEYWORDS
        if (domain := _kw_to_domain(keyword))
    ]


# Built once at import time; reload_from_config() rebinds it.
ORG_DOMAINS = _build_org_domains()
|
|
|
|
|
|
def reload_from_config() -> None:
    """
    Rebuild EMPLOYEE_DOMAINS and ORG_DOMAINS from the current config.TARGET_KEYWORDS.
    Call after save_runtime_config() updates the keyword list.

    The two module globals are rebound to freshly built lists (the old list
    objects are not mutated), so only code that looks the names up on this
    module at call time sees the new patterns.
    """
    global EMPLOYEE_DOMAINS, ORG_DOMAINS
    # Rebound one after the other: if the second builder raised, the first
    # global would already hold the new value.
    EMPLOYEE_DOMAINS = _build_employee_domains()
    ORG_DOMAINS = _build_org_domains()
|
|
|
|
|
|
|
|
# ─── Scoring logic ────────────────────────────────────────────────────────────
|
|
|
|
@dataclass
|
|
class ScoredHit:
|
|
raw: str
|
|
severity: str
|
|
score: int
|
|
reasons: list[str] = field(default_factory=list)
|
|
url: str | None = None
|
|
username: str | None = None
|
|
password: str | None = None
|
|
|
|
@property
|
|
def emoji(self) -> str:
|
|
return SEVERITY_EMOJI.get(self.severity, "⚪")
|
|
|
|
def __str__(self) -> str:
|
|
return f"{self.emoji} [{self.severity}] {self.raw}"
|
|
|
|
|
|
def score_hit(line: str) -> ScoredHit:
    """
    Classify one credential line and record why.

    The line is stripped, split into url / username / password when it fits
    the ULP layout, and run through the checks below. Every check that fires
    adds a human-readable reason; the most severe level that fired becomes
    the hit's severity (LOW when nothing fired).

    Args:
        line: Raw credential line; surrounding whitespace is ignored.

    Returns:
        A ScoredHit holding the stripped line, the resolved severity with its
        numeric score, the collected reasons, and the parsed url / username /
        password (each None when the line did not parse).
    """
    line = line.strip()
    reasons = []
    fired_levels = []

    # Split into ULP fields when the line has the url<sep>user<sep>pass shape.
    parsed = ULP_PATTERN.match(line)
    if parsed is None:
        url = username = password = None
    else:
        url, username, password = parsed.group("url", "username", "password")

    # ── Check 1: employee e-mail domain in the username or anywhere in line ──
    # Each pattern demands a literal '@' right before the domain, so the org
    # domain inside a URL never turns an unrelated address (@gmail etc.)
    # into a CRITICAL. Only the first matching domain is reported.
    for domain_str, pat in EMPLOYEE_DOMAINS:
        if pat.search(username or "") or pat.search(line):
            fired_levels.append(CRITICAL)
            reasons.append(f"Employee email domain: {domain_str}")
            break

    # ── Checks 2-4: what kind of service does the URL point at? ────────────
    # The three checks are independent: one URL may fire several of them.
    if url:
        if CRITICAL_SERVICES.search(url):
            fired_levels.append(CRITICAL)
            reasons.append(f"Critical service URL: {url}")
        if HIGH_SERVICES.search(url):
            fired_levels.append(HIGH)
            reasons.append(f"High-value internal service: {url}")
        if MEDIUM_SERVICES.search(url):
            fired_levels.append(MEDIUM)
            reasons.append(f"Client-facing service: {url}")

    # ── Check 5: generic org domain match (baseline) ───────────────────────
    # Contributes LOW only when nothing stronger fired, but the reason is
    # recorded either way.
    if any(org_pattern.search(line) for org_pattern in ORG_DOMAINS):
        if not fired_levels:
            fired_levels.append(LOW)
        reasons.append("Org domain match in line")

    # ── Check 6: weak / common password notes (do not affect severity) ─────
    if password:
        if len(password) <= 6:
            reasons.append(f"⚠ Weak password ({len(password)} chars)")
        if password.lower() in {"123456", "password", "qwerty", "111111", "admin", "letmein"}:
            reasons.append(f"⚠ Common password: {password}")

    # ── Resolve final severity: most severe level that fired, else LOW ─────
    final_severity = next(
        (level for level in (CRITICAL, HIGH, MEDIUM, LOW) if level in fired_levels),
        LOW,
    )

    if not reasons:
        reasons.append("Pattern match")

    return ScoredHit(
        raw=line,
        severity=final_severity,
        score=SEVERITY_SCORES[final_severity],
        reasons=reasons,
        url=url,
        username=username,
        password=password,
    )
|
|
|
|
|
|
def score_hits(lines: list[str]) -> list[ScoredHit]:
    """
    Score every credential line and return the hits, worst first.

    Hits are ordered by numeric score, highest first; hits with equal scores
    keep the relative order of their input lines.
    """
    return sorted(map(score_hit, lines), key=lambda hit: hit.score, reverse=True)
|
|
|
|
|
|
def summarize(scored: list[ScoredHit]) -> dict:
    """
    Count hits by severity level.

    Args:
        scored: Hits to tally; only each hit's ``severity`` is read.

    Returns:
        A dict mapping severity label to hit count. The four known labels
        (CRITICAL, HIGH, MEDIUM, LOW) are always present, in that order,
        even when their count is zero. A hit carrying any other severity
        label is tallied under that label instead of raising KeyError,
        matching ScoredHit.emoji, which also tolerates unknown severities.
    """
    summary = {CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0}
    for hit in scored:
        # .get() so one hit with an unexpected label cannot abort the summary.
        summary[hit.severity] = summary.get(hit.severity, 0) + 1
    return summary
|