""" Tests for utils/scorer.py - severity scoring and ULP line parsing. All tests use the `patched_keywords` fixture (see conftest.py) which replaces TARGET_KEYWORDS with two entries: @testcorp.com - employee email domain (CRITICAL trigger) testcorp.com - plain domain match (LOW baseline) """ import pytest from utils.scorer import score_hit, score_hits, summarize, CRITICAL, HIGH, MEDIUM, LOW # ─── ULP line parsing ───────────────────────────────────────────────────────── class TestULPParsing: def test_parses_pipe_separated_fields(self, patched_keywords): hit = score_hit("site.com|jdoe@testcorp.com|pass123") assert hit.url == "site.com" assert hit.username == "jdoe@testcorp.com" assert hit.password == "pass123" def test_parses_colon_separated_fields(self, patched_keywords): # 'site.com' has no colon, so url field captures it cleanly hit = score_hit("site.com:jdoe@testcorp.com:pass123") assert hit.url == "site.com" assert hit.username == "jdoe@testcorp.com" assert hit.password == "pass123" def test_malformed_line_yields_none_fields(self, patched_keywords): hit = score_hit("justaplaindomainmatch_testcorp.com") assert hit.url is None assert hit.username is None assert hit.password is None def test_raw_field_preserved_exactly(self, patched_keywords): line = "site.com|jdoe@testcorp.com|pass123" hit = score_hit(line) assert hit.raw == line # ─── Real-world ULP format coverage ────────────────────────────────────────── class TestULPParsingRealWorld: """ Parametrized against real stealer-log lines. Only field extraction is asserted (url/username/password), not severity, so no patched_keywords fixture is needed. """ @pytest.mark.parametrize("line,exp_url,exp_user,exp_pass", [ # ── Protocol + port + path, colon separator ────────────────────────── # Port is digits followed by '/' - must be consumed as part of the URL. ( "http://portal.fakehosp.example.com:88/:55512309-1:hunter2", "http://portal.fakehosp.example.com:88/", "55512309-1", "hunter2", ), ( "http://portal.fakehosp.example.com:8085/app/booking/:3:letmein", "http://portal.fakehosp.example.com:8085/app/booking/", "3", "letmein", ), ( "https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx:30219876-K:Spr!ng22@", "https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx", "30219876-K", "Spr!ng22@", ), # ── Protocol + no port, ID-style username looks like port but has hyphen ── # ':\d+-' must NOT be consumed as a port (no '/' after the digits). ( "https://booking.fakehosp.example.com:40293817-6:Summ3r99..", "https://booking.fakehosp.example.com", "40293817-6", "Summ3r99..", ), ( "https://booking.fakehosp.example.com/:40293817-6:Summ3r99..", "https://booking.fakehosp.example.com/", "40293817-6", "Summ3r99..", ), # ── Protocol + email username directly after host (no trailing slash) ─ ( "https://booking.fakehosp.example.com:carlos.gomez@gmail.com:Qwerty99", "https://booking.fakehosp.example.com", "carlos.gomez@gmail.com", "Qwerty99", ), ( "https://accounts.saas-vendor.example.com/signin:jdoe@fakehosp.example.com:W1nter20", "https://accounts.saas-vendor.example.com/signin", "jdoe@fakehosp.example.com", "W1nter20", ), ( "https://login.sso-provider.example.com/common/oauth2/authorize:jdoe@fakehosp.example.com:Passw0rd!", "https://login.sso-provider.example.com/common/oauth2/authorize", "jdoe@fakehosp.example.com", "Passw0rd!", ), # ── Pipe separator (unambiguous - port stays in URL) ────────────────── ( "http://portal.fakehosp.example.com:88/|22.987.654-3|florida88", "http://portal.fakehosp.example.com:88/", "22.987.654-3", "florida88", ), ( "https://booking.fakehosp.example.com/|77341209-0|Ninja42", "https://booking.fakehosp.example.com/", "77341209-0", "Ninja42", ), # ── Mixed separators: pipe after URL, colon between user/password ───── ( "http://portal.fakehosp.example.com:8085/app/booking/|Z:wd1980wd", "http://portal.fakehosp.example.com:8085/app/booking/", "Z", "wd1980wd", ), # ── No protocol, port in URL ───────────────────────────────────────── ( "portal.fakehosp.example.com:88/:22.987.654-3:florida88", "portal.fakehosp.example.com:88/", "22.987.654-3", "florida88", ), # ── No protocol, no port - plain colon separators ──────────────────── ( "booking.fakehosp.example.com:66778899-7:correcthorse", "booking.fakehosp.example.com", "66778899-7", "correcthorse", ), ( "booking.fakehosp.example.com/:smithjohnathan:Bb881955", "booking.fakehosp.example.com/", "smithjohnathan", "Bb881955", ), # ── Password with special characters ───────────────────────────────── ( "https://booking.fakehosp.example.com/:11223344-5:dragonball99*", "https://booking.fakehosp.example.com/", "11223344-5", "dragonball99*", ), ( "https://booking.fakehosp.example.com/:9988776-65:abc.456#", "https://booking.fakehosp.example.com/", "9988776-65", "abc.456#", ), # ── Semicolon separator ─────────────────────────────────────────────── ( "booking.fakehosp.example.com;smithjohnathan;Bb881955", "booking.fakehosp.example.com", "smithjohnathan", "Bb881955", ), ]) def test_real_world_ulp_parsing(self, line, exp_url, exp_user, exp_pass): hit = score_hit(line) assert hit.url == exp_url, f"URL mismatch for: {line!r}" assert hit.username == exp_user, f"Username mismatch for: {line!r}" assert hit.password == exp_pass, f"Password mismatch for: {line!r}" # ─── Severity classification ────────────────────────────────────────────────── class TestSeverityClassification: def test_employee_email_in_username_is_critical(self, patched_keywords): hit = score_hit("site.com|jdoe@testcorp.com|pass123") assert hit.severity == CRITICAL def test_gmail_on_org_url_is_not_critical(self, patched_keywords): """ Core documented footgun: org domain appears in the URL, but the credential username is a gmail address. Must NOT be CRITICAL. The employee-domain pattern requires a literal '@' before the domain, so 'testcorp.com' in the URL field never triggers it. """ hit = score_hit("testcorp.com|user@gmail.com|pass123") assert hit.severity != CRITICAL def test_critical_service_subdomain_is_critical(self, patched_keywords): hit = score_hit("admin.testcorp.com|user|pass123") assert hit.severity == CRITICAL def test_vpn_subdomain_is_critical(self, patched_keywords): hit = score_hit("vpn.testcorp.com|user|pass123") assert hit.severity == CRITICAL def test_gitlab_subdomain_is_critical(self, patched_keywords): hit = score_hit("gitlab.testcorp.com|user|pass123") assert hit.severity == CRITICAL def test_intranet_subdomain_is_high(self, patched_keywords): hit = score_hit("intranet.testcorp.com|user|pass123") assert hit.severity == HIGH def test_sso_subdomain_is_high(self, patched_keywords): hit = score_hit("sso.testcorp.com|user|pass123") assert hit.severity == HIGH def test_app_subdomain_is_medium(self, patched_keywords): hit = score_hit("app.testcorp.com|user|pass123") assert hit.severity == MEDIUM def test_booking_subdomain_is_medium(self, patched_keywords): hit = score_hit("booking.testcorp.com|user|pass123") assert hit.severity == MEDIUM def test_plain_domain_match_is_low(self, patched_keywords): hit = score_hit("testcorp.com|user|pass123") assert hit.severity == LOW def test_employee_email_beats_high_service(self, patched_keywords): """Employee email domain must win over a HIGH service classification.""" hit = score_hit("intranet.testcorp.com|jdoe@testcorp.com|pass") assert hit.severity == CRITICAL def test_employee_email_beats_medium_service(self, patched_keywords): hit = score_hit("app.testcorp.com|jdoe@testcorp.com|pass") assert hit.severity == CRITICAL def test_multiple_checks_accumulate_reasons(self, patched_keywords): """A line matching both employee email and a critical service URL collects both reasons.""" hit = score_hit("admin.testcorp.com|jdoe@testcorp.com|pass") assert hit.severity == CRITICAL assert len(hit.reasons) >= 2 def test_score_matches_severity(self, patched_keywords): from utils.scorer import SEVERITY_SCORES for line, expected_severity in [ ("admin.testcorp.com|user|pass", CRITICAL), ("intranet.testcorp.com|user|pass", HIGH), ("app.testcorp.com|user|pass", MEDIUM), ("testcorp.com|user|pass", LOW), ]: hit = score_hit(line) assert hit.score == SEVERITY_SCORES[expected_severity] # ─── Weak password flags ────────────────────────────────────────────────────── class TestWeakPasswordFlags: def test_short_password_adds_reason(self, patched_keywords): hit = score_hit("testcorp.com|user|abc") assert any("Weak password" in r for r in hit.reasons) def test_common_password_adds_reason(self, patched_keywords): hit = score_hit("testcorp.com|user|password") assert any("Common password" in r for r in hit.reasons) def test_weak_password_does_not_escalate_severity(self, patched_keywords): """Weak password flags are informational - they must not change severity.""" hit = score_hit("testcorp.com|user|abc") assert hit.severity == LOW def test_strong_password_adds_no_warning(self, patched_keywords): hit = score_hit("testcorp.com|user|Xk9#mP2qLrTv") assert not any("password" in r.lower() for r in hit.reasons if "Employee" not in r and "domain" not in r.lower() and "service" not in r.lower()) # ─── score_hits and summarize ───────────────────────────────────────────────── class TestScoreHitsAndSummarize: def test_score_hits_sorted_descending(self, patched_keywords): lines = [ "testcorp.com|user|pass", # LOW "admin.testcorp.com|user|pass", # CRITICAL "intranet.testcorp.com|user|pass", # HIGH "app.testcorp.com|user|pass", # MEDIUM ] hits = score_hits(lines) scores = [h.score for h in hits] assert scores == sorted(scores, reverse=True) def test_summarize_counts_each_severity(self, patched_keywords): lines = [ "admin.testcorp.com|user|pass", # CRITICAL "intranet.testcorp.com|user|pass", # HIGH "app.testcorp.com|user|pass", # MEDIUM "testcorp.com|user|pass", # LOW ] summary = summarize(score_hits(lines)) assert summary[CRITICAL] == 1 assert summary[HIGH] == 1 assert summary[MEDIUM] == 1 assert summary[LOW] == 1 def test_summarize_zero_for_absent_severities(self, patched_keywords): hits = score_hits(["testcorp.com|user|pass"]) # LOW only summary = summarize(hits) assert summary[CRITICAL] == 0 assert summary[HIGH] == 0 assert summary[MEDIUM] == 0 assert summary[LOW] == 1 def test_score_hits_empty_list(self, patched_keywords): assert score_hits([]) == []