"""
Tests for utils/scorer.py - severity scoring and ULP line parsing.

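Most tests use the `patched_keywords` fixture (see conftest.py), which
replaces TARGET_KEYWORDS with two entries:
  @testcorp.com - employee email domain (CRITICAL trigger)
  testcorp.com  - plain domain match    (LOW baseline)
The exception is TestULPParsingRealWorld, which asserts only field
extraction and therefore does not request the fixture.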
"""

import pytest
from utils.scorer import score_hit, score_hits, summarize, CRITICAL, HIGH, MEDIUM, LOW


# ─── ULP line parsing ─────────────────────────────────────────────────────────

class TestULPParsing:
    """Field extraction (url / username / password / raw) from a single ULP line."""

    def test_parses_pipe_separated_fields(self, patched_keywords):
        """A `url|user|pass` line is split into its three fields."""
        parsed = score_hit("site.com|jdoe@testcorp.com|pass123")
        extracted = (parsed.url, parsed.username, parsed.password)
        assert extracted == ("site.com", "jdoe@testcorp.com", "pass123")

    def test_parses_colon_separated_fields(self, patched_keywords):
        """A `url:user:pass` line is split into its three fields."""
        # The URL part ('site.com') contains no colon of its own, so the
        # first colon unambiguously ends the url field.
        parsed = score_hit("site.com:jdoe@testcorp.com:pass123")
        extracted = (parsed.url, parsed.username, parsed.password)
        assert extracted == ("site.com", "jdoe@testcorp.com", "pass123")

    def test_malformed_line_yields_none_fields(self, patched_keywords):
        """A line with no separators leaves url, username and password as None."""
        parsed = score_hit("justaplaindomainmatch_testcorp.com")
        extracted = (parsed.url, parsed.username, parsed.password)
        assert all(value is None for value in extracted)

    def test_raw_field_preserved_exactly(self, patched_keywords):
        """The untouched input line is kept verbatim on `hit.raw`."""
        original = "site.com|jdoe@testcorp.com|pass123"
        assert score_hit(original).raw == original


# ─── Real-world ULP format coverage ──────────────────────────────────────────

class TestULPParsingRealWorld:
    """
    Table-driven parsing checks against real-world stealer-log line formats.

    Each case is (line, expected url, expected username, expected password).
    Severity is deliberately not asserted here, which is why the test does
    not request the patched_keywords fixture.
    """

    _REAL_WORLD_CASES = [
        # Protocol + port + path, colon separator.
        # The port is digits followed by '/', so it belongs to the URL.
        ("http://portal.fakehosp.example.com:88/:55512309-1:hunter2",
         "http://portal.fakehosp.example.com:88/",
         "55512309-1",
         "hunter2"),
        ("http://portal.fakehosp.example.com:8085/app/booking/:3:letmein",
         "http://portal.fakehosp.example.com:8085/app/booking/",
         "3",
         "letmein"),
        ("https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx:30219876-K:Spr!ng22@",
         "https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx",
         "30219876-K",
         "Spr!ng22@"),

        # Protocol, no port; the ID-style username resembles a port but
        # carries a hyphen. ':\d+-' has no '/' after the digits, so it must
        # NOT be swallowed as a port.
        ("https://booking.fakehosp.example.com:40293817-6:Summ3r99..",
         "https://booking.fakehosp.example.com",
         "40293817-6",
         "Summ3r99.."),
        ("https://booking.fakehosp.example.com/:40293817-6:Summ3r99..",
         "https://booking.fakehosp.example.com/",
         "40293817-6",
         "Summ3r99.."),

        # Protocol + email username, including directly after the host
        # with no trailing slash.
        ("https://booking.fakehosp.example.com:carlos.gomez@gmail.com:Qwerty99",
         "https://booking.fakehosp.example.com",
         "carlos.gomez@gmail.com",
         "Qwerty99"),
        ("https://accounts.saas-vendor.example.com/signin:jdoe@fakehosp.example.com:W1nter20",
         "https://accounts.saas-vendor.example.com/signin",
         "jdoe@fakehosp.example.com",
         "W1nter20"),
        ("https://login.sso-provider.example.com/common/oauth2/authorize:jdoe@fakehosp.example.com:Passw0rd!",
         "https://login.sso-provider.example.com/common/oauth2/authorize",
         "jdoe@fakehosp.example.com",
         "Passw0rd!"),

        # Pipe separator: unambiguous, so the port stays inside the URL.
        ("http://portal.fakehosp.example.com:88/|22.987.654-3|florida88",
         "http://portal.fakehosp.example.com:88/",
         "22.987.654-3",
         "florida88"),
        ("https://booking.fakehosp.example.com/|77341209-0|Ninja42",
         "https://booking.fakehosp.example.com/",
         "77341209-0",
         "Ninja42"),

        # Mixed separators: pipe after the URL, colon between user and password.
        ("http://portal.fakehosp.example.com:8085/app/booking/|Z:wd1980wd",
         "http://portal.fakehosp.example.com:8085/app/booking/",
         "Z",
         "wd1980wd"),

        # No protocol, port in the URL.
        ("portal.fakehosp.example.com:88/:22.987.654-3:florida88",
         "portal.fakehosp.example.com:88/",
         "22.987.654-3",
         "florida88"),

        # No protocol, no port: plain colon separators.
        ("booking.fakehosp.example.com:66778899-7:correcthorse",
         "booking.fakehosp.example.com",
         "66778899-7",
         "correcthorse"),
        ("booking.fakehosp.example.com/:smithjohnathan:Bb881955",
         "booking.fakehosp.example.com/",
         "smithjohnathan",
         "Bb881955"),

        # Passwords containing special characters.
        ("https://booking.fakehosp.example.com/:11223344-5:dragonball99*",
         "https://booking.fakehosp.example.com/",
         "11223344-5",
         "dragonball99*"),
        ("https://booking.fakehosp.example.com/:9988776-65:abc.456#",
         "https://booking.fakehosp.example.com/",
         "9988776-65",
         "abc.456#"),

        # Semicolon separator.
        ("booking.fakehosp.example.com;smithjohnathan;Bb881955",
         "booking.fakehosp.example.com",
         "smithjohnathan",
         "Bb881955"),
    ]

    @pytest.mark.parametrize("line,exp_url,exp_user,exp_pass", _REAL_WORLD_CASES)
    def test_real_world_ulp_parsing(self, line, exp_url, exp_user, exp_pass):
        """Check url, username and password (in that order) for one table row."""
        parsed = score_hit(line)
        expectations = (
            ("URL", "url", exp_url),
            ("Username", "username", exp_user),
            ("Password", "password", exp_pass),
        )
        # Attributes are read lazily, one per iteration, so the first
        # mismatch is the one reported.
        for label, attribute, wanted in expectations:
            assert getattr(parsed, attribute) == wanted, f"{label} mismatch for: {line!r}"


# ─── Severity classification ──────────────────────────────────────────────────

class TestSeverityClassification:
    """Expected severity tier for each kind of URL / username combination."""

    def test_employee_email_in_username_is_critical(self, patched_keywords):
        """An @testcorp.com username must score CRITICAL."""
        assert score_hit("site.com|jdoe@testcorp.com|pass123").severity == CRITICAL

    def test_gmail_on_org_url_is_not_critical(self, patched_keywords):
        """
        Core documented footgun: the org domain shows up in the URL while the
        credential's username is a gmail address. That must NOT be CRITICAL.
        The employee-domain pattern needs a literal '@' in front of the
        domain, so 'testcorp.com' in the URL field never triggers it.
        """
        verdict = score_hit("testcorp.com|user@gmail.com|pass123").severity
        assert verdict != CRITICAL

    def test_critical_service_subdomain_is_critical(self, patched_keywords):
        """An `admin.` host under the org domain must score CRITICAL."""
        assert score_hit("admin.testcorp.com|user|pass123").severity == CRITICAL

    def test_vpn_subdomain_is_critical(self, patched_keywords):
        """A `vpn.` host under the org domain must score CRITICAL."""
        assert score_hit("vpn.testcorp.com|user|pass123").severity == CRITICAL

    def test_gitlab_subdomain_is_critical(self, patched_keywords):
        """A `gitlab.` host under the org domain must score CRITICAL."""
        assert score_hit("gitlab.testcorp.com|user|pass123").severity == CRITICAL

    def test_intranet_subdomain_is_high(self, patched_keywords):
        """An `intranet.` host under the org domain must score HIGH."""
        assert score_hit("intranet.testcorp.com|user|pass123").severity == HIGH

    def test_sso_subdomain_is_high(self, patched_keywords):
        """An `sso.` host under the org domain must score HIGH."""
        assert score_hit("sso.testcorp.com|user|pass123").severity == HIGH

    def test_app_subdomain_is_medium(self, patched_keywords):
        """An `app.` host under the org domain must score MEDIUM."""
        assert score_hit("app.testcorp.com|user|pass123").severity == MEDIUM

    def test_booking_subdomain_is_medium(self, patched_keywords):
        """A `booking.` host under the org domain must score MEDIUM."""
        assert score_hit("booking.testcorp.com|user|pass123").severity == MEDIUM

    def test_plain_domain_match_is_low(self, patched_keywords):
        """The bare org domain with a non-employee username must score LOW."""
        assert score_hit("testcorp.com|user|pass123").severity == LOW

    def test_employee_email_beats_high_service(self, patched_keywords):
        """Employee email domain must win over a HIGH service classification."""
        verdict = score_hit("intranet.testcorp.com|jdoe@testcorp.com|pass").severity
        assert verdict == CRITICAL

    def test_employee_email_beats_medium_service(self, patched_keywords):
        """Employee email domain must win over a MEDIUM service classification."""
        verdict = score_hit("app.testcorp.com|jdoe@testcorp.com|pass").severity
        assert verdict == CRITICAL

    def test_multiple_checks_accumulate_reasons(self, patched_keywords):
        """A line matching both employee email and a critical service URL collects both reasons."""
        scored = score_hit("admin.testcorp.com|jdoe@testcorp.com|pass")
        assert scored.severity == CRITICAL
        reason_count = len(scored.reasons)
        assert reason_count >= 2

    def test_score_matches_severity(self, patched_keywords):
        """The numeric score equals the SEVERITY_SCORES entry for the expected tier."""
        from utils.scorer import SEVERITY_SCORES

        # Insertion order is iteration order: CRITICAL, HIGH, MEDIUM, LOW.
        expected_by_line = {
            "admin.testcorp.com|user|pass": CRITICAL,
            "intranet.testcorp.com|user|pass": HIGH,
            "app.testcorp.com|user|pass": MEDIUM,
            "testcorp.com|user|pass": LOW,
        }
        for raw_line, tier in expected_by_line.items():
            scored = score_hit(raw_line)
            assert scored.score == SEVERITY_SCORES[tier]


# ─── Weak password flags ──────────────────────────────────────────────────────

class TestWeakPasswordFlags:
    """Informational password-quality reasons attached to a hit."""

    def test_short_password_adds_reason(self, patched_keywords):
        """A three-character password produces a 'Weak password' reason."""
        scored = score_hit("testcorp.com|user|abc")
        flagged = [reason for reason in scored.reasons if "Weak password" in reason]
        assert flagged

    def test_common_password_adds_reason(self, patched_keywords):
        """The literal password 'password' produces a 'Common password' reason."""
        scored = score_hit("testcorp.com|user|password")
        flagged = [reason for reason in scored.reasons if "Common password" in reason]
        assert flagged

    def test_weak_password_does_not_escalate_severity(self, patched_keywords):
        """Weak password flags are informational - they must not change severity."""
        assert score_hit("testcorp.com|user|abc").severity == LOW

    def test_strong_password_adds_no_warning(self, patched_keywords):
        """A strong password yields no password-related reason."""

        def is_classification_reason(reason):
            # Reasons mentioning the employee / domain / service match are
            # excluded from the password-warning check.
            lowered = reason.lower()
            return "Employee" in reason or "domain" in lowered or "service" in lowered

        scored = score_hit("testcorp.com|user|Xk9#mP2qLrTv")
        password_warnings = [
            reason
            for reason in scored.reasons
            if not is_classification_reason(reason) and "password" in reason.lower()
        ]
        assert not password_warnings


# ─── score_hits and summarize ─────────────────────────────────────────────────

class TestScoreHitsAndSummarize:
    """Batch scoring via `score_hits` and per-severity counting via `summarize`."""

    def test_score_hits_sorted_descending(self, patched_keywords):
        """Hits come back ordered by score, highest first, with none dropped."""
        # Deliberately fed in a non-sorted order so the ordering check is meaningful.
        lines = [
            "testcorp.com|user|pass",           # LOW
            "admin.testcorp.com|user|pass",     # CRITICAL
            "intranet.testcorp.com|user|pass",  # HIGH
            "app.testcorp.com|user|pass",       # MEDIUM
        ]
        hits = score_hits(lines)
        # Guard against a vacuous pass: an empty or truncated result is
        # trivially "sorted", so pin the number of hits first.
        assert len(hits) == len(lines)
        scores = [h.score for h in hits]
        assert scores == sorted(scores, reverse=True)

    def test_summarize_counts_each_severity(self, patched_keywords):
        """One line per tier yields a count of exactly one for every severity."""
        lines = [
            "admin.testcorp.com|user|pass",     # CRITICAL
            "intranet.testcorp.com|user|pass",  # HIGH
            "app.testcorp.com|user|pass",       # MEDIUM
            "testcorp.com|user|pass",           # LOW
        ]
        summary = summarize(score_hits(lines))
        assert summary[CRITICAL] == 1
        assert summary[HIGH] == 1
        assert summary[MEDIUM] == 1
        assert summary[LOW] == 1

    def test_summarize_zero_for_absent_severities(self, patched_keywords):
        """Severities with no hits are present in the summary with a count of 0."""
        hits = score_hits(["testcorp.com|user|pass"])  # LOW only
        summary = summarize(hits)
        assert summary[CRITICAL] == 0
        assert summary[HIGH] == 0
        assert summary[MEDIUM] == 0
        assert summary[LOW] == 1

    def test_score_hits_empty_list(self, patched_keywords):
        """Scoring no lines returns an empty list."""
        assert score_hits([]) == []