Initial commit: ULPgrammer

- Core Telegram monitoring pipeline (scraper, processor, notifier, downloaders) - Textual TUI frontend with thread-safe event bus - SQLite persistence, severity scoring, dedup cache - Fixed ULP parser: handles https:// truncation, port+path URLs, semicolon separator - Test suite: 88 tests across scorer, cache, database, processor
2026-04-02 01:58:49 -03:00
commit 48f486ac97
41 changed files with 5270 additions and 0 deletions
--- a/.claudeignore
+++ b/.claudeignore
@@ -0,0 +1,25 @@
 # Sessions
 *.session
 *.session-journal
 bot_session*
 # Data — keep the folder, ignore contents
 data/hits.db
 data/hits.txt
 data/hits.csv
 data/dedup.json
 data/cache.json
 data/tmp/
 data/logs/
 !data/.gitkeep
 # Env
 .env
 # Python
 __pycache__/
 *.pyc
 *.pyo
 .venv/
 venv/
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,22 @@
 # ─── Telegram API credentials ──────────────────────────────────────────────
 # Get these from https://my.telegram.org → API development tools
 API_ID=12345678
 API_HASH=your_api_hash_here
 # ─── Bot credentials ────────────────────────────────────────────────────────
 # Create a bot via @BotFather and paste the token here
 BOT_TOKEN=123456789:ABCdefGHIjklMNOpqrSTUvwxYZ
 # ─── Alert destination ──────────────────────────────────────────────────────
 # Chat ID to send hit notifications to (your personal ID or a group)
 # Tip: message @userinfobot on Telegram to get your ID
 NOTIFY_CHAT_ID=987654321
 # ─── Session name (just a filename, no extension needed) ────────────────────
 SESSION_NAME=monitor_session
 # ─── tdl (fast Go downloader) — optional but strongly recommended ───────────
 # Install: https://github.com/iyear/tdl
 # After installing, run once: tdl login -n <SESSION_NAME>
 # SESSION_NAME above is shared between Telethon and tdl — no double login needed.
 # If tdl is not on PATH the bot falls back to Telethon automatically.
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,28 @@
 # Sessions
 *.session
 *.session-journal
 bot_session*
 # Data — keep the folder, ignore contents
 data/hits.db
 data/hits.txt
 data/hits.csv
 data/dedup.json
 data/cache.json
 data/tmp/
 data/logs/
 !data/.gitkeep
 # Env
 .env
 # Python
 __pycache__/
 *.pyc
 *.pyo
 .venv/
 venv/
 # Claude things
 CLAUDE.md
 .claude/*
--- a/QUICK_REF.md
+++ b/QUICK_REF.md
@@ -0,0 +1,182 @@
 # ULP Monitor — Quick Reference
 > For Claude Code: read the per-file `.md` alongside each `.py` before editing.  
 > Full docs in `README.md`.
 ---
 ## Project layout
 ```
 ulp_monitor/
 ├── main.py           Entry point (--no-tui flag for CLI mode)
 ├── config.py         All settings — edit this for keywords, channels, paths
 │
 ├── core/             Telegram I/O pipeline (all async, Telethon-dependent)
 │   ├── scraper.py        Live listener + backfill orchestration
 │   ├── tdl_downloader.py tdl subprocess wrapper + Telethon fallback
 │   ├── bot_downloader.py Inline "DOWNLOAD" button click flow
 │   ├── processor.py      Archive extraction (.zip/.7z/.rar) + line search
 │   └── notifier.py       Scoring → dedup → DB → hits.txt/csv → Telegram alert
 │
 ├── utils/            Pure logic, no Telegram deps, no async
 │   ├── scorer.py         Severity scoring (CRITICAL/HIGH/MEDIUM/LOW)
 │   ├── cache.py          Seen file-ID dedup (data/cache.json)
 │   └── database.py       SQLite read/write (data/hits.db)
 │
 ├── tui/              Textual TUI — runs in main thread
 │   ├── app.py            MonitorApp + all screens + bot thread launcher
 │   └── events.py         Thread-safe queue.Queue event bus
 │
 └── data/             Runtime output — gitignored
    ├── hits.db
    ├── hits.txt
    ├── hits.csv
    ├── cache.json
    ├── dedup.json
    └── logs/monitor.log
 ```
 ---
 ## Data flow
 ```
 Telegram channel
  └─ new message with file / download button
       │
       ├─ core/scraper.py          detects + guards (size, extension, dedup)
       │
       ├─ core/tdl_downloader.py   downloads via tdl (batched)
       │   └─ core/scraper.py      Telethon fallback if tdl fails
       │
       ├─ core/bot_downloader.py   handles inline button → bot reply flow
       │
       ├─ core/processor.py        extracts archive → searches .txt line by line
       │
       └─ core/notifier.py         scores → deduplicates → persists → alerts
            ├─ utils/scorer.py
            ├─ utils/database.py
            └─ tui/events.py       posts EvHit to TUI
 ```
 ---
 ## Threading architecture
 ```
 main thread (Textual's event loop)
  ├─ MonitorApp.on_mount()
  │   ├─ bus.init_bus()            creates queue.Queue on THIS loop
  │   ├─ threading.Thread → _run_bot_thread()
  │   └─ set_interval(0.1, _drain_bus)
  │
  ├─ _drain_bus() [every 100ms]
  │   └─ queue.Queue.get_nowait() → dispatch to widgets
  │
  └─ Textual widgets, screens, keybindings
 bot thread (own asyncio event loop)
  └─ _bot_main()
      ├─ bot_client.connect() + sign_in()
      ├─ user_client.connect() + is_user_authorized()
      ├─ warm_entity_cache()
      ├─ _make_handler() → NewMessage handler registered
      ├─ backfill_all()
      └─ run_until_disconnected() + _watch_channels() [gathered]
 cross-thread communication
  bot → TUI:  bus.post(event)              [queue.Queue.put_nowait, always safe]
  TUI → bot:  loop.call_soon_threadsafe()  [asyncio.Event.set for channel changes]
 ```
 ---
 ## Config quick reference (`config.py`)
 | Setting | Type | Description |
 |---------|------|-------------|
 | `API_ID` | int | From my.telegram.org |
 | `API_HASH` | str | From my.telegram.org |
 | `BOT_TOKEN` | str | From @BotFather |
 | `NOTIFY_CHAT_ID` | int | Your Telegram user/group ID |
 | `SESSION_NAME` | str | Session file name (default: `monitor_session`) |
 | `TARGET_KEYWORDS` | list[str] | Regex patterns. `@`-prefixed → employee email (CRITICAL). Plain → domain match (LOW) |
 | `WATCHED_CHANNELS` | list[str\|int] | Usernames or `-100xxxxxxxxxx` IDs |
 | `BACKFILL_LIMIT` | int | Messages to scan per channel on startup (0 = off) |
 | `ALLOWED_EXTENSIONS` | set | `.txt .zip .7z .rar` |
 | `MAX_FILE_SIZE` | int | Bytes (default 4 GB) |
 | `ARCHIVE_PASSWORDS` | list[bytes] | Tried in order on locked archives |
 | `TDL_NAMESPACE` | str\|None | `tdl login -n <name>` namespace |
 | `TDL_THREADS` | int | Chunk workers per file (`-t`) |
 | `TDL_PERFILE` | int | Concurrent files per tdl call (`-l`) |
 | `TDL_AMOUNT` | int | Messages per batch |
 | `TEMP_DIR` | Path | `data/tmp` |
 | `HITS_FILE` | Path | `data/hits.txt` |
 | `LOG_FILE` | Path | `data/logs/monitor.log` |
 ---
 ## Severity scoring summary
 | Severity | Score | Triggers |
 |----------|-------|----------|
 | CRITICAL | 40 | Employee email (`@myorg.cl` in username) · Privileged service URL (admin, vpn, rdp, gitlab…) |
 | HIGH | 30 | Internal service URL (intranet, erp, sso, owa…) |
 | MEDIUM | 20 | Client-facing URL (app, booking, helpdesk…) |
 | LOW | 10 | Org domain appears anywhere in line |
 `@`-keyword rule: pattern requires literal `@` before domain — `user@gmail.com` on a URL containing `myorg.cl` does **not** trigger CRITICAL.
 ---
 ## TUI keybindings
 | Key | Action | Screen |
 |-----|--------|--------|
 | `s` | Search hits DB | → SearchScreen |
 | `h` | Browse hits by severity | → HitsDBScreen |
 | `k` | Edit keyword patterns live | → KeywordsScreen |
 | `c` | Clear download + hits logs | main |
 | `r` | Force-refresh stats bar | main |
 | `q` / `ctrl+c` | Quit | any |
 | `Escape` | Back to main | sub-screens |
 | `1`/`2`/`3`/`4` | Filter CRITICAL/HIGH/MEDIUM/LOW | HitsDBScreen |
 | `r` | Load recent 50 | HitsDBScreen |
 ---
 ## Per-file reference docs
 | File | Reference |
 |------|-----------|
 | `utils/scorer.py` | `utils/scorer.md` |
 | `utils/cache.py` | `utils/cache.md` |
 | `utils/database.py` | `utils/database.md` |
 | `core/scraper.py` | `core/scraper.md` |
 | `core/processor.py` | `core/processor.md` |
 | `core/notifier.py` | `core/notifier.md` |
 | `core/tdl_downloader.py` | `core/tdl_downloader.md` |
 | `core/bot_downloader.py` | `core/bot_downloader.md` |
 | `tui/app.py` | `tui/app.md` |
 | `tui/events.py` | `tui/events.md` |
 ---
 ## Common tasks
 **Add a new keyword at runtime:** open the TUI → press `k` → add pattern → active immediately. Copy to `config.TARGET_KEYWORDS` to persist.
 **Add a channel at runtime:** type username or numeric ID in the Channels panel → ➕ Add. Handler re-registers immediately. Edit `config.WATCHED_CHANNELS` to persist.
 **Query hits from CLI:**
 ```bash
 sqlite3 data/hits.db "SELECT severity, username, url FROM hits WHERE seen_before=0 ORDER BY score DESC LIMIT 20"
 ```
 **Re-process all files** (wipe cache):
 ```bash
 rm data/cache.json data/dedup.json
 ```
 **Check what's happening:** `tail -f data/logs/monitor.log`
--- a/README.md
+++ b/README.md
@@ -0,0 +1,146 @@
 # ULP Credential Monitor
 A Telegram-based credential exposure monitor for threat intelligence teams.
 Watches channels for combo/stealer log files and alerts you when your
 organization's credentials appear in them.
 ---
 ## How it works
 ```
 User session (Telethon)
  └─ watches N channels
       └─ detects file attachments (.txt, .zip, .7z, .rar)
            └─ downloads → extracts → searches line by line
                 └─ hit? → writes to data/ + sends bot alert
                 └─ no hit? → deletes file, moves on
 ```
 ---
 ## Project structure
 ```
 ulp_monitor/
 ├── main.py           Entry point
 ├── config.py         All settings (keywords, channels, paths)
 │
 ├── core/             Telegram I/O pipeline
 │   ├── scraper.py        Live listener + backfill
 │   ├── tdl_downloader.py Fast downloads via tdl (Go MTProto)
 │   ├── bot_downloader.py Inline button / bot-dispatched file flows
 │   ├── processor.py      Archive extraction + line-by-line search
 │   └── notifier.py       hits.txt / hits.csv writer + bot alerts
 │
 ├── utils/            Pure logic — no Telegram dependencies
 │   ├── scorer.py         Hit severity scoring
 │   ├── cache.py          Seen-file deduplication
 │   └── database.py       SQLite persistence layer
 │
 ├── tui/              Textual TUI frontend
 │   ├── app.py            MonitorApp + all Screen classes
 │   └── events.py         Thread-safe event bus (bot thread → TUI)
 │
 └── data/             Runtime-generated (gitignored)
    ├── hits.db           SQLite database
    ├── hits.txt          Human-readable hit log
    ├── hits.csv          CSV hit log (importable into Excel / pandas)
    ├── dedup.json        Deduplication hashes
    ├── cache.json        Seen file-ID cache
    └── logs/monitor.log
 ```
 ---
 ## Setup
 ### 1. Get Telegram API credentials
 - Go to https://my.telegram.org → *API development tools*
 - Create an app → note your `api_id` and `api_hash`
 ### 2. Create a bot
 - Message [@BotFather](https://t.me/BotFather) → `/newbot`
 - Start a chat with your new bot before running
 ### 3. Get your chat ID
 - Message [@userinfobot](https://t.me/userinfobot)
 ### 4. Configure
 ```bash
 cp .env.example .env
 # fill in API_ID, API_HASH, BOT_TOKEN, NOTIFY_CHAT_ID
 ```
 Open `config.py` and set:
 - **`TARGET_KEYWORDS`** — your org's domains and email patterns.
  Keywords with `@` (e.g. `r"@myorg\.cl"`) are **employee email domains** → CRITICAL.
  Keywords without `@` are plain domain matches → LOW baseline.
 - **`WATCHED_CHANNELS`** — channel usernames or numeric IDs
 - **`BACKFILL_LIMIT`** — past messages to scan per channel on startup
 ### 5. Install dependencies
 ```bash
 pip install -r requirements.txt
 # rarfile needs the unrar binary:
 # Ubuntu/Debian: sudo apt install unrar
 # macOS:         brew install rar
 ```
 ### 5a. Install tdl (strongly recommended)
 ```bash
 curl -sSL https://raw.githubusercontent.com/iyear/tdl/main/scripts/install.sh | bash
 tdl login -n monitor_session   # the namespace passed to -n must match TDL_NAMESPACE in config.py
 ```
 ### 6. First run — complete Telegram auth
 ```bash
 python main.py --no-tui
 # follow the phone + 2FA prompts once
 ```
 ### 7. Run
 ```bash
 python main.py          # TUI mode (recommended)
 python main.py --no-tui # plain CLI
 ```
 ---
 ## TUI keybindings
 | Key | Action |
 |-----|--------|
 | `s` | Search hits database |
 | `h` | Browse hits by severity |
 | `k` | Edit keyword patterns live |
 | `c` | Clear logs |
 | `r` | Refresh stats |
 | `q` | Quit |
 ---
 ## Output
 | File | Description |
 |------|-------------|
 | `data/hits.db`  | SQLite — all hits with scores, severity, dedup flag |
 | `data/hits.txt` | Human-readable grouped log |
 | `data/hits.csv` | CSV — easy to pull into Excel / pandas |
 | `data/logs/monitor.log` | Full run log |
 Telegram alerts fire for CRITICAL / HIGH / MEDIUM only. LOW is stored silently.
 ---
 ## Notes
 - **Session files are sensitive** — equivalent to a logged-in account. Gitignored, never share.
 - **Flood limits** — `FloodWaitError` is handled automatically.
 - **Private channels** — your user account must already be a member.
--- a/config.py
+++ b/config.py
@@ -0,0 +1,100 @@
 """
 config.py — Central settings. Telegram credentials are read from .env;
 every other setting is a constant defined in this file.
 """
 import os
 from pathlib import Path
 from dotenv import load_dotenv
 load_dotenv()
 # -- Timeouts --
 # NOTE(review): core/bot_downloader.py declares its own BOT_REPLY_TIMEOUT
 # with the same value — confirm which of the two is authoritative.
 BOT_REPLY_TIMEOUT = 10
 # ─── Telegram credentials ────────────────────────────────────────────────────
 # API_ID, API_HASH, BOT_TOKEN and NOTIFY_CHAT_ID are required: a missing
 # variable raises KeyError at import time, and a non-numeric API_ID or
 # NOTIFY_CHAT_ID raises ValueError from int().
 API_ID   = int(os.environ["API_ID"])
 API_HASH = os.environ["API_HASH"]
 BOT_TOKEN = os.environ["BOT_TOKEN"]
 NOTIFY_CHAT_ID = int(os.environ["NOTIFY_CHAT_ID"])
 # Optional: falls back to "monitor_session" when the variable is unset.
 SESSION_NAME = os.getenv("SESSION_NAME", "monitor_session")
 # ─── Target keywords ─────────────────────────────────────────────────────────
 # Add your org's domains, email patterns, IP ranges, known usernames, etc.
 # All patterns are case-insensitive regex.
 # Per the README: patterns containing "@" are treated as employee email
 # domains (CRITICAL); patterns without "@" are plain domain matches (LOW).
 TARGET_KEYWORDS: list[str] = [
    r"sanatorioaleman\.cl",
    r"@sanatorioaleman\.cl",
    # r"192\.168\.10\.",            # internal IP range example
    # r"specificuser",              # known internal usernames
 ]
 # ─── Channels to watch ───────────────────────────────────────────────────────
 # Use usernames (without @) or numeric channel IDs (-100xxxxxxxxxx)
 # Entries prefixed with "#" are disabled and kept only for reference.
 # Per QUICK_REF.md, channels added at runtime through the TUI must be copied
 # into this list to persist.
 WATCHED_CHANNELS: list[str | int] = [
    #-1002230225603,
    "cloudxlog",
    #-1001967030016, # daisycloud
    #"berserklogs", # berserklogs
    #"BorwitaFreeLogs", # borwita
    -1002748707556, # darkcloud
    -1001684073398, # BHF Cloud
    -1003163621939, # Wich Love from R
    -1003611713618, # Khazan Cloud
    -1003328682684, # LogsPlanet
    -1003204260194, # JDP
    -1002828367761, # HesoyamCloud
    -1003513974925, # Slurm Logs
    -1003599300787, # Arhont Corp
    -1002582513379, # OnlyLogs
    -1002788333372, # Ickis Cloud
    #-1001234567890,  # private channel by ID
 ]
 # ─── File handling ───────────────────────────────────────────────────────────
 # Runtime output lives under data/ — the directory that .gitignore excludes
 # and that the README / QUICK_REF document. Writing to the repository root
 # instead would leave hits.txt / hits.csv (real credentials) outside the
 # ignored tree.
 TEMP_DIR  = Path("./data/tmp")
 HITS_FILE = Path("./data/hits.txt")
 LOG_FILE  = Path("./data/logs/monitor.log")
 # Extensions to download and process
 ALLOWED_EXTENSIONS = {".txt", ".zip", ".7z", ".rar"}
 # Max file size to download (bytes). Default: 4 GB.
 # Larger files are skipped to avoid abuse of your session.
 MAX_FILE_SIZE = 4 * 1024 * 1024 * 1024  # 4 GB (Telegram Premium max)
 # ─── Archive passwords to try ────────────────────────────────────────────────
 # Per QUICK_REF.md these are tried in order on locked archives, so put the
 # most likely candidates first.
 ARCHIVE_PASSWORDS: list[bytes] = [
    b"1234",
    b"0000",
    b"infected",
    b"telegram",
    b"password",
    b"12345",
    b"",  # empty password
    b"Borwita",
    b"@WichLoveFromR",
 ]
 # ─── Backfill settings ───────────────────────────────────────────────────────
 # How many historical messages to scan per channel on startup (0 = skip backfill)
 BACKFILL_LIMIT = 500
 # ─── tdl downloader settings ─────────────────────────────────────────────────
 # Namespace tdl was logged into.  Run `tdl login` with no -n flag → namespace
 # is "default".  Run `tdl login -n foo` → namespace is "foo".
 # Set to None to omit -n entirely (tdl will use "default" anyway).
 # NOTE(review): README.md and .env.example tell the user to run
 # `tdl login -n monitor_session` (the SESSION_NAME), which does not match the
 # value below — confirm which namespace is intended.
 TDL_NAMESPACE: str | None = "ulpmon"
 # Parallel chunk workers per file (-t / --threads global flag)
 TDL_THREADS = 8
 # Max concurrent files per tdl invocation (-l / --limit global flag)
 TDL_PERFILE = 4
 # Max messages to batch into a single tdl invocation during backfill.
 # tdl handles the parallelism internally via -l and -t.
 TDL_AMOUNT = 4
 # Whether to use a Telegram takeout session for downloads (lower flood limits).
 # Takeout sessions are rate-limited differently — good for bulk backfill.
 TDL_TAKEOUT = True
--- a/core/init.py
+++ b/core/init.py
@@ -0,0 +1 @@
 """core — Telegram I/O pipeline (scraper, downloader, processor, notifier)."""
--- a/core/bot_downloader.md
+++ b/core/bot_downloader.md
@@ -0,0 +1,68 @@
 # core/bot_downloader.py
 Handles "click to download" inline button flows. Some Telegram channels post files via a bot behind a button rather than directly attaching them.
 ## Public API
 ```python
 from core.bot_downloader import (
    handle_bot_download_message,
    has_download_button,
    extract_password,
 )
 ```
 ### `handle_bot_download_message(client, bot, msg, source_name, patterns, password=None)`
 **async.** Full pipeline:
 1. Detect download button
 2. Click it (URL button → `/start payload` to the bot; callback button → `.click()`)
 3. Wait up to `BOT_REPLY_TIMEOUT` seconds for the bot to send a file back
 4. Hand each file response to `core.scraper.handle_message()`
 ### `has_download_button(msg) -> bool`
 Returns `True` if the message contains a recognisable download button.  
 Checked in live handler and backfill before calling this module.
 ### `extract_password(msg) -> str | None`
 Scans message text for `Pass: ...` / `Password: ...` / `Contraseña: ...` / `Contrasena: ...` / `Clave: ...` patterns (case-insensitive).  
 Returns the extracted password string, or `None`.
 ---
 ## Button detection
 Recognised button text keywords (case-insensitive):
 ```
 DOWNLOAD, DESCARGAR, GET FILE, GET PACK, ⬇, 📥
 ```
 ---
 ## URL button flow (most common)
 ```
 Button URL: https://t.me/SomeBot?start=ABC123
  → parse bot username + payload
  → client.send_message(bot_entity, "/start ABC123")
  → poll get_messages(bot_entity, limit=3) every 1s for BOT_REPLY_TIMEOUT seconds
  → return file messages found
 ```
 ## Callback button flow (fallback)
 ```
 btn.click()
 → sleep 2s
 → get_messages(sender, limit=5)
 → return file messages found
 ```
 ---
 ## Constants
 | Name | Value | Description |
 |------|-------|-------------|
 | `BOT_REPLY_TIMEOUT` | `10` | Seconds to wait for bot file reply |
 | `DOWNLOAD_BUTTON_KEYWORDS` | see above | Button text triggers |
 | `PASSWORD_PATTERN` | regex | Matches `Pass[word]: value` in message text |
--- a/core/bot_downloader.py
+++ b/core/bot_downloader.py
@@ -0,0 +1,161 @@
 """
 bot_downloader.py — Handles "click to download" inline button flows.
 Some Telegram channels post messages with a DOWNLOAD button that triggers
 a bot to send you the actual file. This module simulates that click and
 captures the bot's file response.
 """
 import asyncio
 import re
 import logging
 from telethon import TelegramClient
 from telethon.tl.types import MessageMediaDocument, KeyboardButtonUrl
 from telethon.errors import FloodWaitError
 log = logging.getLogger(__name__)
 DOWNLOAD_BUTTON_KEYWORDS = ["DOWNLOAD", "DESCARGAR", "GET FILE", "GET PACK", "⬇", "📥"]
 BOT_REPLY_TIMEOUT = 10
 PASSWORD_PATTERN = re.compile(
    r"(?:Pass|Password|Contraseña|Contrasena|Clave)[\s]*:[\s]*(.+)$",
    re.IGNORECASE | re.MULTILINE
 )
 # ─── Password extraction ──────────────────────────────────────────────────────
 def extract_password(msg) -> str | None:
    if not msg.text:
        return None
    match = PASSWORD_PATTERN.search(msg.text)
    if match:
        pwd = match.group(1).strip()
        # Strip markdown formatting characters
        pwd = pwd.strip("*`_~")
        log.info(f"  Found password in message: '{pwd}'")
        return pwd
    return None
 # ─── Button detection ─────────────────────────────────────────────────────────
 def find_download_button(msg):
    """
    Locate the first inline button whose caption looks like a download action.

    Rows are scanned top to bottom and buttons left to right; a button matches
    when its upper-cased text contains any entry of DOWNLOAD_BUTTON_KEYWORDS.
    Returns the matching button object, or None when the message has no
    buttons or none of them match.
    """
    if not msg.buttons:
        return None
    flat = (button for row in msg.buttons for button in row)
    return next(
        (
            button
            for button in flat
            if any(keyword in button.text.upper() for keyword in DOWNLOAD_BUTTON_KEYWORDS)
        ),
        None,
    )
 def has_download_button(msg) -> bool:
    """Tell whether find_download_button() recognises a button on *msg*."""
    button = find_download_button(msg)
    return button is not None
 # ─── Click + wait flow ────────────────────────────────────────────────────────
 async def click_download_button(client: TelegramClient, msg) -> list:
    """
    Trigger the download button on a message and collect the file reply.

    Two flows are supported:
      * URL button (``https://t.me/<bot>?start=<payload>``): sends
        ``/start <payload>`` to the bot, then polls the bot chat once per
        second for up to BOT_REPLY_TIMEOUT seconds.
      * Any other button: clicks it, waits 2 seconds, then reads the last
        5 messages from the original message's sender.

    Returns the messages that carry a document, or an empty list when no
    button is found, the URL is not recognised, a request fails, or the bot
    does not answer in time.
    """
    btn = find_download_button(msg)
    if not btn:
        return []
    log.info(f"  Clicking button: '{btn.text}'")
    # ── URL button (most common) ───────────────────────────────────────────
    if isinstance(btn.button, KeyboardButtonUrl):
        url = btn.button.url  # e.g. https://t.me/SomeBot?start=ABC123
        match = re.search(r"t\.me/([A-Za-z0-9_]+)\?start=(.+)", url)
        if not match:
            log.warning(f"  Unrecognised URL format: {url}")
            return []
        bot_username, payload = match.group(1), match.group(2)
        log.info(f"  → Messaging @{bot_username} with /start {payload}")
        try:
            bot_entity = await client.get_entity(bot_username)
            sent = await client.send_message(bot_entity, f"/start {payload}")
        except Exception as e:
            log.error(f"  Failed to message bot: {e}")
            return []
        # Poll for reply
        log.info(f"  Waiting up to {BOT_REPLY_TIMEOUT}s for bot reply...")
        for _ in range(BOT_REPLY_TIMEOUT):
            await asyncio.sleep(1)
            try:
                # Only look at messages newer than our /start request;
                # without the lower bound a file left in the chat by an
                # earlier download would be returned as this reply.
                recent = await client.get_messages(bot_entity, limit=3, min_id=sent.id)
                files = [m for m in recent if isinstance(m.media, MessageMediaDocument)]
                if files:
                    log.info("  ✓ Got file from bot.")
                    return files
            except Exception as e:
                log.warning(f"  Poll error: {e}")
                break
        log.warning(f"  Bot did not reply within {BOT_REPLY_TIMEOUT}s.")
        return []
    # ── Callback button (less common) ─────────────────────────────────────
    else:
        try:
            await btn.click()
            # Give the bot a moment to answer before reading the chat.
            await asyncio.sleep(2)
        except Exception as e:
            log.error(f"  Callback click failed: {e}")
            return []
        try:
            sender = await msg.get_sender()
            # NOTE(review): there is no sent-message ID to filter on in this
            # flow, so documents already present among the sender's last
            # 5 messages are returned as well.
            recent = await client.get_messages(sender, limit=5)
            return [m for m in recent if isinstance(m.media, MessageMediaDocument)]
        except Exception as e:
            log.warning(f"  Fallback poll failed: {e}")
            return []
 # ─── Main entry point ─────────────────────────────────────────────────────────
 async def handle_bot_download_message(
    client: TelegramClient,
    bot: TelegramClient,
    msg,
    source_name: str,
    patterns,
    password: str | None = None,
 ) -> None:
    """
    Run the button-download flow for one message.

    Does nothing when the message has no download button. Otherwise the
    button is clicked via click_download_button() and every file message
    that comes back is passed to core.scraper.handle_message() with the
    source tagged as ``<source_name>[bot]``; *password* is forwarded as-is.
    """
    if not has_download_button(msg):
        return
    log.info(f"[BotDL] Download button detected in {source_name}")
    responses = await click_download_button(client, msg)
    if not responses:
        log.warning(f"[BotDL] No file received for message in {source_name}.")
        return
    # Imported at call time rather than at module load.
    from core.scraper import handle_message
    tagged_source = f"{source_name}[bot]"
    for resp in responses:
        media = resp.media
        if hasattr(media, "document"):
            attrs = getattr(media.document, "attributes", [])
        else:
            attrs = "none"
        log.info(f"  [BotDL] Response media type: {type(media).__name__}, attrs: {attrs}")
        await handle_message(client, bot, resp, tagged_source, patterns, password=password)
--- a/core/notifier.md
+++ b/core/notifier.md
@@ -0,0 +1,67 @@
 # core/notifier.py
 Scores hits, deduplicates, persists to disk and DB, sends Telegram alerts.
 ## Public API
 ```python
 from core.notifier import notify, send_status
 ```
 ### `notify(bot, hits: list[str], source: str, filename: str)`
 **async.** Full notification pipeline:
 1. `score_hits(hits)` → `list[ScoredHit]`
 2. Deduplicate via SHA-256 hashes (`data/dedup.json`)
 3. `insert_hits()` into SQLite for new + dupes (flagged accordingly)
 4. `write_hits()` → append to `data/hits.txt`
 5. `write_hits_csv()` → append to `data/hits.csv`
 6. `send_alert()` → Telegram message for CRITICAL/HIGH/MEDIUM only
 7. Post `EvHit` events onto the TUI bus for each new hit
 ### `send_status(bot, message: str)`
 **async.** Sends a plain Markdown message to `config.NOTIFY_CHAT_ID`. Used for startup/status notifications.
 ---
 ## Internal functions
 | Function | Description |
 |----------|-------------|
 | `deduplicate(hits)` | Returns `(new_hits, dupe_hits)`; updates `data/dedup.json` |
 | `write_hits(scored_hits, source)` | Appends grouped human-readable block to `data/hits.txt` |
 | `write_hits_csv(scored_hits, source, filename)` | Appends rows to `data/hits.csv`; writes header on first call |
 | `send_alert(bot, scored_hits, source, filename)` | Sends Telegram message grouped by severity; skips if all LOW |
 ---
 ## Output files
 | File | Format | Notes |
 |------|--------|-------|
 | `data/hits.txt` | Plain text, grouped by severity | Human-readable, append-only |
 | `data/hits.csv` | CSV with header | Columns: `timestamp, severity, score, url, username, password, reasons, source, filename` |
 | `data/dedup.json` | JSON array of SHA-256 hex strings | Hashes of `line.strip().lower()` |
 ---
 ## Alert behaviour
 - CRITICAL / HIGH / MEDIUM → Telegram alert sent immediately
 - LOW → stored in DB + files, **no** Telegram alert
 - Duplicates → stored in DB with `seen_before=1`, no alert, no file write
 ## Telegram alert format
 ```
 🚨 Credential hit(s) detected
 📁 `filename`
 📢 `source`
 🕐 `timestamp`
 Summary: 🔴 N  🟠 N  🟡 N  🟢 N
 🔴 CRITICAL (N)
 `url:user:pass`
 ↳ reason | reason
 ... (up to 10 per severity; remainder counted)
 ```
--- a/core/notifier.py
+++ b/core/notifier.py
@@ -0,0 +1,248 @@
 """
 notifier.py — Persists hits to disk and sends Telegram bot alerts.
 Includes:
  - Severity scoring via scorer.py
  - Deduplication: same credential never written or alerted twice
  - SQLite storage via database.py
  - hits.txt kept as a human-readable backup
  - Telegram alerts grouped by severity
 """
 import logging
 import hashlib
 import json
 from datetime import datetime, timezone
 from pathlib import Path
 from telethon import TelegramClient
 import csv
 from config import HITS_FILE, NOTIFY_CHAT_ID
 from utils.scorer import score_hits, summarize, CRITICAL, HIGH, MEDIUM, LOW, SEVERITY_EMOJI
 from utils.database import insert_hits
 from tui import events as bus
 # CSV twin of the hits log: same directory and stem as HITS_FILE, ".csv" suffix.
 HITS_CSV = HITS_FILE.with_suffix(".csv")
 log = logging.getLogger(__name__)
 MAX_PREVIEW = 10   # hits to show per severity group in alert
 # JSON file holding the SHA-256 hashes of credential lines already reported.
 DEDUP_FILE  = Path("./data/dedup.json")
 # Only alert immediately for these severities — LOW hits are silent
 ALERT_SEVERITIES = {CRITICAL, HIGH, MEDIUM}
 # ─── Deduplication ────────────────────────────────────────────────────────────
 def _hash(line: str) -> str:
    return hashlib.sha256(line.strip().lower().encode()).hexdigest()
 def _load_seen_hashes() -> set:
    """
    Load the set of credential hashes recorded in DEDUP_FILE.

    Returns an empty set when the file does not exist yet. An unreadable or
    malformed file also yields an empty set, but is logged as a warning
    because every previously seen credential will then be treated as new.
    """
    try:
        with open(DEDUP_FILE, "r", encoding="utf-8") as f:
            return set(json.load(f))
    except FileNotFoundError:
        # Nothing has been recorded yet.
        return set()
    except Exception as e:
        log.warning(f"Could not read dedup file, starting with an empty set: {e}")
        return set()
 def _save_seen_hashes(seen: set) -> None:
    """
    Persist *seen* to DEDUP_FILE as a JSON array.

    The data is written to a temporary sibling file and then renamed over
    DEDUP_FILE, so an interrupted write cannot leave a truncated file behind.
    The parent directory is created when missing. Failures are logged as a
    warning and never raised.
    """
    tmp_file = DEDUP_FILE.with_name(DEDUP_FILE.name + ".tmp")
    try:
        DEDUP_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(list(seen), f)
        # Rename only after the complete payload is on disk.
        tmp_file.replace(DEDUP_FILE)
    except Exception as e:
        log.warning(f"Could not save dedup file: {e}")
 def deduplicate(hits: list) -> tuple[list, list]:
    """
    Split scored hits into never-seen-before and already-seen ones.

    Accepts a list of ScoredHit objects and compares the hash of each
    ``raw`` line against the hashes stored in the dedup file. A line that
    occurs more than once in *hits* counts as new only the first time; later
    occurrences are returned as duplicates. The dedup file is rewritten when
    at least one new hash was added.

    Returns (new_hits, dupe_hits), each in input order.
    """
    seen      = _load_seen_hashes()
    new_hits  = []
    dupe_hits = []
    for h in hits:
        digest = _hash(h.raw)
        if digest in seen:
            dupe_hits.append(h)
        else:
            # Record immediately so a repeat later in this batch is caught.
            seen.add(digest)
            new_hits.append(h)
    if new_hits:
        _save_seen_hashes(seen)
    log.info(
        f"  Dedup: {len(hits)} raw hit(s) → "
        f"{len(new_hits)} new, {len(dupe_hits)} duplicate(s)"
    )
    return new_hits, dupe_hits
 # ─── Helpers ─────────────────────────────────────────────────────────────────
 def _timestamp() -> str:
    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
 # ─── Output ──────────────────────────────────────────────────────────────────
 def write_hits(scored_hits: list, source: str) -> None:
    """
    Append one report block for *scored_hits* to HITS_FILE.

    The block starts with a header (source, UTC time, per-severity counts)
    and then lists the hits grouped by severity from CRITICAL down to LOW,
    each raw line followed by its scoring reasons. Empty groups are omitted.
    The parent directory of HITS_FILE is created when missing.
    """
    HITS_FILE.parent.mkdir(parents=True, exist_ok=True)
    counts = summarize(scored_hits)
    rule = "=" * 60
    with open(HITS_FILE, "a", encoding="utf-8") as out:
        header = (
            f"\n{rule}\n"
            f"Source  : {source}\n"
            f"Time    : {_timestamp()}\n"
            f"Hits    : {len(scored_hits)} "
            f"(CRITICAL={counts[CRITICAL]} HIGH={counts[HIGH]} "
            f"MEDIUM={counts[MEDIUM]} LOW={counts[LOW]})\n"
            f"{rule}\n"
        )
        out.write(header)
        for level in (CRITICAL, HIGH, MEDIUM, LOW):
            members = [hit for hit in scored_hits if hit.severity == level]
            if members:
                out.write(f"\n{SEVERITY_EMOJI[level]} {level} ({len(members)})\n")
                for hit in members:
                    reasons = " | ".join(hit.reasons)
                    out.write(f"  {hit.raw}\n  → {reasons}\n")
    log.info(f"  Wrote {len(scored_hits)} hit(s) to {HITS_FILE}")
 def write_hits_csv(scored_hits: list, source: str, filename: str) -> None:
    """
    Append one CSV row per hit to HITS_CSV.

    The header row is written only when HITS_CSV does not exist before the
    call. All rows of one call share the same timestamp; missing url,
    username or password values are written as empty strings.
    """
    HITS_CSV.parent.mkdir(parents=True, exist_ok=True)
    is_new_file = not HITS_CSV.exists()
    stamp = _timestamp()
    columns = [
        "timestamp", "severity", "score", "url", "username",
        "password", "reasons", "source", "filename",
    ]
    with open(HITS_CSV, "a", newline="", encoding="utf-8") as handle:
        rows = csv.writer(handle)
        if is_new_file:
            rows.writerow(columns)
        rows.writerows(
            [
                stamp, hit.severity, hit.score,
                hit.url or "", hit.username or "", hit.password or "",
                " | ".join(hit.reasons), source, filename,
            ]
            for hit in scored_hits
        )
    log.info(f"  Wrote {len(scored_hits)} hit(s) to {HITS_CSV}")
 # Telegram rejects messages longer than 4096 characters; stay a little below.
 _ALERT_CHUNK_LIMIT = 4000
 # Longest text shown verbatim inside one inline-code span of an alert.
 _ALERT_LINE_LIMIT = 500
 def _inline_code(text, limit: int = _ALERT_LINE_LIMIT) -> str:
    """Wrap *text* in backticks, neutralising embedded backticks and capping its length."""
    safe = str(text).replace("`", "'")
    if len(safe) > limit:
        safe = safe[:limit] + "…"
    return f"`{safe}`"
 def _chunk_lines(lines: list[str], limit: int = _ALERT_CHUNK_LIMIT) -> list[str]:
    """Join *lines* with newlines into as few texts of at most *limit* characters as possible."""
    chunks = []
    current = []
    size = 0
    for line in lines:
        cost = len(line) + 1  # +1 for the joining newline
        if current and size + cost > limit:
            chunks.append("\n".join(current))
            current = []
            size = 0
        current.append(line)
        size += cost
    if current:
        chunks.append("\n".join(current))
    return chunks
 async def send_alert(
    bot: TelegramClient,
    scored_hits: list,
    source: str,
    filename: str,
 ) -> None:
    """
    Send a Telegram alert for *scored_hits*, grouped by severity.

    Nothing is sent when no hit has a severity in ALERT_SEVERITIES. The
    summary line counts all four severities, but only CRITICAL, HIGH and
    MEDIUM hits are listed, at most MAX_PREVIEW per group; the remainder is
    reported as a count.

    The text is split on line boundaries into several messages when it
    would exceed Telegram's message length limit. Backticks in the file
    name, source and credential lines are replaced so they cannot end the
    inline-code span early. Send failures are logged, never raised.
    """
    summary = summarize(scored_hits)
    if not any(h.severity in ALERT_SEVERITIES for h in scored_hits):
        log.info("  No alertable hits (all LOW) — skipping Telegram notification.")
        return
    lines = [
        "🚨 *Credential hit(s) detected*",
        "",
        f"📁 {_inline_code(filename)}",
        f"📢 {_inline_code(source)}",
        f"🕐 `{_timestamp()}`",
        "",
        "*Summary:*",
        f"🔴 CRITICAL: `{summary[CRITICAL]}`  "
        f"🟠 HIGH: `{summary[HIGH]}`  "
        f"🟡 MEDIUM: `{summary[MEDIUM]}`  "
        f"🟢 LOW: `{summary[LOW]}`",
    ]
    for severity in [CRITICAL, HIGH, MEDIUM]:
        group = [h for h in scored_hits if h.severity == severity]
        if not group:
            continue
        emoji = SEVERITY_EMOJI[severity]
        lines.append(f"\n{emoji} *{severity}* ({len(group)})")
        for h in group[:MAX_PREVIEW]:
            lines.append(_inline_code(h.raw))
            lines.append(f"_↳ {' | '.join(h.reasons)}_")
        if len(group) > MAX_PREVIEW:
            lines.append(f"_...and {len(group) - MAX_PREVIEW} more_")
    # Every markdown span opens and closes on one line, so splitting between
    # lines never leaves a span unterminated.
    for chunk in _chunk_lines(lines):
        try:
            await bot.send_message(NOTIFY_CHAT_ID, chunk, parse_mode="markdown")
        except Exception as e:
            log.error(f"Failed to send Telegram alert: {e}")
 # ─── Main entry point ────────────────────────────────────────────────────────
 async def notify(bot: TelegramClient, hits: list[str], source: str, filename: str) -> None:
    """
    Run the full notification pipeline for the raw hit lines of one file.

    Steps, in order:
      1. Score all hits
      2. Deduplicate
      3. Insert every hit into SQLite (new and duplicate, flagged accordingly)
      4. Post an EvHit event to the TUI bus for each new hit
      5. Append the new hits to hits.txt and hits.csv
      6. Send the Telegram alert for the new hits

    Returns right away when *hits* is empty; steps 4-6 are skipped when
    every hit was already seen.
    """
    if not hits:
        return
    scored = score_hits(hits)
    log.info(f"  Scored {len(scored)} hit(s) — {summarize(scored)}")
    fresh, repeats = deduplicate(scored)
    # The database receives both groups; only the flag differs.
    for batch, already_seen in ((fresh, False), (repeats, True)):
        if batch:
            insert_hits(batch, source, filename, seen_before=already_seen)
    if not fresh:
        log.info("  All hits already seen before — no alert sent.")
        return
    for hit in fresh:
        event = bus.EvHit(
            severity=hit.severity,
            raw=hit.raw,
            source=source,
            filename=filename,
            reasons=hit.reasons,
        )
        bus.post(event)
    write_hits(fresh, source)
    write_hits_csv(fresh, source, filename)
    await send_alert(bot, fresh, source, filename)
 async def send_status(bot: TelegramClient, message: str) -> None:
    """
    Deliver *message* to NOTIFY_CHAT_ID using markdown parse mode.

    A failed send is logged as an error and not raised.
    """
    target_chat = NOTIFY_CHAT_ID
    try:
        await bot.send_message(target_chat, message, parse_mode="markdown")
    except Exception as exc:
        log.error(f"Failed to send status message: {exc}")
--- a/core/processor.md
+++ b/core/processor.md
@@ -0,0 +1,69 @@
 # core/processor.py
 Archive extraction and hit searching. No Telegram deps, no async.
 ## Public API
 ```python
 from core.processor import compile_patterns, process_file
 ```
 ### `compile_patterns(keywords: list[str]) -> list[re.Pattern]`
 Compiles a list of keyword strings into case-insensitive regex patterns.  
 Call once at startup; pass the result everywhere patterns are needed.
 ```python
 patterns = compile_patterns(config.TARGET_KEYWORDS)
 ```
 ### `process_file(filepath: Path, patterns, password=None) -> list[str]`
 Full pipeline: unpack → search each `.txt` → recurse into nested archives → clean up everything.  
 Returns list of matching raw lines (hits). Deletes the original file and all extracted contents on completion.
 ```python
 hits = process_file(Path("data/tmp/combo.zip"), patterns, password="infected")
 ```
 ---
 ## Internal functions
 | Function | Signature | Description |
 |----------|-----------|-------------|
 | `search_file` | `(filepath, patterns) -> list[str]` | Stream-reads `.txt` line by line; ignores encoding errors |
 | `unpack` | `(filepath, extra_password) -> (files, extract_dir\|None)` | Dispatches to correct extractor; plain `.txt` returned as-is |
 | `extract_zip` | `(filepath, dest, extra_password)` | Tries no password first, then `ARCHIVE_PASSWORDS` list |
 | `extract_7z` | `(filepath, dest, extra_password)` | Requires `py7zr`; skips if not installed |
 | `extract_rar` | `(filepath, dest, extra_password)` | Requires `rarfile` + `unrar` binary |
 | `_try_passwords` | `(extract_fn, passwords)` | Iterates password list, stops on first success |
 ---
 ## Supported formats
 | Extension | Library | Notes |
 |-----------|---------|-------|
 | `.txt` | built-in | Stream-read, no load into memory |
 | `.zip` | `zipfile` | stdlib |
 | `.7z` | `py7zr` | optional; skipped if not installed |
 | `.rar` | `rarfile` | optional; requires `unrar` system binary |
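 Nested archives are processed recursively: `process_file` calls itself for every archive it finds among the extracted files, and there is no depth limit.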
 ---
 ## Password order
 1. `extra_password` (from message/channel carry-forward) — tried first
 2. `config.ARCHIVE_PASSWORDS` — tried in order
 ---
 ## Cleanup guarantee
 `process_file` always deletes:
 - Extracted individual files
 - Extract subdirectory
 - Original downloaded file
 Even if no hits are found.
--- a/core/processor.py
+++ b/core/processor.py
@@ -0,0 +1,233 @@
 """
 processor.py — Archive extraction and hit searching logic.
 Supports: .txt, .zip, .7z, .rar
 Stream-processes files line by line — safe for large combo lists.
 """
 import re
 import zipfile
 import logging
 import shutil
 from pathlib import Path
 # py7zr and rarfile are optional: the module must stay importable without
 # them, and the HAS_* flags tell the extractors whether they can be used.
 try:
    import py7zr
    HAS_7Z = True
 except ImportError:
    HAS_7Z = False
 try:
    import rarfile
    # Configure the extraction tool only once the import has succeeded, so a
    # missing rarfile package disables .rar support instead of breaking import.
    rarfile.UNRAR_TOOL = "unrar"
    HAS_RAR = True
 except ImportError:
    HAS_RAR = False
 from config import ARCHIVE_PASSWORDS
 log = logging.getLogger(__name__)
 # ─── Searching ───────────────────────────────────────────────────────────────
 def compile_patterns(keywords: list[str]) -> list[re.Pattern]:
    """
    Compile each keyword into a case-insensitive regular expression.
    Keywords are passed to ``re.compile`` as-is, so regex metacharacters in
    them keep their special meaning. The result preserves input order.
    """
    compiled: list[re.Pattern] = []
    for keyword in keywords:
        compiled.append(re.compile(keyword, flags=re.IGNORECASE))
    return compiled
 def search_file(filepath: Path, patterns: list[re.Pattern]) -> list[str]:
    """
    Return the stripped lines of a text file that match at least one pattern.
    The file is read line by line as UTF-8 with undecodable bytes dropped.
    Blank lines are skipped. A line is reported once even if several patterns
    match it. If reading fails, a warning is logged and whatever was
    collected so far is returned.
    """
    matches: list[str] = []
    try:
        with open(filepath, "r", encoding="utf-8", errors="ignore") as handle:
            for raw_line in handle:
                candidate = raw_line.strip()
                if not candidate:
                    continue
                for pattern in patterns:
                    if pattern.search(candidate):
                        matches.append(candidate)
                        break  # one match is enough; do not record the line twice
    except Exception as e:
        log.warning(f"Could not read {filepath.name}: {e}")
    return matches
 # ─── Extraction ──────────────────────────────────────────────────────────────
 def _try_passwords(extract_fn, passwords: list[bytes]) -> bool:
    """Try a list of passwords against an extract function. Returns True on success."""
    for pwd in passwords:
        try:
            extract_fn(pwd)
            return True
        except Exception:
            continue
    return False
 def extract_zip(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
    """
    Extract a ZIP archive into ``dest`` and return every file found under it.
    Extraction is first attempted without a password. If that raises
    ``RuntimeError`` (how ``zipfile`` reports an encrypted member),
    ``extra_password`` is tried first when given, followed by each entry of
    ``ARCHIVE_PASSWORDS``.
    Returns an empty list when the file is not a valid ZIP, cannot be
    unlocked, or extraction fails for any other reason; errors are logged,
    never raised.
    """
    # Candidate list: caller-supplied password ahead of the configured ones.
    passwords = ARCHIVE_PASSWORDS.copy()
    if extra_password:
        passwords.insert(0, extra_password.encode())
    extracted: list[Path] = []
    try:
        with zipfile.ZipFile(filepath) as zf:
            def try_extract(pwd: bytes):
                # An empty candidate means "no password".
                zf.extractall(dest, pwd=pwd or None)
            try:
                zf.extractall(dest)
            except RuntimeError:
                log.info("  ZIP is password-protected, trying common passwords...")
                # Use the combined list so extra_password is actually tried.
                if not _try_passwords(try_extract, passwords):
                    log.warning(f"  Could not unlock {filepath.name} — skipping.")
                    return []
            extracted = [p for p in dest.rglob("*") if p.is_file()]
    except zipfile.BadZipFile:
        log.warning(f"  {filepath.name} is not a valid ZIP.")
    except Exception as e:
        log.warning(f"  ZIP extraction error on {filepath.name}: {e}")
    return extracted
 def extract_7z(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
    """
    Extract a .7z archive into ``dest`` and return every file found under it.
    Requires the optional ``py7zr`` package; without it the archive is
    skipped. Extraction is first attempted without a password. On
    ``PasswordRequired``, ``extra_password`` is tried first when given,
    followed by each entry of ``ARCHIVE_PASSWORDS``.
    Returns an empty list when py7zr is missing, the archive cannot be
    unlocked, or extraction fails; errors are logged, never raised.
    """
    if not HAS_7Z:
        log.warning("py7zr not installed — skipping .7z file.")
        return []
    extracted: list[Path] = []
    # Candidate list: caller-supplied password ahead of the configured ones.
    passwords = ARCHIVE_PASSWORDS.copy()
    if extra_password:
        passwords.insert(0, extra_password.encode())
    def try_extract(pwd: bytes):
        # Candidates are bytes; the archive is reopened per attempt with the
        # password decoded to text.
        with py7zr.SevenZipFile(filepath, mode="r", password=pwd.decode()) as z:
            z.extractall(dest)
    try:
        # Try without password first
        try:
            with py7zr.SevenZipFile(filepath, mode="r") as z:
                z.extractall(dest)
        except py7zr.exceptions.PasswordRequired:
            log.info("  7z is password-protected, trying common passwords...")
            # Use the combined list so extra_password is actually tried.
            if not _try_passwords(try_extract, passwords):
                log.warning(f"  Could not unlock {filepath.name} — skipping.")
                return []
        extracted = [p for p in dest.rglob("*") if p.is_file()]
    except Exception as e:
        log.warning(f"  7z extraction error on {filepath.name}: {e}")
    return extracted
 def extract_rar(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
    """
    Extract a RAR archive into ``dest`` and return every file found under it.
    Requires the optional ``rarfile`` package; without it the archive is
    skipped. Extraction is first attempted without a password. Any failure
    other than ``BadRarFile`` is treated as "probably encrypted":
    ``extra_password`` is tried first when given, followed by each entry of
    ``ARCHIVE_PASSWORDS``.
    Returns an empty list when rarfile is missing, the file is not a valid
    RAR, it cannot be unlocked, or extraction fails; errors are logged, never
    raised.
    """
    if not HAS_RAR:
        log.warning("rarfile not installed — skipping .rar file.")
        return []
    # Candidate list: caller-supplied password ahead of the configured ones.
    passwords = ARCHIVE_PASSWORDS.copy()
    if extra_password:
        passwords.insert(0, extra_password.encode())
    extracted: list[Path] = []
    try:
        with rarfile.RarFile(filepath) as rf:
            def try_extract(pwd: bytes):
                # Candidates are bytes; the password is passed on as text.
                rf.extractall(dest, pwd=pwd.decode() if pwd else None)
            try:
                rf.extractall(dest)
            except rarfile.BadRarFile:
                log.warning(f"  {filepath.name} is not a valid RAR.")
                return []
            except Exception:
                log.info("  RAR may be password-protected, trying common passwords...")
                # Use the combined list so extra_password is actually tried.
                if not _try_passwords(try_extract, passwords):
                    log.warning(f"  Could not unlock {filepath.name} — skipping.")
                    return []
        extracted = [p for p in dest.rglob("*") if p.is_file()]
    except Exception as e:
        log.warning(f"  RAR extraction error on {filepath.name}: {e}")
    return extracted
 def unpack(filepath: Path, extra_password: str | None = None) -> tuple[list[Path], Path | None]:
    """
    Unpacks an archive into a sibling directory.
    Returns (list of extracted files, extract_dir or None).
    If it's not an archive, returns ([filepath], None).
    """
    suffix = filepath.suffix.lower()
    extract_dir = filepath.parent / filepath.stem
    if suffix == ".zip":
        extract_dir.mkdir(exist_ok=True)
        files = extract_zip(filepath, extract_dir, extra_password)
        return files, extract_dir
    elif suffix == ".7z":
        extract_dir.mkdir(exist_ok=True)
        files = extract_7z(filepath, extract_dir, extra_password)
        return files, extract_dir
    elif suffix == ".rar":
        extract_dir.mkdir(exist_ok=True)
        files = extract_rar(filepath, extract_dir, extra_password)
        return files, extract_dir
    else:
        # Plain file — return as-is, no extract dir to clean up
        return [filepath], None
 # ─── Main entry point ────────────────────────────────────────────────────────
 def process_file(filepath: Path, patterns, password: str | None = None) -> list[str]:
    """
    Full pipeline: unpack → search each file → clean up everything.
    ``filepath`` is unpacked (or used as-is when it is not an archive), every
    ``.txt`` file is searched with ``patterns``, and every archive found among
    the extracted files is handled by a recursive call, so nesting depth is
    not limited. ``password`` is offered to the top-level archive and to all
    nested ones.
    Returns the list of matching lines (hits). The extracted files, the
    extraction directory and ``filepath`` itself are removed before returning,
    even when no hits are found or an error interrupts processing.
    """
    log.info(f"  Processing: {filepath.name}")
    all_hits: list[str] = []
    extract_dir: Path | None = None
    try:
        files, extract_dir = unpack(filepath, extra_password=password)
        for f in files:
            suffix = f.suffix.lower()
            if suffix == ".txt":
                hits = search_file(f, patterns)
                if hits:
                    log.info(f"    ✓ {len(hits)} hit(s) in {f.name}")
                all_hits.extend(hits)
            # Nested archives — handled recursively. The same password is
            # forwarded because inner archives commonly reuse it.
            elif suffix in {".zip", ".7z", ".rar"} and f != filepath:
                log.info(f"    → Nested archive: {f.name}")
                all_hits.extend(process_file(f, patterns, password=password))
                continue  # process_file already cleaned up f
            # Clean up extracted file (best effort)
            try:
                f.unlink(missing_ok=True)
            except OSError as e:
                log.debug(f"    Could not remove {f.name}: {e}")
    finally:
        # Cleanup runs on every exit path so temp data is never left behind.
        if extract_dir and extract_dir.exists():
            shutil.rmtree(extract_dir, ignore_errors=True)
        # Clean up original download (best effort)
        try:
            filepath.unlink(missing_ok=True)
        except OSError as e:
            log.debug(f"  Could not remove {filepath.name}: {e}")
    return all_hits
--- a/core/scraper.md
+++ b/core/scraper.md
@@ -0,0 +1,65 @@
 # core/scraper.py
 Telethon user-client layer. Handles live listening, backfill, and the single-message download pipeline.
 ## Public API
 ```python
 from core.scraper import handle_message, backfill_all, register_handlers, warm_entity_cache
 ```
 ### `handle_message(client, bot, msg, source_name, patterns, password=None)`
 **async.** Full pipeline for one document message:
 1. Extract filename + size, check allowlist + size guard
 2. Check `utils.cache` — skip if already seen
 3. Try `tdl` download → Telethon fallback
 4. `core.processor.process_file()` → hits
 5. `core.notifier.notify()` if hits found
 6. `utils.cache.mark_seen()`
 Called by: live handler, `bot_downloader`, backfill fallback path.
 ### `backfill_all(client, bot, patterns)`
 **async.** Iterates `config.WATCHED_CHANNELS`, calls `backfill_channel()` for each.  
 No-op if `config.BACKFILL_LIMIT == 0`.
 ### `register_handlers(client, bot, patterns)`
 Registers a `NewMessage` Telethon event handler on `config.WATCHED_CHANNELS`.  
 Used in **CLI mode only** (`--no-tui`). The TUI manages its own handler via `_make_handler()` in `tui/app.py`.
 ### `warm_entity_cache(client)`
 **async.** Iterates `client.iter_dialogs()` so Telethon caches entity mappings.  
 Must be called before using raw numeric channel IDs.
 ---
 ## Internal functions
 | Function | Description |
 |----------|-------------|
 | `get_filename(msg)` | Extracts filename from `MessageMediaDocument`; falls back to `{msg_id}{ext}` from MIME |
 | `get_filesize(msg)` | Returns document size in bytes |
 | `is_processable(filename, size)` | Checks extension allowlist + size limit; returns `(bool, reason)` |
 | `_make_dest(msg, filename)` | Resolves temp path, handles collision with `{msg_id}_{filename}` |
 | `_telethon_download(client, msg, dest, ...)` | Telethon fallback with tqdm progress + flood-wait handling. Posts `EvDownload*` bus events |
 | `backfill_channel(client, bot, channel, patterns, limit)` | Scans history with password carry-forward; batches via tdl |
 | `_process_batch(client, bot, batch, patterns)` | One tdl invocation for up to `TDL_AMOUNT` messages; per-file Telethon fallback |
 ---
 ## Password carry-forward (backfill)
 Channels often post the archive password as a separate text message.  
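 `backfill_channel` iterates newest→oldest and carries `last_password` forward through the scan, so file messages that are **older** than the most recent password post pick it up. File messages that are newer than a password post are reached before it and therefore do not receive that password.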
 ---
 ## Download strategy
 ```
 is_tdl_available()?
  yes → download_single_with_tdl() / download_batch_with_tdl()
          ↓ failed?
        _telethon_download()
  no  → _telethon_download() directly
 ```
--- a/core/scraper.py
+++ b/core/scraper.py
@@ -0,0 +1,410 @@
 """
 scraper.py — Telethon user client.
 Handles:
  - Listening for new file messages in watched channels
  - Listening for messages with inline download buttons (bot-dispatched files)
  - Backfilling recent channel history on startup (batched via tdl)
  - Downloading files safely (size guard, flood wait)
 """
 import asyncio
 import logging
 import time
 from pathlib import Path
 from tqdm import tqdm
 from telethon import TelegramClient, events
 from telethon.errors import FloodWaitError, ChannelPrivateError, UsernameNotOccupiedError
 from telethon.tl.types import (
    MessageMediaDocument,
    DocumentAttributeFilename,
    InputDocumentFileLocation,
 )
 from config import (
    ALLOWED_EXTENSIONS,
    BACKFILL_LIMIT,
    MAX_FILE_SIZE,
    TEMP_DIR,
    WATCHED_CHANNELS,
    TDL_AMOUNT,
 )
 from core.bot_downloader import handle_bot_download_message, has_download_button, extract_password
 from utils.cache import is_seen, mark_seen
 from core.processor import process_file
 from core.notifier import notify
 from core.tdl_downloader import (
    BatchEntry,
    download_batch_with_tdl,
    download_single_with_tdl,
    is_tdl_available,
 )
 from tui import events as bus
 log = logging.getLogger(__name__)
 # ─── Helpers ──────────────────────────────────────────────────────────────────
 def get_filename(msg) -> str | None:
    """
    Return the filename of a document message, or None for anything else.
    The name comes from the document's first filename attribute. When the
    document has none, a name is built from the message id plus an extension
    looked up from the MIME type (``.bin`` when the type is unknown).
    """
    media = msg.media
    if not isinstance(media, MessageMediaDocument):
        return None
    document = media.document
    name_attr = next(
        (a for a in document.attributes if isinstance(a, DocumentAttributeFilename)),
        None,
    )
    if name_attr is not None:
        return name_attr.file_name
    # No declared name: fall back to an id-based one with a guessed extension.
    extension_by_mime = {
        "application/x-rar-compressed": ".rar",
        "application/vnd.rar":          ".rar",
        "application/zip":              ".zip",
        "application/x-7z-compressed":  ".7z",
        "text/plain":                   ".txt",
    }
    mime_type = getattr(document, "mime_type", "") or ""
    extension = extension_by_mime.get(mime_type, ".bin")
    return f"{msg.id}{extension}"
 def get_filesize(msg) -> int:
    """
    Return the size in bytes of the document attached to ``msg``.
    Yields 0 when the message carries no document or the document reports
    no size.
    """
    media = msg.media
    return (media.document.size or 0) if isinstance(media, MessageMediaDocument) else 0
 def is_processable(filename: str, size: int) -> tuple[bool, str]:
    """
    Decide whether a file is worth downloading.
    Returns ``(True, "")`` when the lower-cased extension is in
    ``ALLOWED_EXTENSIONS`` and ``size`` does not exceed ``MAX_FILE_SIZE``;
    otherwise ``(False, reason)`` with a human-readable explanation. The
    extension is checked before the size.
    """
    extension = Path(filename).suffix.lower()
    if extension not in ALLOWED_EXTENSIONS:
        return False, f"extension {extension!r} not in allowlist"
    if size <= MAX_FILE_SIZE:
        return True, ""
    bytes_per_mb = 1024 * 1024
    size_mb = size / bytes_per_mb
    limit_mb = MAX_FILE_SIZE // bytes_per_mb
    return False, f"too large ({size_mb:.1f} MB > {limit_mb} MB limit)"
 def _make_dest(msg, filename: str) -> Path:
    """
    Resolve the download destination inside ``TEMP_DIR``.
    ``filename`` comes from the Telegram message and is therefore untrusted:
    only its last path component is used, so names such as ``../x`` or
    ``/abs/path`` cannot point outside ``TEMP_DIR``. If that leaves nothing
    usable, the message id is used as the name. When the resulting path
    already exists on disk, the message id is prefixed to avoid a collision.
    ``TEMP_DIR`` is created (including missing parents) if necessary.
    """
    TEMP_DIR.mkdir(parents=True, exist_ok=True)
    # Treat both "/" and "\" as separators and keep the final component only.
    safe_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
    if safe_name in {"", ".", ".."}:
        safe_name = str(msg.id)
    dest = TEMP_DIR / safe_name
    if dest.exists():
        dest = TEMP_DIR / f"{msg.id}_{safe_name}"
    return dest
 # ─── Telethon fallback download ───────────────────────────────────────────────
 async def _telethon_download(client: TelegramClient, msg, dest: Path, filename: str, size: int, batch_id: str | None = None) -> bool:
    """
    Download a single file via Telethon. Returns True on success.
    Progress is shown with a tqdm bar and reported on the TUI bus:
    ``EvDownloadQueued`` (only when no ``batch_id`` was supplied, i.e. the
    download was not already queued by the caller), ``EvDownloadStarted``,
    then ``EvDownloadDone`` or ``EvDownloadFailed``.
    On ``FloodWaitError`` the function sleeps for the requested number of
    seconds and retries once via ``client.download_media``. Any failure,
    including a failure of that retry, is logged, reported as
    ``EvDownloadFailed`` and turned into a ``False`` return value.
    """
    _bid = batch_id or f"telethon_{time.monotonic_ns()}"
    if batch_id is None:
        # Standalone call (not already queued by tdl path) — post queued event
        bus.post(bus.EvDownloadQueued(
            batch_id=_bid, filename=filename,
            size_mb=round(size / (1024 * 1024), 2),
            source="telethon", password=None,
        ))
    bus.post(bus.EvDownloadStarted(batch_id=_bid, filename=filename))
    try:
        try:
            with tqdm(
                total=size,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                desc=filename[:40],
                colour="cyan",
            ) as pbar:
                async def progress(current, total):
                    # Set the absolute position rather than incrementing.
                    pbar.n = current
                    pbar.refresh()
                doc = msg.media.document
                location = InputDocumentFileLocation(
                    id=doc.id,
                    access_hash=doc.access_hash,
                    file_reference=doc.file_reference,
                    thumb_size="",
                )
                await client.download_file(
                    location,
                    file=dest,
                    part_size_kb=512,
                    progress_callback=progress,
                )
        except FloodWaitError as e:
            log.warning(f"  Flood wait: sleeping {e.seconds}s...")
            await asyncio.sleep(e.seconds)
            # Single retry; it sits inside the outer try so that a second
            # failure is reported instead of escaping to the caller.
            await client.download_media(msg, file=dest)
    except Exception as e:
        log.error(f"  Telethon download failed for {filename}: {e}")
        bus.post(bus.EvDownloadFailed(batch_id=_bid, filename=filename, reason=str(e)))
        return False
    bus.post(bus.EvDownloadDone(batch_id=_bid, filename=filename, via="telethon"))
    return True
 # ─── Single-message pipeline (live handler + bot_downloader) ──────────────────
 async def handle_message(
    client: TelegramClient,
    bot: TelegramClient,
    msg,
    source_name: str,
    patterns,
    password: str | None = None,
 ) -> None:
    """
    Download one document message, scan it, and report any hits.
    The message is skipped (with a log line) when it has no filename, fails
    the extension/size check, or its document id was already seen. The
    download goes through tdl when available and falls back to Telethon.
    After processing, the document is marked as seen whether or not it
    produced hits; hits are passed to ``notify``.
    """
    filename = get_filename(msg)
    if not filename:
        log.warning("  handle_message: could not extract filename, skipping.")
        return
    size = get_filesize(msg)
    ok, reason = is_processable(filename, size)
    if not ok:
        log.warning(f"  handle_message: skipping '{filename}' — {reason}")
        return
    doc_id = msg.media.document.id
    if is_seen(doc_id):
        log.info(f"  Skipping {filename} — already processed.")
        return
    dest = _make_dest(msg, filename)
    log.info(f"↓ Downloading: {filename} ({size / 1024:.1f} KB) from {source_name}")
    # First choice: tdl. Second choice: Telethon.
    downloaded = False
    if is_tdl_available():
        downloaded = await download_single_with_tdl(msg, dest)
    if not downloaded:
        if is_tdl_available():
            log.warning("  [tdl] failed — falling back to Telethon")
        downloaded = await _telethon_download(client, msg, dest, filename, size)
    if not downloaded:
        log.error(f"  All download attempts failed for {filename}")
        return
    hits = process_file(dest, patterns, password=password)
    mark_seen(doc_id)
    if not hits:
        log.info(f"  No hits in {filename}")
        return
    await notify(bot, hits, source_name, filename)
 # ─── Batch pipeline (backfill only) ───────────────────────────────────────────
 async def _process_batch(
    client: TelegramClient,
    bot: TelegramClient,
    batch: list[tuple],   # list of (msg, source_name, password)
    patterns,
 ) -> int:
    """
    Download a batch of messages in one tdl invocation, then process each.
    Messages without a filename are dropped. Every remaining message gets its
    own destination path: ``_make_dest`` only checks the filesystem, so two
    same-named files in one batch are told apart here by prefixing the
    message id. Files that tdl did not deliver are retried one by one through
    Telethon. Each downloaded file is scanned, marked as seen, and its hits
    (if any) are passed to ``notify``.
    Returns the number of files successfully downloaded and processed.
    """
    if not batch:
        return 0
    # Build BatchEntry list
    entries: list[BatchEntry] = []
    claimed: set[Path] = set()
    for msg, source_name, password in batch:
        filename = get_filename(msg)
        if not filename:
            continue
        dest = _make_dest(msg, filename)
        if dest in claimed:
            # Same name as an earlier entry of this batch; sharing a path
            # would let one download overwrite the other.
            dest = dest.with_name(f"{msg.id}_{dest.name}")
        claimed.add(dest)
        entries.append(BatchEntry(
            msg=msg,
            filename=filename,
            dest=dest,
            doc_id=msg.media.document.id,
            source_name=source_name,
            password=password,
        ))
    if not entries:
        # Nothing usable in the batch; do not invoke tdl with no input.
        return 0
    names = ", ".join(e.filename for e in entries)
    log.info(f"[Batch] {len(entries)} file(s): {names}")
    # One tdl call for the whole batch
    results = await download_batch_with_tdl(entries)
    processed = 0
    for entry in entries:
        tdl_ok = results.get(entry.doc_id, False)
        if not tdl_ok:
            # Per-file Telethon fallback
            log.info(f"  [Batch] Telethon fallback: {entry.filename}")
            size = get_filesize(entry.msg)
            tdl_ok = await _telethon_download(client, entry.msg, entry.dest, entry.filename, size)
        if not tdl_ok:
            log.error(f"  [Batch] All attempts failed: {entry.filename}")
            continue
        hits = process_file(entry.dest, patterns, password=entry.password)
        mark_seen(entry.doc_id)
        if hits:
            await notify(bot, hits, entry.source_name, entry.filename)
        else:
            log.info(f"  No hits in {entry.filename}")
        processed += 1
    return processed
 # ─── Backfill ─────────────────────────────────────────────────────────────────
 async def backfill_channel(
    client: TelegramClient,
    bot: TelegramClient,
    channel: str | int,
    patterns,
    limit: int,
 ) -> None:
    """
    Scan the last `limit` messages of a channel for file attachments.
    Document messages that pass the extension/size check and were not seen
    before are either queued into a batch (when tdl is available; the batch
    is flushed every ``TDL_AMOUNT`` entries and once more at the end) or
    handled one by one through ``handle_message``. Messages with a download
    button are handled individually after flushing the pending batch.
    Access errors and any other exception raised during the scan are logged,
    not raised; the final log line reports how many files were counted.
    """
    log.info(f"[Backfill] Scanning history: {channel} (last {limit} messages)")
    total = 0
    batch: list[tuple] = []   # (msg, source_name, password)
    last_password: str | None = None  # carry password across adjacent messages
    async def flush_batch():
        # Process whatever is queued, add the result to `total`, and empty the queue.
        nonlocal total
        if batch:
            total += await _process_batch(client, bot, batch, patterns)
            batch.clear()
    try:
        async for msg in client.iter_messages(channel, limit=limit):
            source_name = str(channel)
            # Passwords are often posted as a message of their own, so the
            # most recent one is remembered in last_password and applied to
            # file messages that carry none themselves.
            # iter_messages goes newest→oldest, which means the carried
            # password only reaches messages OLDER than the post it came
            # from: a file newer than its password post is handled before
            # that post is seen, with whatever password was known then.
            # NOTE(review): if channels post the password above (before) the
            # files, those files will not receive it here — confirm intent.
            msg_password = extract_password(msg)
            if msg_password:
                last_password = msg_password
            password = msg_password or last_password
            if msg.media and isinstance(msg.media, MessageMediaDocument):
                filename = get_filename(msg)
                size = get_filesize(msg)
                if not filename:
                    continue
                ok, reason = is_processable(filename, size)
                if not ok:
                    log.warning(f"  [Backfill] Skipping '{filename}' — {reason}")
                    continue
                if is_seen(msg.media.document.id):
                    log.info(f"  [Backfill] Already seen: {filename}")
                    continue
                if is_tdl_available():
                    batch.append((msg, source_name, password))
                    if len(batch) >= TDL_AMOUNT:
                        await flush_batch()
                else:
                    # No tdl — fall straight through to single handle_message
                    await handle_message(client, bot, msg, source_name, patterns, password=password)
                    # Counted whether or not the download/processing succeeded.
                    total += 1
                    await asyncio.sleep(0.5)
            elif msg.buttons and has_download_button(msg):
                # Bot-button messages can't be batched — handle individually
                await flush_batch()  # flush any pending batch first
                await handle_bot_download_message(client, bot, msg, source_name, patterns, password=password)
                total += 1
                await asyncio.sleep(1.5)
        # Flush whatever's left
        await flush_batch()
    except (ChannelPrivateError, UsernameNotOccupiedError) as e:
        log.error(f"[Backfill] Cannot access {channel}: {e}")
    except Exception as e:
        # Any other failure ends the scan of this channel; entries still
        # queued in `batch` at that point are not flushed.
        log.error(f"[Backfill] Error scanning {channel}: {e}")
    log.info(f"[Backfill] Done: {channel} — {total} file(s) processed")
 async def backfill_all(
    client: TelegramClient,
    bot: TelegramClient,
    patterns,
 ) -> None:
    """
    Run ``backfill_channel`` for every watched channel, one after another.
    Each channel is scanned with ``BACKFILL_LIMIT`` as its message limit.
    When that limit is zero or negative, backfill is skipped entirely.
    """
    if BACKFILL_LIMIT > 0:
        log.info(f"[Backfill] Starting for {len(WATCHED_CHANNELS)} channel(s)...")
        for channel in WATCHED_CHANNELS:
            await backfill_channel(client, bot, channel, patterns, BACKFILL_LIMIT)
        log.info("[Backfill] Complete.")
    else:
        log.info("[Backfill] Disabled (BACKFILL_LIMIT=0)")
 # ─── Entity cache warmup ──────────────────────────────────────────────────────
 async def warm_entity_cache(client: TelegramClient) -> None:
    """
    Walk the full dialog list once so Telethon learns the entity mappings.
    The dialogs themselves are discarded; the iteration is done for its
    caching effect, which is needed before raw numeric IDs can be used.
    """
    log.info("Warming entity cache (fetching dialogs)...")
    dialogs = client.iter_dialogs()
    async for _dialog in dialogs:
        continue
    log.info("Entity cache ready.")
 # ─── Live listener ────────────────────────────────────────────────────────────
 def register_handlers(
    client: TelegramClient,
    bot: TelegramClient,
    patterns,
 ) -> None:
    """
    Attach the live ``NewMessage`` handler for all watched channels.
    The handler routes document messages to ``handle_message`` and messages
    with a download button to ``handle_bot_download_message``. Other messages
    only update the password cache.
    """
    # Last password seen in each chat. Channels often post the password as a
    # separate text message, so a file message without one of its own falls
    # back to the most recent password cached for its chat.
    passwords_by_chat: dict[int, str] = {}
    @client.on(events.NewMessage(chats=WATCHED_CHANNELS))
    async def on_new_message(event):
        msg = event.message
        try:
            source = event.chat.username or str(event.chat_id)
        except Exception:
            source = str(event.chat_id)
        chat_id = event.chat_id
        log.info(f"[Live] New message in {source}")
        msg_password = extract_password(msg)
        if msg_password:
            # Remember it for later messages in the same chat.
            passwords_by_chat[chat_id] = msg_password
            log.debug(f"[Live] Password cached for {source}: '{msg_password}'")
            password = msg_password
        else:
            password = passwords_by_chat.get(chat_id)
        if msg.media and isinstance(msg.media, MessageMediaDocument):
            await handle_message(client, bot, msg, source, patterns, password=password)
            return
        if msg.buttons and has_download_button(msg):
            await handle_bot_download_message(client, bot, msg, source, patterns, password=password)
--- a/core/tdl_downloader.md
+++ b/core/tdl_downloader.md
@@ -0,0 +1,70 @@
 # core/tdl_downloader.py
 Fast file downloads via `tdl` (Go MTProto). Falls back gracefully if tdl is not installed.
 ## Public API
 ```python
 from core.tdl_downloader import (
    is_tdl_available,
    download_single_with_tdl,
    download_batch_with_tdl,
    BatchEntry,
 )
 ```
 ### `is_tdl_available() -> bool`
 Returns `True` if `tdl` binary is on PATH.
 ### `download_single_with_tdl(msg, dest: Path) -> bool`
 **async.** Downloads one message's document. Returns `True` on success.  
 Used by the live handler and `bot_downloader`.
 ### `download_batch_with_tdl(entries: list[BatchEntry]) -> dict[int, bool]`
 **async.** Downloads up to `TDL_AMOUNT` messages in a single `tdl dl` invocation.  
 Returns `{doc_id: True|False}` — `False` means Telethon fallback needed.
 ---
 ## BatchEntry dataclass
 ```python
@dataclass
 class BatchEntry:
    msg:         object       # Telethon Message
    filename:    str
    dest:        Path         # final destination path in TEMP_DIR
    doc_id:      int          # msg.media.document.id
    source_name: str
    password:    str | None
 ```
 ---
 ## TUI output pipeline
 In TUI mode (`bus.tui_active == True`), `_run_tdl` pipes stdout+stderr and relays lines as `EvTdlOutput` events in real time.  
 **Reads raw 256-byte chunks** (not line-by-line) and splits on `\r` and `\n`, because tdl uses `\r` to overwrite its progress bar in place.
 In CLI mode: subprocess inherits the terminal, progress bars render natively.
 ---
 ## Staging directory isolation
 Each batch/single download gets a unique `data/tmp/_tdl_{monotonic_ns}/` staging dir.  
 After `tdl` exits, files are matched by name (with fuzzy stem fallback for `filenamify()` mangling) and moved to final `dest`. Staging dir is removed regardless of outcome.
 `--template '{{ filenamify .FileName }}'` — tdl uses the original Telegram filename, not its default `DialogID_MessageID_filename` format.
 ---
 ## Config knobs (`config.py`)
 | Setting | Default | Description |
 |---------|---------|-------------|
 | `TDL_NAMESPACE` | `"default"` | `-n` flag; `None` omits it |
 | `TDL_THREADS` | `8` | `-t` chunk workers per file |
 | `TDL_PERFILE` | `4` | `-l` concurrent files per invocation |
 | `TDL_AMOUNT` | `4` | Max messages per batch |
 | `TDL_TAKEOUT` | `False` | `--takeout` session flag |
--- a/core/tdl_downloader.py
+++ b/core/tdl_downloader.py
@@ -0,0 +1,363 @@
 """
 tdl_downloader.py — Fast file downloads via tdl (Go MTProto implementation).
 Install: https://github.com/iyear/tdl
    curl -sSL https://raw.githubusercontent.com/iyear/tdl/main/scripts/install.sh | bash
 First-time setup — log in once:
    tdl login               # saves to namespace "default"
    tdl login -n myns       # saves to a named namespace
 Relevant config.py knobs:
    TDL_NAMESPACE  str|None  Session namespace (default "default"; None omits -n)
    TDL_THREADS    int       Chunk workers per file  (-t, default 4)
    TDL_PERFILE    int       Concurrent files        (-l, default 4)
    TDL_AMOUNT     int       Messages per tdl batch  (default 4)
    TDL_TAKEOUT    bool      Use takeout session      (--takeout)
 Flag reference:
  Global (BEFORE subcommand): -n --ns, -t --threads, -l --limit
  dl-specific:                -u --url, -d --dir, --template, --continue, --takeout
 Download isolation strategy:
  Each batch gets its own staging subdirectory (TEMP_DIR/_tdl_<monotonic_ns>/) so that
  concurrent downloads and homoglyph filename collisions can never cause tdl's
  internal .tmp → final rename to fail.  Files are moved to TEMP_DIR after
  the batch completes and the staging dir is removed.
 """
 import asyncio
 import logging
 import shutil
 import time
 from dataclasses import dataclass
 from pathlib import Path
 from config import TDL_NAMESPACE, TDL_THREADS, TDL_PERFILE, TDL_TAKEOUT, TEMP_DIR
 from tui import events as bus
 log = logging.getLogger(__name__)
 # ─── Availability ─────────────────────────────────────────────────────────────
 def is_tdl_available() -> bool:
    """Report whether a ``tdl`` executable can be found on PATH."""
    located = shutil.which("tdl")
    return located is not None
 # ─── Message → URL ────────────────────────────────────────────────────────────
 def _build_message_url(msg) -> str:
    """
    Turn a Telethon Message into a ``https://t.me/c/<peer id>/<msg id>`` link.

    The peer id is read from the first of ``channel_id``, ``chat_id`` or
    ``user_id`` that ``msg.peer_id`` exposes, checked in that order.

    Raises:
        ValueError: ``msg.peer_id`` carries none of those attributes.
    """
    peer = msg.peer_id
    # NOTE(review): the /c/ link form is reused for chat and user peers as
    # well — confirm tdl resolves those ids the same way as channel ids.
    for id_attr in ("channel_id", "chat_id", "user_id"):
        if hasattr(peer, id_attr):
            return f"https://t.me/c/{getattr(peer, id_attr)}/{msg.id}"
    raise ValueError(f"Cannot build message URL from peer: {peer!r}")
 # ─── Command builder ──────────────────────────────────────────────────────────
 def _build_cmd(urls: list[str], staging_dir: Path) -> list[str]:
    """
    Assemble the argv list for one ``tdl dl`` invocation.

    Layout of the result:
        tdl [-n NAMESPACE] -t THREADS -l PERFILE dl (-u URL)* -d DIR
            --template ... --continue [--takeout]

    The global flags (-n, -t, -l) are placed before the ``dl`` subcommand.
    ``-n`` is emitted only when TDL_NAMESPACE is truthy and ``--takeout`` only
    when TDL_TAKEOUT is truthy.

    staging_dir is expected to be a fresh per-batch directory so tdl's
    internal .tmp → final rename cannot hit an existing same-named file.
    --template '{{ filenamify .FileName }}' keeps only the original filename
    (no DialogID_MessageID_ prefix).  --continue lets interrupted downloads
    resume.  --skip-same is deliberately left out: deduplication is handled
    upstream by is_seen(), and the flag can make the .tmp rename fail when a
    same-named file already exists in the directory.
    """
    namespace_args = ["-n", str(TDL_NAMESPACE)] if TDL_NAMESPACE else []
    tuning_args = ["-t", str(TDL_THREADS), "-l", str(TDL_PERFILE)]
    # One "-u <link>" pair per message, in the order the links were given.
    url_args = [token for link in urls for token in ("-u", link)]
    takeout_args = ["--takeout"] if TDL_TAKEOUT else []
    return [
        "tdl",
        *namespace_args,
        *tuning_args,
        "dl",
        *url_args,
        "-d", str(staging_dir),
        "--template", "{{ filenamify .FileName }}",
        "--continue",
        *takeout_args,
    ]
 # ─── Runner ───────────────────────────────────────────────────────────────────
 # tdl emits ANSI colour/cursor sequences even when its output is not a TTY,
 # so they are removed before any text is relayed.
 import re as _re
 _ANSI_RE = _re.compile(r"\x1b\[[0-9;]*[mGKHFJA-Z]|\x1b=|\x1b>|\x1b\[\?[0-9]+[hl]")
 def _strip_ansi(text: str) -> str:
    """Return *text* with every escape sequence matched by ``_ANSI_RE`` deleted."""
    return _re.sub(_ANSI_RE, "", text)
 async def _run_tdl(cmd: list[str], label: str) -> bool:
    """
    Spawn tdl and handle output based on whether the TUI is running:
      - TUI mode:  pipe stdout+stderr, read raw chunks (NOT line-by-line),
                   split on both \\r and \\n, strip ANSI, post non-empty
                   segments immediately as EvTdlOutput.
                   tdl uses \\r to overwrite its progress bar in place, so
                   async-for-line on the stream would block until EOF.
                   Chunk-reading + manual split delivers progress live.
      - CLI mode:  inherit the terminal so tdl's progress bars render natively.

    Args:
        cmd:   Full argv list, as produced by _build_cmd().
        label: Short description used only in the success/failure log lines.

    Returns:
        True on exit code 0, False otherwise (including a missing binary or
        any other Exception raised while spawning or relaying).
    """
    import codecs

    async def _relay(stream) -> None:
        # A fixed-size read can end in the middle of a multi-byte UTF-8
        # character; an incremental decoder holds the partial bytes back until
        # the next chunk instead of turning them into U+FFFD.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        pending = ""
        while True:
            chunk = await stream.read(256)
            if not chunk:
                break
            pending += decoder.decode(chunk)
            # Everything before the last separator is complete; the tail may
            # still be growing and stays buffered.
            *complete, pending = _re.split(r"[\r\n]", pending)
            for segment in complete:
                clean = _strip_ansi(segment).strip()
                if clean:
                    bus.post(bus.EvTdlOutput(line=clean))
        # Flush bytes still held by the decoder plus the unterminated tail.
        pending += decoder.decode(b"", final=True)
        clean = _strip_ansi(pending).strip()
        if clean:
            bus.post(bus.EvTdlOutput(line=clean))

    log.debug(f"[tdl] cmd: {' '.join(cmd)}")
    proc = None
    try:
        if bus.tui_active:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            # Drain both pipes concurrently so neither can fill up and stall tdl.
            await asyncio.gather(_relay(proc.stdout), _relay(proc.stderr))
        else:
            proc = await asyncio.create_subprocess_exec(*cmd)
        await proc.wait()
        if proc.returncode == 0:
            log.info(f"[tdl] ✓ {label}")
            return True
        log.error(f"[tdl] ✗ exit {proc.returncode} — {label}")
        return False
    except FileNotFoundError:
        log.error("[tdl] binary not found at runtime")
        return False
    except Exception as e:
        log.error(f"[tdl] Unexpected error: {e}")
        return False
    finally:
        # Reached with a live child only when we were cancelled or relaying
        # failed; do not leave tdl running in the background.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass
 # ─── Staging dir helpers ──────────────────────────────────────────────────────
 def _make_staging_dir() -> Path:
    """
    Create and return a staging directory for one tdl invocation.

    The directory lives under the resolved TEMP_DIR and is named
    ``_tdl_<time.monotonic_ns()>``; missing parents are created.
    """
    root = TEMP_DIR.resolve()
    stamp = int(time.monotonic_ns())
    staging_path = root / f"_tdl_{stamp}"
    staging_path.mkdir(parents=True, exist_ok=True)
    return staging_path
 def _find_in_staging(staging: Path, expected_name: str) -> Path | None:
    """
    Locate a downloaded file in the staging dir by matching its name.
    filenamify() can munge characters (strips @, collapses unicode, etc.)
    so we do a normalised stem comparison as a fallback.
    """
    # Exact match first
    exact = staging / expected_name
    if exact.exists():
        return exact
    expected_stem = Path(expected_name).stem.lower().lstrip("@").replace(" ", "")
    expected_suffix = Path(expected_name).suffix.lower()
    for candidate in staging.iterdir():
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() != expected_suffix:
            continue
        cand_stem = candidate.stem.lower().lstrip("@").replace(" ", "")
        if cand_stem == expected_stem:
            return candidate
    return None
 def _move_from_staging(staging: Path, expected_name: str, final_dest: Path) -> bool:
    """
    Relocate one downloaded file out of the staging directory.

    The file is looked up with _find_in_staging() and renamed to *final_dest*.
    Returns True when the rename succeeded; False when no matching file was
    found or the rename raised (both cases are logged).
    """
    source = _find_in_staging(staging, expected_name)
    if source is None:
        log.warning(f"[tdl] Not found in staging: '{expected_name}' (staging: {staging})")
        return False
    try:
        source.rename(final_dest)
        log.debug(f"[tdl] Moved: {source.name} → {final_dest}")
    except Exception as e:
        log.error(f"[tdl] Move failed {source} → {final_dest}: {e}")
        return False
    return True
 def _cleanup_staging(staging: Path) -> None:
    """Best-effort removal of a staging directory tree; never raises an Exception."""
    try:
        shutil.rmtree(staging, ignore_errors=True)
    except Exception:
        # Cleanup must not mask the download result, so failures are dropped.
        return
 # ─── Public API ───────────────────────────────────────────────────────────────
@dataclass
 class BatchEntry:
    """Carries everything needed to process one file after a batch download."""
    msg: object          # Telethon Message
    filename: str
    dest: Path
    doc_id: int
    source_name: str
    password: str | None
 async def download_batch_with_tdl(entries: list[BatchEntry]) -> dict[int, bool]:
    """
    Download a batch of messages in a single tdl invocation.

    Each batch gets its own staging subdirectory so filenames can never
    collide with existing files in TEMP_DIR.  After tdl exits, files are
    moved from staging to their final dest paths.  The staging directory is
    removed even when the run raises or the task is cancelled.

    Args:
        entries: Files to fetch.  Entries whose message cannot be turned into
                 a URL are skipped and reported as False.

    Returns:
        dict mapping doc_id → True (ready at entry.dest) / False (fallback
        needed).  An empty input yields an empty dict.
    """
    if not entries:
        return {}
    if not is_tdl_available():
        log.warning("[tdl] not available — all entries need Telethon fallback")
        return {e.doc_id: False for e in entries}

    # Pair every entry with its message link; entries without one are dropped.
    valid_entries: list[tuple[BatchEntry, str]] = []
    for entry in entries:
        try:
            valid_entries.append((entry, _build_message_url(entry.msg)))
        except ValueError as exc:
            log.error(f"[tdl] Skipping {entry.filename}: {exc}")
    if not valid_entries:
        return {e.doc_id: False for e in entries}

    batch_id = f"batch_{int(time.monotonic_ns())}"
    names = ", ".join(e.filename for e, _ in valid_entries)
    log.info(f"[tdl] Batch ({len(valid_entries)} files): {names}")

    # Notify TUI: all files in this batch are queued
    for entry, _ in valid_entries:
        size_mb = (entry.msg.media.document.size or 0) / (1024 * 1024)
        bus.post(bus.EvDownloadQueued(
            batch_id=batch_id,
            filename=entry.filename,
            size_mb=round(size_mb, 2),
            source=entry.source_name,
            password=entry.password,
        ))

    # Built once so the per-entry check below is a set lookup, not a rescan.
    valid_ids = {e.doc_id for e, _ in valid_entries}
    results: dict[int, bool] = {}
    staging = _make_staging_dir()
    try:
        cmd = _build_cmd([u for _, u in valid_entries], staging)
        # Signal batch started
        for entry, _ in valid_entries:
            bus.post(bus.EvDownloadStarted(batch_id=batch_id, filename=entry.filename))
        tdl_ok = await _run_tdl(cmd, f"batch of {len(valid_entries)}")

        for entry in entries:
            if entry.doc_id not in valid_ids:
                results[entry.doc_id] = False
                continue
            if not tdl_ok:
                results[entry.doc_id] = False
                bus.post(bus.EvDownloadFailed(batch_id=batch_id, filename=entry.filename, reason="tdl exit error"))
                continue
            moved = _move_from_staging(staging, entry.filename, entry.dest)
            results[entry.doc_id] = moved
            if moved:
                bus.post(bus.EvDownloadDone(batch_id=batch_id, filename=entry.filename, via="tdl"))
            else:
                log.warning(f"[tdl] Fallback needed: {entry.filename}")
                bus.post(bus.EvDownloadFailed(batch_id=batch_id, filename=entry.filename, reason="staging mismatch"))
    finally:
        # Runs on success, on errors and on cancellation alike.
        _cleanup_staging(staging)
    return results
 async def download_single_with_tdl(msg, dest: Path) -> bool:
    """
    Download a single message with tdl. Used by the live handler and
    bot_downloader where batching doesn't apply.

    Args:
        msg:  Telethon Message whose attachment should be fetched.
        dest: Final path of the file; ``dest.name`` is also the name searched
              for in the staging directory.

    Returns:
        True when the file is ready at *dest*; False when tdl is unavailable,
        no URL could be built, tdl failed, or the file was not found in
        staging (the caller is expected to fall back to Telethon).
    """
    if not is_tdl_available():
        log.warning("[tdl] not available — falling back to Telethon")
        return False
    try:
        url = _build_message_url(msg)
    except ValueError as e:
        log.error(f"[tdl] Cannot build URL: {e}")
        return False

    batch_id = f"single_{int(time.monotonic_ns())}"
    # The size is only used for display, so media without a document (or
    # without a size) is reported as 0 instead of raising AttributeError.
    document = getattr(getattr(msg, "media", None), "document", None)
    size_mb = (getattr(document, "size", 0) or 0) / (1024 * 1024)
    bus.post(bus.EvDownloadQueued(
        batch_id=batch_id, filename=dest.name,
        size_mb=round(size_mb, 2), source="live", password=None,
    ))
    bus.post(bus.EvDownloadStarted(batch_id=batch_id, filename=dest.name))

    staging = _make_staging_dir()
    try:
        cmd = _build_cmd([url], staging)
        log.info(f"[tdl] Single: {dest.name}  ({url})")
        tdl_ok = await _run_tdl(cmd, dest.name)
        result = tdl_ok and _move_from_staging(staging, dest.name, dest)
    finally:
        # Remove the staging dir on errors and cancellation too.
        _cleanup_staging(staging)

    if result:
        bus.post(bus.EvDownloadDone(batch_id=batch_id, filename=dest.name, via="tdl"))
    else:
        bus.post(bus.EvDownloadFailed(batch_id=batch_id, filename=dest.name, reason="tdl failed"))
    return result
--- a/data/.gitkeep
+++ b/data/.gitkeep
--- a/logs/monitor.log
+++ b/logs/monitor.log
@@ -0,0 +1,54 @@
 2026-04-02 00:45:48,909 [INFO] utils.database: Database ready: data/hits.db
 2026-04-02 00:45:49,119 [INFO] telethon.crypto.aes: libssl detected, it will be used for encryption
 2026-04-02 00:45:49,156 [INFO] utils.database: Database ready: data/hits.db
 2026-04-02 00:45:49,159 [INFO] tui.app: [bot] Connecting bot_client...
 2026-04-02 00:45:49,159 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
 2026-04-02 00:45:49,203 [INFO] tui.app: [bus] EvStatus: Starting — 12 channel(s), 2 pattern(s)
 2026-04-02 00:45:49,281 [INFO] telethon.network.mtprotosender: Connection to 149.154.175.59:443/TcpFull complete!
 2026-04-02 00:45:49,900 [INFO] tui.app: [bot] bot_client connected, authorizing...
 2026-04-02 00:45:49,901 [INFO] tui.app: [bot] bot_client ready
 2026-04-02 00:45:49,901 [INFO] tui.app: [bot] Connecting user_client...
 2026-04-02 00:45:49,901 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
 2026-04-02 00:45:49,908 [INFO] __main__: Cleaning up tmp/...
 2026-04-02 00:54:16,429 [INFO] utils.database: Database ready: data/hits.db
 2026-04-02 00:54:16,638 [INFO] telethon.crypto.aes: libssl detected, it will be used for encryption
 2026-04-02 00:54:16,666 [ERROR] tui.app: [bot-thread] Unhandled exception: cannot import name 'HITS_CSV' from 'config' (/home/anti/Tools/sj/telegrammer/config.py)
 Traceback (most recent call last):
  File "/home/anti/Tools/sj/telegrammer/tui/app.py", line 848, in _run_bot_thread
    loop.run_until_complete(self._bot_main())
    ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.14/asyncio/base_events.py", line 719, in run_until_complete
    return future.result()
           ~~~~~~~~~~~~~^^
  File "/home/anti/Tools/sj/telegrammer/tui/app.py", line 865, in _bot_main
    from core.notifier import send_status
  File "/home/anti/Tools/sj/telegrammer/core/notifier.py", line 22, in <module>
    from config import HITS_FILE, HITS_CSV, NOTIFY_CHAT_ID
 ImportError: cannot import name 'HITS_CSV' from 'config' (/home/anti/Tools/sj/telegrammer/config.py)
 2026-04-02 00:54:16,716 [INFO] tui.app: [bus] EvStatus: Bot thread crashed: cannot import name 'HITS_CSV' from 'config' (/home/anti/Tools/sj/telegrammer/config.py)
 2026-04-02 00:54:22,624 [INFO] __main__: Cleaning up tmp/...
 2026-04-02 00:54:34,773 [INFO] utils.database: Database ready: data/hits.db
 2026-04-02 00:54:34,983 [INFO] telethon.crypto.aes: libssl detected, it will be used for encryption
 2026-04-02 00:54:35,015 [INFO] utils.database: Database ready: data/hits.db
 2026-04-02 00:54:35,015 [INFO] tui.app: [bot] Connecting bot_client...
 2026-04-02 00:54:35,015 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
 2026-04-02 00:54:35,063 [INFO] tui.app: [bus] EvStatus: Starting — 12 channel(s), 2 pattern(s)
 2026-04-02 00:54:35,120 [INFO] telethon.network.mtprotosender: Connection to 149.154.175.59:443/TcpFull complete!
 2026-04-02 00:54:35,698 [INFO] tui.app: [bot] bot_client connected, authorizing...
 2026-04-02 00:54:35,698 [INFO] tui.app: [bot] bot_client ready
 2026-04-02 00:54:35,698 [INFO] tui.app: [bot] Connecting user_client...
 2026-04-02 00:54:35,698 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
 2026-04-02 00:54:35,810 [INFO] telethon.network.mtprotosender: Connection to 149.154.175.59:443/TcpFull complete!
 2026-04-02 00:54:36,420 [INFO] tui.app: [bot] user_client connected, checking auth...
 2026-04-02 00:54:36,420 [INFO] tui.app: [bot] user_client ready
 2026-04-02 00:54:36,563 [INFO] tui.app: [bus] EvStatus: Connected as 4n (@clp_c)
 2026-04-02 00:54:36,653 [INFO] core.scraper: Warming entity cache (fetching dialogs)...
 2026-04-02 00:54:38,437 [INFO] core.scraper: Entity cache ready.
 2026-04-02 00:54:38,437 [INFO] tui.app: [bot] Handler registered for 12 channel(s)
 2026-04-02 00:54:38,437 [INFO] core.scraper: [Backfill] Starting for 12 channel(s)...
 2026-04-02 00:54:38,437 [INFO] core.scraper: [Backfill] Scanning history: cloudxlog (last 500 messages)
 2026-04-02 00:54:38,463 [INFO] tui.app: [bus] EvStatus: Watching 12 channel(s)
 2026-04-02 00:54:38,463 [INFO] tui.app: [bus] EvStatus: Live listener active
 2026-04-02 00:54:38,585 [INFO] core.scraper: [Batch] 4 file(s): @cloud t13.txt, @cloud t12.txt, @cloud t11.txt, @cloud t10.txt
 2026-04-02 00:54:38,585 [INFO] core.tdl_downloader: [tdl] Batch (4 files): @cloud t13.txt, @cloud t12.txt, @cloud t11.txt, @cloud t10.txt
 2026-04-02 00:54:40,248 [INFO] __main__: Cleaning up tmp/...
--- a/main.py
+++ b/main.py
@@ -0,0 +1,142 @@
 """
 main.py — Entry point for the ULP credential monitor.
 Usage:
    python main.py          # TUI mode (default, requires textual)
    python main.py --no-tui # Plain CLI mode
 First run will prompt for your Telegram phone number and 2FA code
 to create a session file. Subsequent runs are fully automatic.
 """
 import asyncio
 import logging
 import sys
 import shutil
 import argparse
 import config
 from utils.database import init_db
 # ─── Logging setup ────────────────────────────────────────────────────────────
 # Runs at import time: make sure the log directory and the scratch directory
 # exist before the file handler below opens the log file.
 config.LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
 config.TEMP_DIR.mkdir(parents=True, exist_ok=True)
 # Root logger writes to the log file only; CLI mode attaches a stdout handler
 # later, inside _cli_main().
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[
        logging.FileHandler(config.LOG_FILE, encoding="utf-8"),
    ],
 )
 log = logging.getLogger(__name__)
 # Also executed on import, before either front end starts.
 init_db()
 # ─── Plain CLI mode ───────────────────────────────────────────────────────────
 async def _cli_main():
    """
    Run the monitor in plain CLI mode (no TUI).

    Mirrors log output to stdout, connects the bot client and the user
    client, sends a start-up status message, registers the live handlers,
    runs the backfill and then blocks until the user client disconnects.
    Both clients are disconnected on the way out, whatever the outcome.
    """
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    from telethon import TelegramClient
    from core.processor import compile_patterns
    from core.notifier import send_status
    from core.scraper import backfill_all, register_handlers, warm_entity_cache
    log.info("=" * 60)
    log.info("  ULP Credential Monitor — CLI mode")
    log.info("=" * 60)
    patterns = compile_patterns(config.TARGET_KEYWORDS)
    log.info(f"Loaded {len(patterns)} keyword pattern(s)")
    log.info(f"Watching {len(config.WATCHED_CHANNELS)} channel(s)")
    user_client = TelegramClient(
        config.SESSION_NAME, config.API_ID, config.API_HASH,
        connection_retries=5, auto_reconnect=True, request_retries=5,
    )
    bot_client = TelegramClient(
        "bot_session", config.API_ID, config.API_HASH,
    )
    # Do not enter the clients with `async with`: entering a TelegramClient
    # calls start() with no arguments, which on a fresh bot session prompts
    # interactively for a phone number / bot token before the explicit
    # start(bot_token=...) below is ever reached.
    try:
        await bot_client.start(bot_token=config.BOT_TOKEN)
        log.info("Bot client connected.")
        await user_client.start()
        me = await user_client.get_me()
        log.info(f"User client connected as: {me.first_name} (@{me.username})")
        await send_status(
            bot_client,
            f"✅ *Monitor started*\n"
            f"User: `{me.first_name}`\n"
            f"Channels: `{len(config.WATCHED_CHANNELS)}`\n"
            f"Patterns: `{len(patterns)}`\n"
            f"Backfill: `{config.BACKFILL_LIMIT} msg/channel`",
        )
        await warm_entity_cache(user_client)
        register_handlers(user_client, bot_client, patterns)
        log.info("Live listener registered.")
        await backfill_all(user_client, bot_client, patterns)
        log.info("Listening for new messages... (Ctrl+C to stop)")
        await user_client.run_until_disconnected()
    finally:
        # Nested so a failure while closing one client cannot skip the other.
        try:
            await user_client.disconnect()
        finally:
            await bot_client.disconnect()
    log.info("Monitor stopped.")
 # ─── Entry point ──────────────────────────────────────────────────────────────
 def _reset_temp_dir() -> None:
    """Empty the scratch directory on shutdown, leaving a fresh empty one behind."""
    log.info("Cleaning up tmp/...")
    if config.TEMP_DIR.exists():
        shutil.rmtree(config.TEMP_DIR, ignore_errors=True)
        # rmtree(ignore_errors=True) can leave the directory in place; a bare
        # mkdir() would then raise FileExistsError from inside a finally block
        # and mask whatever ended the run.
        config.TEMP_DIR.mkdir(parents=True, exist_ok=True)
 def main():
    """
    Parse the command line and start the monitor.

    ``--no-tui`` runs the plain asyncio CLI loop; otherwise the Textual TUI is
    started.  If the TUI cannot be imported the process exits with status 1
    after printing the reason.  In both modes the scratch directory is
    emptied on the way out.
    """
    parser = argparse.ArgumentParser(description="ULP Credential Monitor")
    parser.add_argument(
        "--no-tui",
        action="store_true",
        help="Run in plain CLI mode (no Textual TUI)",
    )
    args = parser.parse_args()
    if args.no_tui:
        try:
            asyncio.run(_cli_main())
        except KeyboardInterrupt:
            log.info("Interrupted by user.")
        finally:
            _reset_temp_dir()
            log.info("Done.")
        return

    try:
        from tui.app import run_tui
    except ImportError as exc:
        # Only claim Textual is missing when that is the module that failed;
        # any other import problem is reported with its real message.
        missing = exc.name or ""
        if missing == "textual" or missing.startswith("textual."):
            print(
                "⚠  Textual is not installed. Install it with:\n"
                "     pip install textual\n"
                "Or run in plain CLI mode:\n"
                "     python main.py --no-tui",
                file=sys.stderr,
            )
        else:
            print(
                f"⚠  Could not load the TUI: {exc}\n"
                "Or run in plain CLI mode:\n"
                "     python main.py --no-tui",
                file=sys.stderr,
            )
        sys.exit(1)
    try:
        run_tui()
    except KeyboardInterrupt:
        pass
    finally:
        _reset_temp_dir()
 if __name__ == "__main__":
    main()
--- a/pytest.ini
+++ b/pytest.ini
@@ -0,0 +1,2 @@
 [pytest]
 testpaths = tests
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -0,0 +1 @@
 pytest
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,16 @@
 # Telegram
 telethon
 tgcrypto
 # TUI
 textual
 # Config
 python-dotenv
 # Progress bars (CLI mode)
 tqdm
 # Archive extraction
 py7zr
 rarfile
--- a/tests/init.py
+++ b/tests/init.py
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,31 @@
 import os
 # Must be set before config.py is imported by any module.
 # load_dotenv() runs at import time; these setdefaults fill the gap when .env is absent.
 os.environ.setdefault("API_ID", "12345")
 os.environ.setdefault("API_HASH", "dummy_hash_for_tests")
 os.environ.setdefault("BOT_TOKEN", "0:dummy_bot_token")
 os.environ.setdefault("NOTIFY_CHAT_ID", "99999")
 import pytest
 import config
 import utils.scorer as scorer
 # Two test keywords:
 #   @testcorp\.com  — employee email domain (triggers CRITICAL)
 #   testcorp\.com   — plain domain match   (triggers LOW baseline)
 TEST_KEYWORDS = [r"@testcorp\.com", r"testcorp\.com"]
@pytest.fixture
def patched_keywords(monkeypatch):
    """
    Swap in TEST_KEYWORDS for one test and rebuild the scorer's derived tables.

    TARGET_KEYWORDS is patched on both ``config`` and ``scorer``: scorer.py
    uses ``from config import TARGET_KEYWORDS``, a local binding that does not
    follow a patch applied to ``config`` alone.  EMPLOYEE_DOMAINS and
    ORG_DOMAINS are then recomputed, in that order, from the scorer's own
    builder functions.
    """
    for module in (config, scorer):
        monkeypatch.setattr(module, "TARGET_KEYWORDS", TEST_KEYWORDS)
    employee_domains = scorer._build_employee_domains()
    monkeypatch.setattr(scorer, "EMPLOYEE_DOMAINS", employee_domains)
    org_domains = scorer._build_org_domains()
    monkeypatch.setattr(scorer, "ORG_DOMAINS", org_domains)
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -0,0 +1,55 @@
 """
 Tests for utils/cache.py — file-ID deduplication cache.
 Each test gets an isolated cache file via the `isolated_cache` fixture
 so tests never touch data/cache.json.
 """
 import pytest
 import utils.cache as cache_module
@pytest.fixture(autouse=True)
def isolated_cache(tmp_path, monkeypatch):
    """Redirect the cache module's CACHE_FILE to a per-test temporary path."""
    scratch_file = tmp_path / "cache.json"
    monkeypatch.setattr(cache_module, "CACHE_FILE", scratch_file)
 def test_unseen_id_returns_false():
    """An id that was never marked is reported as not seen."""
    seen = cache_module.is_seen(12345)
    assert seen is False
 def test_mark_seen_makes_id_seen():
    """Marking an id makes a later is_seen() call for it return True."""
    file_id = 12345
    cache_module.mark_seen(file_id)
    assert cache_module.is_seen(file_id) is True
 def test_multiple_ids_stored_independently():
    """Several marked ids are all seen, and an unmarked id stays unseen."""
    marked = (1, 2, 3)
    for file_id in marked:
        cache_module.mark_seen(file_id)
    for file_id in marked:
        assert cache_module.is_seen(file_id)
    assert not cache_module.is_seen(4)
 def test_persists_to_disk_between_calls():
    """
    A mark made by one call is visible to a separate, later call.

    is_seen() and mark_seen() each load from disk independently, so this
    exercises the persist-on-write / load-on-read contract the bot loop
    relies on across separate function calls.
    """
    file_id = 999
    cache_module.mark_seen(file_id)
    assert cache_module.is_seen(file_id) is True
 def test_missing_cache_file_handled_gracefully(tmp_path, monkeypatch):
    """is_seen() returns False when CACHE_FILE points at a file that does not exist."""
    absent_file = tmp_path / "nonexistent.json"
    monkeypatch.setattr(cache_module, "CACHE_FILE", absent_file)
    assert cache_module.is_seen(42) is False
 def test_mark_seen_is_idempotent():
    """Marking the same id three times leaves it seen."""
    for _ in range(3):
        cache_module.mark_seen(7)
    assert cache_module.is_seen(7) is True
--- a/tests/test_database.py
+++ b/tests/test_database.py
@@ -0,0 +1,188 @@
 """
 Tests for utils/database.py — SQLite persistence layer.
 Each test gets an isolated in-memory-equivalent DB via the `isolated_db`
 fixture so tests never touch data/hits.db.
 """
 import pytest
 import utils.database as db_module
 from utils.scorer import ScoredHit, CRITICAL, HIGH, MEDIUM, LOW
 def make_hit(severity=LOW, url="testcorp.com", username="user", password="pass", raw=None):
    """
    Build a minimal ScoredHit for insertion tests.

    The score is fixed per severity (CRITICAL 40, HIGH 30, MEDIUM 20, LOW 10).
    When *raw* is falsy it defaults to ``"<url>|<username>|<password>"``.
    """
    score_for = {CRITICAL: 40, HIGH: 30, MEDIUM: 20, LOW: 10}
    raw_line = raw if raw else f"{url}|{username}|{password}"
    return ScoredHit(
        raw=raw_line,
        severity=severity,
        score=score_for[severity],
        reasons=["Test reason"],
        url=url,
        username=username,
        password=password,
    )
@pytest.fixture(autouse=True)
def isolated_db(tmp_path, monkeypatch):
    """Point the database module at a per-test file and initialise it."""
    scratch_db = tmp_path / "test_hits.db"
    monkeypatch.setattr(db_module, "DB_FILE", scratch_db)
    db_module.init_db()
 # ─── init_db ─────────────────────────────────────────────────────────────────
 def test_init_db_is_idempotent():
    """Calling init_db() again on an already initialised database must not raise."""
    for _ in range(2):
        db_module.init_db()
 # ─── insert_hits ──────────────────────────────────────────────────────────────
 def test_insert_returns_correct_row_count():
    """insert_hits() returns the number of hits it was given."""
    batch = [make_hit(), make_hit(severity=CRITICAL)]
    inserted = db_module.insert_hits(batch, source="testchan", filename="combo.txt")
    assert inserted == 2
 def test_insert_stores_all_fields():
    """Every column of an inserted hit can be read back through search()."""
    hit = make_hit(severity=HIGH, url="intranet.testcorp.com", username="jdoe", password="s3cr3t")
    db_module.insert_hits([hit], source="mychan", filename="creds.zip")
    rows = db_module.search("jdoe")
    assert len(rows) == 1
    stored = rows[0]
    expected = {
        "url": "intranet.testcorp.com",
        "username": "jdoe",
        "password": "s3cr3t",
        "severity": HIGH,
        "score": 30,
        "source": "mychan",
        "filename": "creds.zip",
        "seen_before": 0,
    }
    for column, value in expected.items():
        assert stored[column] == value
 def test_insert_seen_before_flag():
    """seen_before=True is stored as 1."""
    db_module.insert_hits([make_hit()], source="chan", filename="f.txt", seen_before=True)
    first_row = db_module.search("testcorp")[0]
    assert first_row["seen_before"] == 1
 # ─── search ───────────────────────────────────────────────────────────────────
 def test_search_finds_by_username():
    """A fragment of the username is enough for search() to find the row."""
    hit = make_hit(username="jdoe@testcorp.com")
    db_module.insert_hits([hit], source="c", filename="f.txt")
    matches = db_module.search("jdoe")
    assert len(matches) == 1
    assert matches[0]["username"] == "jdoe@testcorp.com"
 def test_search_finds_by_url():
    """A fragment of the URL is enough for search() to find the row."""
    hit = make_hit(url="admin.testcorp.com")
    db_module.insert_hits([hit], source="c", filename="f.txt")
    matches = db_module.search("admin.testcorp")
    assert len(matches) == 1
 def test_search_finds_by_raw():
    """A fragment that occurs only in the raw line is found by search()."""
    hit = make_hit(raw="raw_unique_token_xyz")
    db_module.insert_hits([hit], source="c", filename="f.txt")
    matches = db_module.search("unique_token")
    assert len(matches) == 1
 def test_search_returns_empty_for_no_match():
    """search() returns an empty list when no row matches the query."""
    db_module.insert_hits([make_hit()], source="c", filename="f.txt")
    matches = db_module.search("zzznomatch_xyz")
    assert matches == []
 def test_search_sorted_by_score_descending():
    """search() results come back ordered from highest to lowest score."""
    db_module.insert_hits([make_hit(severity=LOW)], source="c", filename="f.txt")
    db_module.insert_hits([make_hit(severity=CRITICAL, url="admin.testcorp.com")], source="c", filename="f.txt")
    results = db_module.search("testcorp")
    # An empty result must fail the test rather than pass vacuously.
    assert results
    # Compare the whole sequence, not just first vs. last, so a mis-ordered
    # row in the middle is caught as well.
    scores = [row["score"] for row in results]
    assert scores == sorted(scores, reverse=True)
 # ─── by_severity ──────────────────────────────────────────────────────────────
 def test_by_severity_returns_correct_severity():
    """by_severity() returns only the rows of the requested severity."""
    critical_hit = make_hit(severity=CRITICAL, url="admin.testcorp.com")
    low_hit = make_hit(severity=LOW)
    for hit in (critical_hit, low_hit):
        db_module.insert_hits([hit], source="c", filename="f.txt")
    rows = db_module.by_severity(CRITICAL)
    assert len(rows) == 1
    assert rows[0]["severity"] == CRITICAL
 def test_by_severity_excludes_duplicates():
    """Rows inserted with seen_before=True must not be returned by by_severity()."""
    duplicate = make_hit(severity=HIGH, url="intranet.testcorp.com")
    db_module.insert_hits([duplicate], source="c", filename="f.txt", seen_before=True)
    rows = db_module.by_severity(HIGH)
    assert rows == []
 def test_by_severity_returns_empty_when_none():
    """by_severity() on an empty database yields an empty list."""
    rows = db_module.by_severity(CRITICAL)
    assert rows == []
 # ─── stats ───────────────────────────────────────────────────────────────────
 def test_stats_counts_by_severity():
    """One unique hit per severity shows up as 1 in each bucket, 4 in total."""
    hits = [
        make_hit(severity=CRITICAL, url="admin.testcorp.com"),
        make_hit(severity=HIGH, url="intranet.testcorp.com"),
        make_hit(severity=MEDIUM, url="app.testcorp.com"),
        make_hit(severity=LOW),
    ]
    for hit in hits:
        db_module.insert_hits([hit], source="c", filename="f.txt")
    summary = db_module.stats()
    expected = {
        "critical": 1,
        "high": 1,
        "medium": 1,
        "low": 1,
        "total": 4,
        "unique": 4,
        "duplicates": 0,
    }
    for key, value in expected.items():
        assert summary[key] == value
 def test_stats_separates_duplicates():
    """The same hit inserted once fresh and once as seen-before counts 1 unique, 1 duplicate."""
    hit = make_hit()
    for already_seen in (False, True):
        db_module.insert_hits([hit], source="c", filename="f.txt", seen_before=already_seen)
    summary = db_module.stats()
    assert summary["total"] == 2
    assert summary["unique"] == 1
    assert summary["duplicates"] == 1
 def test_stats_severity_counts_exclude_duplicates():
    """Per-severity counters count only the rows inserted with seen_before=False."""
    hit = make_hit(severity=CRITICAL, url="admin.testcorp.com")
    for already_seen in (False, True):
        db_module.insert_hits([hit], source="c", filename="f.txt", seen_before=already_seen)
    summary = db_module.stats()
    # Only the unique insertion is counted.
    assert summary["critical"] == 1
 def test_stats_empty_db():
    """On an empty database the totals are zero and there is no top source."""
    summary = db_module.stats()
    assert summary["total"] == 0
    assert summary["unique"] == 0
    assert summary["top_source"] is None
 def test_stats_top_source():
    """The source with the most inserted hits is reported as top_source."""
    for channel in ("channelA", "channelA", "channelB"):
        db_module.insert_hits([make_hit()], source=channel, filename="f.txt")
    summary = db_module.stats()
    assert summary["top_source"]["source"] == "channelA"
 # ─── recent ───────────────────────────────────────────────────────────────────
 def test_recent_respects_limit():
    """recent(limit=3) returns three rows when five are stored."""
    for index in range(5):
        hit = make_hit(raw=f"testcorp.com|user{index}|pass")
        db_module.insert_hits([hit], source="c", filename="f.txt")
    latest = db_module.recent(limit=3)
    assert len(latest) == 3
 def test_recent_returns_all_when_under_limit():
    """recent() returns every row when fewer rows exist than the limit."""
    for _ in range(2):
        db_module.insert_hits([make_hit()], source="c", filename="f.txt")
    latest = db_module.recent(limit=50)
    assert len(latest) == 2
--- a/tests/test_processor.py
+++ b/tests/test_processor.py
@@ -0,0 +1,223 @@
 """
 Tests for core/processor.py — archive extraction and line-by-line search.
 No Telegram deps, no async. Tests create real archive fixtures in tmp_path
 so process_file's cleanup guarantee can be verified against actual disk state.
 """
 import zipfile
 import pytest
 from pathlib import Path
 from core.processor import compile_patterns, search_file, process_file
@pytest.fixture
 def patterns():
    return compile_patterns([r"testcorp\.com"])
 # ─── compile_patterns ─────────────────────────────────────────────────────────
 class TestCompilePatterns:
    """compile_patterns: turning regex source strings into compiled patterns."""

    def test_returns_case_insensitive_patterns(self):
        compiled = compile_patterns([r"hello"])
        for text in ("HELLO", "Hello"):
            assert compiled[0].search(text) is not None

    def test_multiple_patterns(self):
        compiled = compile_patterns([r"alpha", r"beta"])
        assert len(compiled) == 2
        alpha_pat, beta_pat = compiled
        assert alpha_pat.search("alpha_line")
        assert beta_pat.search("beta_line")

    def test_empty_list(self):
        result = compile_patterns([])
        assert result == []
 # ─── search_file ──────────────────────────────────────────────────────────────
 class TestSearchFile:
    """search_file: line-by-line pattern matching over one text file."""

    def test_returns_matching_lines(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        combo.write_text("testcorp.com|user|pass\nothersite.com|user|pass\n")
        found = search_file(combo, patterns)
        assert found == ["testcorp.com|user|pass"]

    def test_returns_empty_when_no_match(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        combo.write_text("nomatch.com|user|pass\nanother.net|x|y\n")
        found = search_file(combo, patterns)
        assert found == []

    def test_strips_whitespace_from_returned_lines(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        combo.write_text("  testcorp.com|user|pass  \n")
        first_hit = search_file(combo, patterns)[0]
        assert first_hit == "testcorp.com|user|pass"

    def test_skips_blank_lines(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        combo.write_text("\n\ntestcorp.com|user|pass\n\n")
        found = search_file(combo, patterns)
        assert found == ["testcorp.com|user|pass"]

    def test_handles_encoding_errors_gracefully(self, tmp_path, patterns):
        """Invalid bytes between two valid lines must not abort the search."""
        combo = tmp_path / "combo.txt"
        raw_lines = [
            b"testcorp.com|user1|pass\n",
            b"\xff\xfe invalid bytes here\n",
            b"testcorp.com|user2|pass\n",
        ]
        combo.write_bytes(b"".join(raw_lines))
        assert len(search_file(combo, patterns)) == 2

    def test_multiple_matching_lines_all_returned(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        text_lines = [
            "testcorp.com|alice|pass1\n",
            "nomatch.com|bob|pass2\n",
            "testcorp.com|carol|pass3\n",
        ]
        combo.write_text("".join(text_lines))
        assert len(search_file(combo, patterns)) == 2
 # ─── process_file — plain .txt ────────────────────────────────────────────────
 class TestProcessFilePlainText:
    """process_file on a plain .txt input: returns hits and removes the file."""

    def test_returns_hits(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        combo.write_text("testcorp.com|user|pass\nnomatch.com|x|y\n")
        found = process_file(combo, patterns)
        assert found == ["testcorp.com|user|pass"]

    def test_deletes_file_after_processing(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        combo.write_text("testcorp.com|user|pass\n")
        process_file(combo, patterns)
        still_there = combo.exists()
        assert not still_there

    def test_deletes_file_even_with_no_hits(self, tmp_path, patterns):
        combo = tmp_path / "combo.txt"
        combo.write_text("nomatch.com|x|y\n")
        found = process_file(combo, patterns)
        assert found == []
        assert not combo.exists()
 # ─── process_file — .zip extraction ──────────────────────────────────────────
 class TestProcessFileZip:
    """process_file on .zip input: extraction, hit collection and cleanup."""

    def _make_zip(self, tmp_path: Path, content: str, filename="content.txt") -> Path:
        """Build tmp_path/combo.zip holding one member, then drop the loose file."""
        member = tmp_path / filename
        member.write_text(content)
        archive_path = tmp_path / "combo.zip"
        with zipfile.ZipFile(archive_path, "w") as archive:
            archive.write(member, filename)
        member.unlink()
        return archive_path

    def test_extracts_and_returns_hits(self, tmp_path, patterns):
        archive_path = self._make_zip(
            tmp_path, "testcorp.com|user|pass\nnomatch.com|x|y\n"
        )
        found = process_file(archive_path, patterns)
        assert found == ["testcorp.com|user|pass"]

    def test_deletes_zip_after_processing(self, tmp_path, patterns):
        archive_path = self._make_zip(tmp_path, "testcorp.com|user|pass\n")
        process_file(archive_path, patterns)
        assert not archive_path.exists()

    def test_deletes_extract_dir_after_processing(self, tmp_path, patterns):
        archive_path = self._make_zip(tmp_path, "testcorp.com|user|pass\n")
        # Sibling directory named after the zip stem.
        unpack_dir = tmp_path / "combo"
        process_file(archive_path, patterns)
        assert not unpack_dir.exists()

    def test_no_hits_still_cleans_up(self, tmp_path, patterns):
        archive_path = self._make_zip(tmp_path, "nomatch.com|x|y\n")
        unpack_dir = tmp_path / "combo"
        process_file(archive_path, patterns)
        assert not archive_path.exists()
        assert not unpack_dir.exists()

    def test_zip_with_multiple_txt_files(self, tmp_path, patterns):
        first = tmp_path / "a.txt"
        first.write_text("testcorp.com|alice|pass\n")
        second = tmp_path / "b.txt"
        second.write_text("testcorp.com|bob|pass\n")
        archive_path = tmp_path / "combo.zip"
        with zipfile.ZipFile(archive_path, "w") as archive:
            for member in (first, second):
                archive.write(member, member.name)
        for member in (first, second):
            member.unlink()
        found = process_file(archive_path, patterns)
        assert len(found) == 2
 # ─── process_file — nested archives ──────────────────────────────────────────
 class TestProcessFileNested:
    """process_file on archives that contain other archives."""

    def test_nested_zip_is_recursed(self, tmp_path, patterns):
        # Innermost payload: a text file with one matching line.
        payload = tmp_path / "inner.txt"
        payload.write_text("testcorp.com|user|pass\n")

        inner_archive = tmp_path / "inner.zip"
        with zipfile.ZipFile(inner_archive, "w") as archive:
            archive.write(payload, "inner.txt")
        payload.unlink()

        outer_archive = tmp_path / "outer.zip"
        with zipfile.ZipFile(outer_archive, "w") as archive:
            archive.write(inner_archive, "inner.zip")
        inner_archive.unlink()

        found = process_file(outer_archive, patterns)
        assert found == ["testcorp.com|user|pass"]
        assert not outer_archive.exists()
        outer_unpack_dir = tmp_path / "outer"
        assert not outer_unpack_dir.exists()
 # ─── process_file — password-protected .7z ───────────────────────────────────
 class TestProcessFile7zPassword:
    """process_file on password-protected .7z archives.

    Both tests are skipped when py7zr is not installed. ARCHIVE_PASSWORDS is
    monkeypatched to a single known entry so the tests do not depend on the
    project's configured password list.
    """

    def _make_encrypted_7z(self, py7zr, tmp_path: Path, password: str) -> Path:
        """Create tmp_path/combo.7z encrypted with `password`, holding one matching line."""
        txt = tmp_path / "content.txt"
        txt.write_text("testcorp.com|user|pass\n")
        szf = tmp_path / "combo.7z"
        with py7zr.SevenZipFile(szf, "w", password=password) as z:
            z.write(txt, "content.txt")
        # Remove the loose file so only the archive can produce hits.
        txt.unlink()
        return szf

    def test_unlocks_with_correct_password(self, tmp_path, patterns, monkeypatch):
        # importorskip replaces the manual try/except ImportError + pytest.skip.
        py7zr = pytest.importorskip("py7zr")
        import core.processor as proc_module
        # Isolate to a single known password so the test doesn't depend on config
        monkeypatch.setattr(proc_module, "ARCHIVE_PASSWORDS", [b"secretpwd"])
        szf = self._make_encrypted_7z(py7zr, tmp_path, "secretpwd")
        hits = process_file(szf, patterns)
        assert hits == ["testcorp.com|user|pass"]
        assert not szf.exists()

    def test_skips_when_no_password_matches(self, tmp_path, patterns, monkeypatch):
        py7zr = pytest.importorskip("py7zr")
        import core.processor as proc_module
        monkeypatch.setattr(proc_module, "ARCHIVE_PASSWORDS", [b"wrongpwd"])
        szf = self._make_encrypted_7z(py7zr, tmp_path, "correctpwd")
        # No hits — archive could not be opened
        hits = process_file(szf, patterns)
        assert hits == []
--- a/tests/test_scorer.py
+++ b/tests/test_scorer.py
@@ -0,0 +1,282 @@
 """
 Tests for utils/scorer.py — severity scoring and ULP line parsing.
 All tests use the `patched_keywords` fixture (see conftest.py) which
 replaces TARGET_KEYWORDS with two entries:
  @testcorp.com  — employee email domain (CRITICAL trigger)
  testcorp.com   — plain domain match    (LOW baseline)
 """
 import pytest
 from utils.scorer import score_hit, score_hits, summarize, CRITICAL, HIGH, MEDIUM, LOW
 # ─── ULP line parsing ─────────────────────────────────────────────────────────
 class TestULPParsing:
    """Splitting a ULP line into url / username / password fields."""

    def test_parses_pipe_separated_fields(self, patched_keywords):
        parsed = score_hit("site.com|jdoe@testcorp.com|pass123")
        assert parsed.url == "site.com"
        assert parsed.username == "jdoe@testcorp.com"
        assert parsed.password == "pass123"

    def test_parses_colon_separated_fields(self, patched_keywords):
        # 'site.com' itself contains no colon, so the url field captures it cleanly.
        parsed = score_hit("site.com:jdoe@testcorp.com:pass123")
        assert parsed.url == "site.com"
        assert parsed.username == "jdoe@testcorp.com"
        assert parsed.password == "pass123"

    def test_malformed_line_yields_none_fields(self, patched_keywords):
        parsed = score_hit("justaplaindomainmatch_testcorp.com")
        for value in (parsed.url, parsed.username, parsed.password):
            assert value is None

    def test_raw_field_preserved_exactly(self, patched_keywords):
        original_line = "site.com|jdoe@testcorp.com|pass123"
        parsed = score_hit(original_line)
        assert parsed.raw == original_line
 # ─── Real-world ULP format coverage ──────────────────────────────────────────
 class TestULPParsingRealWorld:
    """
    Field extraction against real-world stealer-log line shapes.

    Each case is (line, expected url, expected username, expected password).
    Severity is not asserted, so the patched_keywords fixture is not needed.
    """

    _CASES = [
        # Protocol + port + path, colon separator.
        # The port is digits followed by '/', so it belongs to the URL.
        ("http://portal.fakehosp.example.com:88/:55512309-1:hunter2",
         "http://portal.fakehosp.example.com:88/", "55512309-1", "hunter2"),
        ("http://portal.fakehosp.example.com:8085/app/booking/:3:letmein",
         "http://portal.fakehosp.example.com:8085/app/booking/", "3", "letmein"),
        ("https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx:30219876-K:Spr!ng22@",
         "https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx",
         "30219876-K", "Spr!ng22@"),
        # Protocol, no port: an ID-style username resembles a port but has a hyphen.
        # ':\d+-' must NOT be taken as a port (no '/' follows the digits).
        ("https://booking.fakehosp.example.com:40293817-6:Summ3r99..",
         "https://booking.fakehosp.example.com", "40293817-6", "Summ3r99.."),
        ("https://booking.fakehosp.example.com/:40293817-6:Summ3r99..",
         "https://booking.fakehosp.example.com/", "40293817-6", "Summ3r99.."),
        # Protocol + email username directly after the host (no trailing slash).
        ("https://booking.fakehosp.example.com:carlos.gomez@gmail.com:Qwerty99",
         "https://booking.fakehosp.example.com", "carlos.gomez@gmail.com", "Qwerty99"),
        ("https://accounts.saas-vendor.example.com/signin:jdoe@fakehosp.example.com:W1nter20",
         "https://accounts.saas-vendor.example.com/signin",
         "jdoe@fakehosp.example.com", "W1nter20"),
        ("https://login.sso-provider.example.com/common/oauth2/authorize:jdoe@fakehosp.example.com:Passw0rd!",
         "https://login.sso-provider.example.com/common/oauth2/authorize",
         "jdoe@fakehosp.example.com", "Passw0rd!"),
        # Pipe separator (unambiguous: the port stays in the URL).
        ("http://portal.fakehosp.example.com:88/|22.987.654-3|florida88",
         "http://portal.fakehosp.example.com:88/", "22.987.654-3", "florida88"),
        ("https://booking.fakehosp.example.com/|77341209-0|Ninja42",
         "https://booking.fakehosp.example.com/", "77341209-0", "Ninja42"),
        # Mixed separators: pipe after the URL, colon between user and password.
        ("http://portal.fakehosp.example.com:8085/app/booking/|Z:wd1980wd",
         "http://portal.fakehosp.example.com:8085/app/booking/", "Z", "wd1980wd"),
        # No protocol, port in URL.
        ("portal.fakehosp.example.com:88/:22.987.654-3:florida88",
         "portal.fakehosp.example.com:88/", "22.987.654-3", "florida88"),
        # No protocol, no port: plain colon separators.
        ("booking.fakehosp.example.com:66778899-7:correcthorse",
         "booking.fakehosp.example.com", "66778899-7", "correcthorse"),
        ("booking.fakehosp.example.com/:smithjohnathan:Bb881955",
         "booking.fakehosp.example.com/", "smithjohnathan", "Bb881955"),
        # Password with special characters.
        ("https://booking.fakehosp.example.com/:11223344-5:dragonball99*",
         "https://booking.fakehosp.example.com/", "11223344-5", "dragonball99*"),
        ("https://booking.fakehosp.example.com/:9988776-65:abc.456#",
         "https://booking.fakehosp.example.com/", "9988776-65", "abc.456#"),
        # Semicolon separator.
        ("booking.fakehosp.example.com;smithjohnathan;Bb881955",
         "booking.fakehosp.example.com", "smithjohnathan", "Bb881955"),
    ]

    @pytest.mark.parametrize(("line", "exp_url", "exp_user", "exp_pass"), _CASES)
    def test_real_world_ulp_parsing(self, line, exp_url, exp_user, exp_pass):
        parsed = score_hit(line)
        assert parsed.url == exp_url, f"URL mismatch for: {line!r}"
        assert parsed.username == exp_user, f"Username mismatch for: {line!r}"
        assert parsed.password == exp_pass, f"Password mismatch for: {line!r}"
 # ─── Severity classification ──────────────────────────────────────────────────
 class TestSeverityClassification:
    """Severity assigned by score_hit for different URL / username shapes."""

    def test_employee_email_in_username_is_critical(self, patched_keywords):
        assert score_hit("site.com|jdoe@testcorp.com|pass123").severity == CRITICAL

    def test_gmail_on_org_url_is_not_critical(self, patched_keywords):
        """
        Documented footgun: the org domain is in the URL while the username
        is a gmail address. That must NOT be CRITICAL, because the
        employee-domain pattern needs a literal '@' before the domain and a
        bare 'testcorp.com' in the URL field never provides one.
        """
        result = score_hit("testcorp.com|user@gmail.com|pass123")
        assert result.severity != CRITICAL

    def test_critical_service_subdomain_is_critical(self, patched_keywords):
        assert score_hit("admin.testcorp.com|user|pass123").severity == CRITICAL

    def test_vpn_subdomain_is_critical(self, patched_keywords):
        assert score_hit("vpn.testcorp.com|user|pass123").severity == CRITICAL

    def test_gitlab_subdomain_is_critical(self, patched_keywords):
        assert score_hit("gitlab.testcorp.com|user|pass123").severity == CRITICAL

    def test_intranet_subdomain_is_high(self, patched_keywords):
        assert score_hit("intranet.testcorp.com|user|pass123").severity == HIGH

    def test_sso_subdomain_is_high(self, patched_keywords):
        assert score_hit("sso.testcorp.com|user|pass123").severity == HIGH

    def test_app_subdomain_is_medium(self, patched_keywords):
        assert score_hit("app.testcorp.com|user|pass123").severity == MEDIUM

    def test_booking_subdomain_is_medium(self, patched_keywords):
        assert score_hit("booking.testcorp.com|user|pass123").severity == MEDIUM

    def test_plain_domain_match_is_low(self, patched_keywords):
        assert score_hit("testcorp.com|user|pass123").severity == LOW

    def test_employee_email_beats_high_service(self, patched_keywords):
        """An employee email must outrank a HIGH service classification."""
        result = score_hit("intranet.testcorp.com|jdoe@testcorp.com|pass")
        assert result.severity == CRITICAL

    def test_employee_email_beats_medium_service(self, patched_keywords):
        result = score_hit("app.testcorp.com|jdoe@testcorp.com|pass")
        assert result.severity == CRITICAL

    def test_multiple_checks_accumulate_reasons(self, patched_keywords):
        """Employee email plus critical service URL yields at least two reasons."""
        result = score_hit("admin.testcorp.com|jdoe@testcorp.com|pass")
        assert result.severity == CRITICAL
        assert len(result.reasons) >= 2

    def test_score_matches_severity(self, patched_keywords):
        from utils.scorer import SEVERITY_SCORES
        expectations = {
            "admin.testcorp.com|user|pass": CRITICAL,
            "intranet.testcorp.com|user|pass": HIGH,
            "app.testcorp.com|user|pass": MEDIUM,
            "testcorp.com|user|pass": LOW,
        }
        for line, expected_severity in expectations.items():
            assert score_hit(line).score == SEVERITY_SCORES[expected_severity]
 # ─── Weak password flags ──────────────────────────────────────────────────────
 class TestWeakPasswordFlags:
    """Informational weak/common password reasons attached by score_hit."""

    def test_short_password_adds_reason(self, patched_keywords):
        reasons = score_hit("testcorp.com|user|abc").reasons
        assert any("Weak password" in reason for reason in reasons)

    def test_common_password_adds_reason(self, patched_keywords):
        reasons = score_hit("testcorp.com|user|password").reasons
        assert any("Common password" in reason for reason in reasons)

    def test_weak_password_does_not_escalate_severity(self, patched_keywords):
        """Weak-password flags are informational; severity stays unchanged."""
        result = score_hit("testcorp.com|user|abc")
        assert result.severity == LOW

    def test_strong_password_adds_no_warning(self, patched_keywords):
        result = score_hit("testcorp.com|user|Xk9#mP2qLrTv")
        for reason in result.reasons:
            lowered = reason.lower()
            # Reasons about employee emails, domains or services are not
            # password warnings, so they are left out of the check.
            if "Employee" in reason or "domain" in lowered or "service" in lowered:
                continue
            assert "password" not in lowered
 # ─── score_hits and summarize ─────────────────────────────────────────────────
 class TestScoreHitsAndSummarize:
    """Batch scoring (score_hits) and per-severity counting (summarize)."""

    def test_score_hits_sorted_descending(self, patched_keywords):
        unsorted_lines = [
            "testcorp.com|user|pass",           # LOW
            "admin.testcorp.com|user|pass",     # CRITICAL
            "intranet.testcorp.com|user|pass",  # HIGH
            "app.testcorp.com|user|pass",       # MEDIUM
        ]
        ordered_scores = [hit.score for hit in score_hits(unsorted_lines)]
        assert ordered_scores == sorted(ordered_scores, reverse=True)

    def test_summarize_counts_each_severity(self, patched_keywords):
        one_per_level = [
            "admin.testcorp.com|user|pass",     # CRITICAL
            "intranet.testcorp.com|user|pass",  # HIGH
            "app.testcorp.com|user|pass",       # MEDIUM
            "testcorp.com|user|pass",           # LOW
        ]
        counts = summarize(score_hits(one_per_level))
        for level in (CRITICAL, HIGH, MEDIUM, LOW):
            assert counts[level] == 1

    def test_summarize_zero_for_absent_severities(self, patched_keywords):
        # A single LOW hit: every other level must be reported as zero.
        counts = summarize(score_hits(["testcorp.com|user|pass"]))
        for level in (CRITICAL, HIGH, MEDIUM):
            assert counts[level] == 0
        assert counts[LOW] == 1

    def test_score_hits_empty_list(self, patched_keywords):
        scored = score_hits([])
        assert scored == []
--- a/tui/__init__.py
+++ b/tui/__init__.py
@@ -0,0 +1 @@
 """tui — Textual TUI frontend and event bus."""
--- a/tui/app.md
+++ b/tui/app.md
@@ -0,0 +1,130 @@
 # tui/app.py
 Textual TUI frontend. Entry point: `run_tui()`.
 ## Entry point
 ```python
 from tui.app import run_tui
 run_tui()   # called by main.py
 ```
 ---
 ## Screen hierarchy
 ```
 MonitorApp (App)
 ├── [default screen]
 │   ├── Header
 │   ├── #top-row (Horizontal)
 │   │   ├── DownloadPanel  #dl-panel
 │   │   └── HitsPanel      #hits-panel
 │   ├── StatsPanel         #stats-panel
 │   ├── ChannelPanel       #ch-panel
 │   └── Footer
 ├── SearchScreen     (push/pop via 's')
 ├── HitsDBScreen     (push/pop via 'h')
 └── KeywordsScreen   (push/pop via 'k')
 ```
 ---
 ## MonitorApp
 ### Threading model
 - **Bot backend** → `threading.Thread(daemon=True)` with its own `asyncio.new_event_loop()`  
  Runs `_bot_main()` — Telethon is completely isolated from Textual's loop.
 - **TUI drain** → `set_interval(0.1, _drain_bus)` — polls `queue.Queue` every 100ms on Textual's loop.
 ### Key methods
 | Method | Description |
 |--------|-------------|
 | `on_mount()` | Calls `bus.init_bus()`, starts bot thread, sets drain interval |
 | `_drain_bus()` | Drains all pending events from `queue.Queue`, dispatches to widgets |
 | `_run_bot_thread()` | Thread entry: creates event loop, runs `_bot_main()` |
 | `_bot_main()` | Async bot backend: connect, auth, backfill, live handler loop |
 | `_signal_channel_changed()` | Thread-safely sets the bot loop's `asyncio.Event` via `call_soon_threadsafe` |
 ### Keybindings
 | Key | Action |
 |-----|--------|
 | `s` | Push `SearchScreen` |
 | `h` | Push `HitsDBScreen` |
 | `k` | Push `KeywordsScreen` |
 | `c` | Clear download + hits logs |
 | `r` | Force-refresh stats bar |
 | `q` / `ctrl+c` | Quit |
 ---
 ## Widgets
 ### DownloadPanel
 Left panel. Two `RichLog` widgets separated by a dashed line:
 - **top** (`#tdl-out`): raw tdl output lines (ANSI stripped)
 - **bottom** (`#dl-log`): structured download status entries
 Methods: `tdl_line(line)`, `queued(filename, size_mb, source, password)`, `status(filename, state, via)`, `clear_logs()`
 States for `status()`: `queued` · `downloading` · `done_tdl` · `done_tel` · `failed`
 ### HitsPanel
 Right panel. Single `RichLog` with color-coded hit entries.  
 Reactive `hit_count` updates the panel title badge automatically.
 Methods: `add_hit(severity, raw, source, filename, reasons)`, `clear_log()`
 ### StatsPanel
 Slim horizontal bar. Polls `utils.database.stats()` every 10s via `set_interval`.  
 Also refreshed immediately on each `EvHit` event.
 ### ChannelPanel
 Bottom panel. `ListView` + `Input` + buttons.  
 Add/remove posts `EvChannelAdded` / `EvChannelRemoved` onto the bus.  
 Changes apply immediately (handler re-registered). Not persisted to `config.py` automatically.
 ---
 ## Screens
 ### SearchScreen (`s`)
 - Text input → queries `utils.database.search(keyword)`
 - Results in a `DataTable` with columns: Sev, Time, URL, Username, Password, Source, File
 - Submit with `↵` or Search button; `Escape` to dismiss
 ### HitsDBScreen (`h`)
 - Toolbar buttons + number keys filter by severity
 - `r` → recent 50, `1`→CRITICAL, `2`→HIGH, `3`→MEDIUM, `4`→LOW
 - Calls `utils.database.recent()` / `by_severity()`
 ### KeywordsScreen (`k`)
 - Live-edit `config.TARGET_KEYWORDS`
 - Validates regex before adding
 - On change: rebuilds `utils.scorer.EMPLOYEE_DOMAINS` and `ORG_DOMAINS`
 - Bot handler recompiles patterns on the next incoming message automatically
 - **Changes are in-memory only** — copy to `config.py` to persist
 ---
 ## Bot auth flow (`_bot_main`)
 ```
 await bot_client.connect()
 if not await bot_client.is_user_authorized() → sign_in(bot_token=...)
 await user_client.connect()
 if not await user_client.is_user_authorized() → log error + return (must run --no-tui first)
 warm_entity_cache()
 _make_handler(channels)       ← registers NewMessage handler
 backfill_all()
 run_until_disconnected()  ┐
 _watch_channels()         ┘  gathered
 ```
 Channel-change signal path:
 ```
 ChannelPanel button → EvChannel* on bus → _drain_bus → _signal_channel_changed()
  → call_soon_threadsafe(asyncio.Event.set) → _watch_channels() wakes → _make_handler()
 ```
--- a/tui/app.py
+++ b/tui/app.py
--- a/tui/events.md
+++ b/tui/events.md
@@ -0,0 +1,66 @@
 # tui/events.py
 Thread-safe event bus between the bot backend thread and the Textual TUI.  
 The bot thread calls `post()`. The TUI drains the queue every 100ms via `_drain_bus()`.
 ## Public API
 ```python
 from tui import events as bus   # from core/ and tui/app.py
 from tui.events import post, init_bus, get_bus, tui_active
 ```
 ### `init_bus() -> queue.Queue`
 Creates the `queue.Queue`. Called inside `MonitorApp.on_mount()` — **must run on Textual's event loop**, not before `App.run()`.
 ### `post(event: Any) -> None`
 Fire-and-forget from any thread. Silently drops if bus not initialised.  
 Uses `queue.Queue.put_nowait()` — never blocks.
 ### `get_bus() -> queue.Queue | None`
 Returns the queue for the TUI consumer to drain.
 ### `tui_active: bool`
 Set to `True` by `init_bus()`. Checked by `core/tdl_downloader.py` to decide whether to pipe tdl output or inherit the terminal.
 ---
 ## Event types
 | Class | Fields | Posted by | Consumed by |
 |-------|--------|-----------|-------------|
 | `EvDownloadQueued` | `batch_id, filename, size_mb, source, password` | `tdl_downloader`, `scraper` | `DownloadPanel.queued()` |
 | `EvDownloadStarted` | `batch_id, filename` | `tdl_downloader`, `scraper` | `DownloadPanel.status("downloading")` |
 | `EvDownloadDone` | `batch_id, filename, via` | `tdl_downloader`, `scraper` | `DownloadPanel.status("done_tdl"\|"done_tel")` |
 | `EvDownloadFailed` | `batch_id, filename, reason` | `tdl_downloader`, `scraper` | `DownloadPanel.status("failed")` |
 | `EvTdlOutput` | `line` | `tdl_downloader._relay()` | `DownloadPanel.tdl_line()` |
 | `EvHit` | `severity, raw, source, filename, reasons` | `notifier.notify()` | `HitsPanel.add_hit()` + `StatsPanel.refresh_stats()` |
 | `EvChannelAdded` | `channel` | `ChannelPanel.on_button_pressed()` | `_drain_bus` → `_signal_channel_changed()` |
 | `EvChannelRemoved` | `channel` | `ChannelPanel.on_button_pressed()` | `_drain_bus` → `_signal_channel_changed()` |
 | `EvStatus` | `text, level` | everywhere | `MonitorApp.notify()` toast |
 `level` on `EvStatus`: `"info"` (default) · `"warning"` · `"error"`
 ---
 ## Threading model
 ```
 Bot thread (own asyncio loop)
  └─ bus.post(event)          ← queue.Queue.put_nowait() [thread-safe]
        ↓
  queue.Queue
        ↓
 Textual thread (Textual's loop)
  └─ _drain_bus() [set_interval 100ms]
       └─ q.get_nowait() loop
            └─ dispatch to widgets [safe, same thread as Textual]
 ```
 Channel changes flow the other way:
 ```
 _drain_bus sees EvChannelAdded/Removed
  → _signal_channel_changed()
       → loop.call_soon_threadsafe(asyncio.Event.set)
            → bot thread's _watch_channels() wakes
 ```
--- a/tui/events.py
+++ b/tui/events.py
@@ -0,0 +1,114 @@
 """
 tui/events.py — Thread-safe event bus between the bot backend and the TUI.
 The bot backend runs in a dedicated thread with its own asyncio event loop
 (completely isolated from Textual's loop).  Events are posted via a standard
 queue.Queue (thread-safe), and the TUI consumer drains it from Textual's loop
 on a timer (MonitorApp._drain_bus, scheduled with set_interval).
 post() is safe to call from any thread or any asyncio loop.
 """
 import queue
 import threading
 from dataclasses import dataclass, field
 from typing import Any
 # Thread-safe queue — works across the bot thread and Textual's thread.
 _queue: queue.Queue | None = None
 _queue_lock = threading.Lock()
 # Set to True when the TUI is running so tdl pipes output instead of
 # writing directly to the terminal.
 tui_active: bool = False
 def init_bus() -> queue.Queue:
    """Create a fresh event queue, mark the TUI as active, and return the queue.

    Meant to be called once, from MonitorApp.on_mount(). Each call replaces
    the module-level queue with a new one.
    """
    global _queue, tui_active
    bus = queue.Queue()
    _queue, tui_active = bus, True
    return bus
 def get_bus() -> queue.Queue | None:
    """Return the module-level event queue, or None if init_bus() has not run yet."""
    return _queue
 def post(event: Any) -> None:
    """Enqueue `event` without blocking; callable from any thread.

    The event is silently dropped when the bus has not been initialised or
    when the queue reports that it is full.
    """
    bus = _queue
    if bus is None:
        return
    try:
        bus.put_nowait(event)
    except queue.Full:
        pass
 # ─── Event types ──────────────────────────────────────────────────────────────
@dataclass
 class EvDownloadQueued:
    """A file has been accepted and is waiting for tdl."""
    batch_id:  str
    filename:  str
    size_mb:   float
    source:    str
    password:  str | None
@dataclass
 class EvDownloadStarted:
    """tdl has begun transferring this file."""
    batch_id:  str
    filename:  str
@dataclass
 class EvDownloadDone:
    """File fully downloaded (tdl or Telethon fallback)."""
    batch_id:  str
    filename:  str
    via:       str   # "tdl" | "telethon"
@dataclass
 class EvDownloadFailed:
    """All download attempts failed."""
    batch_id:  str
    filename:  str
    reason:    str
@dataclass
 class EvTdlOutput:
    """A line of output from tdl's stdout/stderr (TUI mode only)."""
    line: str
@dataclass
 class EvHit:
    """A scored credential hit to display in the hits panel."""
    severity:  str
    raw:       str
    source:    str
    filename:  str
    reasons:   list[str] = field(default_factory=list)
@dataclass
 class EvChannelAdded:
    """A channel was added to the live watch list."""
    channel: str | int
@dataclass
 class EvChannelRemoved:
    """A channel was removed from the live watch list."""
    channel: str | int
@dataclass
 class EvStatus:
    """Generic one-line status message (startup, errors, etc.)."""
    text: str
    level: str = "info"   # "info" | "warning" | "error"
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -0,0 +1 @@
 """utils — pure logic modules with no Telegram dependencies."""
--- a/utils/cache.md
+++ b/utils/cache.md
@@ -0,0 +1,32 @@
 # utils/cache.py
 Tracks already-processed Telegram document IDs to avoid redownloading.  
 Persists to `data/cache.json` as a JSON array of integers.
 ## Public API
 ```python
 from utils.cache import is_seen, mark_seen
 ```
 ### `is_seen(file_id: int) -> bool`
 Returns `True` if this document ID has been processed before.  
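 Loads from disk on every call, so IDs recorded by another process are picked up on the next check (slightly slow for hot loops — not an issue given download cadence). Concurrent writers can still overwrite each other; see Notes.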
 ### `mark_seen(file_id: int) -> None`
 Adds `file_id` to the cache and persists to disk.
 ---
 ## Storage
 - **File:** `data/cache.json`
 - **Format:** JSON array of integers — `[123456789, 987654321, ...]`
 - **No expiry** — grows indefinitely. Safe to delete to re-process all files.
 ---
 ## Notes
 - `is_seen` + `mark_seen` are called in `core/scraper.py` after a successful download+process cycle, not before — so a file that fails mid-process will be retried on next run.
 - Not thread-safe (load/modify/save is not atomic). Acceptable because downloads are sequential within the bot loop.
--- a/utils/cache.py
+++ b/utils/cache.py
@@ -0,0 +1,38 @@
 """
 cache.py — Tracks already-processed file IDs to avoid redownloading.
 Persists to a simple JSON file on disk.
 """
 import json
 import logging
 from pathlib import Path
 log = logging.getLogger(__name__)
 CACHE_FILE = Path("./data/cache.json")
 def _load() -> set:
    """Return the set of cached file IDs stored in CACHE_FILE.

    A missing cache file yields an empty set. An unreadable or malformed
    cache also yields an empty set, but a warning is logged because every
    previously seen file will then be treated as new.
    """
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            return set(json.load(f))
    except FileNotFoundError:
        # No cache yet: nothing has been processed.
        return set()
    except (OSError, ValueError, TypeError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError covers payloads that cannot be turned into a set.
        log.warning("Ignoring unreadable cache file %s: %s", CACHE_FILE, exc)
        return set()
 def _save(seen: set) -> None:
    """Persist `seen` to CACHE_FILE as a JSON array.

    The parent directory is created when missing. The data is written to a
    sibling temporary file and then moved over CACHE_FILE, so an interrupted
    write cannot leave a truncated cache behind.
    """
    CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
    tmp_file = CACHE_FILE.with_name(CACHE_FILE.name + ".tmp")
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(list(seen), f)
    # Path.replace overwrites the destination in a single rename.
    tmp_file.replace(CACHE_FILE)
 def is_seen(file_id: int) -> bool:
    """Return True if `file_id` is present in the cache loaded from disk."""
    seen_ids = _load()
    return file_id in seen_ids
 def mark_seen(file_id: int) -> None:
    """Add `file_id` to the cache and persist it.

    Nothing is written when the ID is already recorded, which avoids
    rewriting the whole cache file for no change.
    """
    seen = _load()
    if file_id in seen:
        return
    seen.add(file_id)
    _save(seen)
    # Lazy %-formatting: the message is only built when DEBUG is enabled.
    log.debug("  Cached file ID %s", file_id)
--- a/utils/database.md
+++ b/utils/database.md
@@ -0,0 +1,89 @@
 # utils/database.py
 SQLite persistence layer for credential hits.  
 DB file: `data/hits.db`
 ## Public API
 ```python
 from utils.database import init_db, insert_hits, search, recent, by_severity, stats
 ```
 ### Setup
 #### `init_db() -> None`
 Creates `hits` table and indexes if they don't exist. Call once on startup.  
 Safe to call multiple times (idempotent).
 ---
 ### Writing
 #### `insert_hits(scored_hits, source, filename, seen_before=False) -> int`
 Inserts a list of `ScoredHit` objects. Returns row count inserted.
 ```python
 insert_hits(new_hits, source="channelname", filename="combo.zip")
 insert_hits(dupe_hits, source="channelname", filename="combo.zip", seen_before=True)
 ```
 ---
 ### Querying
 #### `search(keyword: str) -> list[sqlite3.Row]`
 Substring search (SQL `LIKE '%keyword%'`, not a full-text index) across `url`, `username`, `raw`. Returns rows sorted by score DESC, timestamp DESC.
 #### `recent(limit: int = 50) -> list[sqlite3.Row]`
 Most recent hits, newest first.
 #### `by_severity(severity: str) -> list[sqlite3.Row]`
 All unique (non-duplicate) hits at a given severity, newest first.  
 `severity` must be one of: `"CRITICAL"`, `"HIGH"`, `"MEDIUM"`, `"LOW"`
 #### `stats() -> dict`
 Returns summary counters:
 ```python
 {
    "total":      int,   # all rows
    "unique":     int,   # seen_before=0
    "duplicates": int,   # seen_before=1
    "critical":   int,   # unique CRITICAL
    "high":       int,
    "medium":     int,
    "low":        int,
    "sources":    int,   # distinct source channels
    "top_source": {"source": str, "cnt": int} | None,
 }
 ```
 ---
 ## Schema
 ```sql
 hits (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    url         TEXT,
    username    TEXT,
    password    TEXT,
    raw         TEXT NOT NULL,      -- full original credential line
    source      TEXT,               -- channel username or ID
    filename    TEXT,               -- downloaded file name
    timestamp   TEXT NOT NULL,      -- "YYYY-MM-DD HH:MM:SS UTC"
    severity    TEXT NOT NULL,      -- CRITICAL/HIGH/MEDIUM/LOW
    score       INTEGER NOT NULL,   -- 40/30/20/10
    reasons     TEXT,               -- pipe-separated reason strings
    seen_before INTEGER NOT NULL    -- 0=new, 1=duplicate
 )
 ```
 Indexes: `url`, `username`, `source`, `timestamp`, `severity`.
 ---
 ## Notes
 - Each query opens and closes its own connection via the `_connect()` context manager.
 - `conn.row_factory = sqlite3.Row` — rows support both index and column-name access.
 - Transactions: commit on success, rollback on exception.
--- a/utils/database.py
+++ b/utils/database.py
@@ -0,0 +1,171 @@
 """
 database.py — SQLite storage for credential hits.
 Schema:
  hits table:
    - id          auto-increment primary key
    - url         the target URL from the credential line
    - username    extracted username/email
    - password    extracted password
    - raw         the full original line
    - source      channel/bot it came from
    - filename    the file it was found in
    - timestamp   UTC time of discovery
    - severity    CRITICAL / HIGH / MEDIUM / LOW
    - score       numeric score (higher = worse)
    - reasons     pipe-separated list of scoring reasons
    - seen_before whether this was a duplicate (for stats)
 """
 import sqlite3
 import logging
 from datetime import datetime, timezone
 from pathlib import Path
 from contextlib import contextmanager
 # Module-level logger, named after this module.
 log = logging.getLogger(__name__)
 # SQLite database file. The path is relative, so it is resolved against the
 # process's current working directory.
 DB_FILE = Path("./data/hits.db")
 # ─── Setup ────────────────────────────────────────────────────────────────────
@contextmanager
def _connect():
    """
    Yield a sqlite3 connection to DB_FILE whose rows are sqlite3.Row objects.

    The transaction is committed once the with-body finishes normally. If an
    Exception is raised (by the body or by the commit) the transaction is
    rolled back and the exception re-raised. The connection is closed in
    every case.
    """
    db = sqlite3.connect(DB_FILE)
    db.row_factory = sqlite3.Row
    try:
        yield db
        # Only reached when the with-body did not raise.
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
 def init_db() -> None:
    """Ensure the hits table and its lookup indexes exist; safe to call repeatedly."""
    # One index per column used for filtering or sorting.
    index_statements = (
        "CREATE INDEX IF NOT EXISTS idx_url       ON hits(url)",
        "CREATE INDEX IF NOT EXISTS idx_username  ON hits(username)",
        "CREATE INDEX IF NOT EXISTS idx_source    ON hits(source)",
        "CREATE INDEX IF NOT EXISTS idx_timestamp ON hits(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_severity  ON hits(severity)",
    )
    with _connect() as db:
        db.execute("""
            CREATE TABLE IF NOT EXISTS hits (
                id          INTEGER PRIMARY KEY AUTOINCREMENT,
                url         TEXT,
                username    TEXT,
                password    TEXT,
                raw         TEXT NOT NULL,
                source      TEXT,
                filename    TEXT,
                timestamp   TEXT NOT NULL,
                severity    TEXT NOT NULL DEFAULT 'LOW',
                score       INTEGER NOT NULL DEFAULT 10,
                reasons     TEXT,
                seen_before INTEGER NOT NULL DEFAULT 0
            )
        """)
        for statement in index_statements:
            db.execute(statement)
    log.info(f"Database ready: {DB_FILE}")
 # ─── Writing ─────────────────────────────────────────────────────────────────
 def insert_hits(
    scored_hits: list,
    source: str,
    filename: str,
    seen_before: bool = False,
 ) -> int:
    """
    Store every hit in `scored_hits` as one row of the hits table.

    Each hit must expose url, username, password, raw, severity, score and
    reasons attributes. All rows written by one call share the same UTC
    timestamp, source, filename and seen_before flag.

    Returns the number of rows handed to the database.
    """
    discovered_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    duplicate_flag = 1 if seen_before else 0
    records = [
        (
            hit.url,
            hit.username,
            hit.password,
            hit.raw,
            source,
            filename,
            discovered_at,
            hit.severity,
            hit.score,
            " | ".join(hit.reasons),
            duplicate_flag,
        )
        for hit in scored_hits
    ]
    with _connect() as db:
        db.executemany("""
            INSERT INTO hits
              (url, username, password, raw, source, filename, timestamp,
               severity, score, reasons, seen_before)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, records)
    log.info(f"  DB: inserted {len(records)} row(s) from {filename}")
    return len(records)
 # ─── Querying ─────────────────────────────────────────────────────────────────
 def search(keyword: str) -> list[sqlite3.Row]:
    """
    Return hits whose url, username or raw line contains `keyword`.

    The keyword is matched literally as a substring: the LIKE wildcards
    '%' and '_' (and the escape character itself) are escaped, so a search
    for "john_doe" does not also match "johnXdoe".

    Rows are ordered by score, then timestamp, both descending.
    """
    # Escape the backslash first so the escapes added afterwards survive.
    escaped = (
        keyword
        .replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    needle = f"%{escaped}%"
    with _connect() as conn:
        return conn.execute("""
            SELECT * FROM hits
            WHERE url LIKE ? ESCAPE '\\'
               OR username LIKE ? ESCAPE '\\'
               OR raw LIKE ? ESCAPE '\\'
            ORDER BY score DESC, timestamp DESC
        """, (needle,) * 3).fetchall()
 def recent(limit: int = 50) -> list[sqlite3.Row]:
    """Fetch at most `limit` rows from hits, ordered by timestamp with the latest first."""
    params = (limit,)
    with _connect() as db:
        cursor = db.execute("""
            SELECT * FROM hits
            ORDER BY timestamp DESC
            LIMIT ?
        """, params)
        rows = cursor.fetchall()
    return rows
 def by_severity(severity: str) -> list[sqlite3.Row]:
    """
    Fetch the hits stored with the given severity, latest timestamp first.

    Rows flagged as duplicates (seen_before = 1) are left out.
    """
    params = (severity,)
    with _connect() as db:
        cursor = db.execute("""
            SELECT * FROM hits
            WHERE severity = ? AND seen_before = 0
            ORDER BY timestamp DESC
        """, params)
        rows = cursor.fetchall()
    return rows
 def stats() -> dict:
    """
    Collect summary counters for the hits table.

    "total" counts every row; "unique" and the per-severity counters only
    count rows with seen_before = 0; "duplicates" is total minus unique.
    "sources" is the number of distinct source values, and "top_source" is
    the source with the most rows as {"source", "cnt"}, or None when the
    query returns no row.
    """
    with _connect() as db:
        def scalar(sql: str):
            # Run a single-value query and unwrap its only column.
            return db.execute(sql).fetchone()[0]
        total_rows = scalar("SELECT COUNT(*) FROM hits")
        unique_rows = scalar("SELECT COUNT(*) FROM hits WHERE seen_before=0")
        per_severity = {
            "critical": scalar("SELECT COUNT(*) FROM hits WHERE severity='CRITICAL' AND seen_before=0"),
            "high":     scalar("SELECT COUNT(*) FROM hits WHERE severity='HIGH' AND seen_before=0"),
            "medium":   scalar("SELECT COUNT(*) FROM hits WHERE severity='MEDIUM' AND seen_before=0"),
            "low":      scalar("SELECT COUNT(*) FROM hits WHERE severity='LOW' AND seen_before=0"),
        }
        source_count = scalar("SELECT COUNT(DISTINCT source) FROM hits")
        busiest = db.execute("""
            SELECT source, COUNT(*) as cnt FROM hits
            GROUP BY source ORDER BY cnt DESC LIMIT 1
        """).fetchone()
    summary = {
        "total":      total_rows,
        "unique":     unique_rows,
        "duplicates": total_rows - unique_rows,
    }
    summary.update(per_severity)
    summary["sources"] = source_count
    summary["top_source"] = dict(busiest) if busiest else None
    return summary
--- a/utils/scorer.md
+++ b/utils/scorer.md
@@ -0,0 +1,87 @@
 # utils/scorer.py
 Severity scoring for credential hits. No Telegram deps. Pure logic.
 ## Public API
 ```python
 from utils.scorer import score_hit, score_hits, summarize, ScoredHit
 from utils.scorer import CRITICAL, HIGH, MEDIUM, LOW, SEVERITY_EMOJI, SEVERITY_SCORES
 ```
 ### `score_hit(line: str) -> ScoredHit`
 Score a single raw credential line. Parses ULP format (`url:user:pass`), runs all checks, returns a `ScoredHit`.
 ### `score_hits(lines: list[str]) -> list[ScoredHit]`
 Score a list of lines. Returns sorted descending by score.
 ### `summarize(scored: list[ScoredHit]) -> dict`
 Returns `{CRITICAL: n, HIGH: n, MEDIUM: n, LOW: n}`.
 ---
 ## ScoredHit dataclass
 | Field | Type | Description |
 |-------|------|-------------|
 | `raw` | str | Original credential line |
 | `severity` | str | CRITICAL / HIGH / MEDIUM / LOW |
 | `score` | int | 40 / 30 / 20 / 10 |
 | `reasons` | list[str] | Human-readable match reasons |
 | `url` | str\|None | Parsed URL field |
 | `username` | str\|None | Parsed username/email field |
 | `password` | str\|None | Parsed password field |
 | `.emoji` | property | 🔴🟠🟡🟢 |
 ---
 ## Scoring rules (highest match wins)
 | Severity | Triggers |
 |----------|----------|
 | CRITICAL | Employee email domain after `@` in username/line · Privileged service URL (admin, vpn, ssh, rdp, gitlab, jira…) |
 | HIGH | Internal service URL (intranet, erp, crm, sso, owa, sharepoint…) |
 | MEDIUM | Client-facing URL (app, patient, booking, helpdesk…) |
 | LOW | Org domain appears anywhere in line (baseline) |
 Check 6 (no severity change): flags weak passwords ≤6 chars or common strings.
 ---
 ## Employee domain matching
 Keywords in `config.TARGET_KEYWORDS` containing `@` become employee patterns.  
 Pattern: `@<domain>(?:[^a-zA-Z0-9.\-]|$)` — requires literal `@` before the domain.  
 **`user@gmail.com` on a URL containing `myorg.cl` does NOT trigger CRITICAL.**
 Keywords without `@` go only to `ORG_DOMAINS` (LOW baseline).
 ---
 ## ULP line parser (`ULP_PATTERN`)
 Separators: `:` `;` `,` `|` `\t` (any of these between the three fields).
 The URL field handles two common stealer-log complications:
 1. **`://` not treated as separator** — the optional scheme prefix `(?:https?|ftp)://` is consumed before the character-class match, so `https://` never gets split at the colon.
 2. **Port + path consumed into the URL** — the optional group `(?::\d+/[^\s:;,|\t]*)` absorbs `:port/path` when the port is pure digits immediately followed by `/`. This correctly handles `http://host:8085/path/:user:pass` but intentionally skips patterns like `:24145487-8` (RUT number — hyphen after digits, no `/`).
 **Known limitation:** A bare port with no path (e.g. `https://host:8080:user:pass`) will mis-parse `8080` as the username. This is not observed in practice — stealer logs always include at least a trailing `/`.
 ---
 ## Module-level globals (rebuilt on import + via KeywordsScreen)
 | Name | Type | Description |
 |------|------|-------------|
 | `EMPLOYEE_DOMAINS` | `list[tuple[str, Pattern]]` | `(domain_str, anchored_pattern)` for `@`-keywords |
 | `ORG_DOMAINS` | `list[Pattern]` | Plain domain patterns for all keywords |
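 To rebuild after editing `config.TARGET_KEYWORDS` at runtime (mutate the existing object in place — `scorer.py` binds it with `from config import TARGET_KEYWORDS`, so rebinding `config.TARGET_KEYWORDS` to a new object is not seen by the builders):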
 ```python
 import utils.scorer as scorer
 scorer.EMPLOYEE_DOMAINS = scorer._build_employee_domains()
 scorer.ORG_DOMAINS      = scorer._build_org_domains()
 ```
--- a/utils/scorer.py
+++ b/utils/scorer.py
@@ -0,0 +1,273 @@
 """
 scorer.py — Severity scoring for credential hits.
 Scoring logic (highest match wins):
  CRITICAL  — Employee credentials (internal email domain)
                e.g. jdoe@yourclinic.cl:password
              — Admin/privileged service URLs
                e.g. admin., vpn., ssh., rdp., gitlab., jira.
  HIGH      — Internal-facing services
                e.g. intranet., erp., crm., portal., citrix.
              — Password manager or SSO hits
              — Any credential where username looks like an employee email
  MEDIUM    — Client-facing portals
                e.g. app., patient., client., booking.
              — Domain match on a non-privileged service
  LOW       — Generic domain keyword match
              — No URL parsed, just a raw domain mention
 Each scored hit is returned as a ScoredHit dataclass whose fields include:
  - severity:    CRITICAL / HIGH / MEDIUM / LOW
  - score:       int (higher = worse)
  - reasons:     list of human-readable reasons
  - raw:         original line
 """
 import re
 import logging
 from dataclasses import dataclass, field
 from config import TARGET_KEYWORDS
 # Module-level logger, named after this module.
 log = logging.getLogger(__name__)
 # ─── Severity levels ─────────────────────────────────────────────────────────
 # Severity labels, used as keys of the two lookup tables below.
 CRITICAL = "CRITICAL"
 HIGH     = "HIGH"
 MEDIUM   = "MEDIUM"
 LOW      = "LOW"
 # Numeric score for each severity label (higher = worse).
 SEVERITY_SCORES = {
    CRITICAL: 40,
    HIGH:     30,
    MEDIUM:   20,
    LOW:      10,
 }
 # Display marker for each severity label.
 SEVERITY_EMOJI = {
    CRITICAL: "🔴",
    HIGH:     "🟠",
    MEDIUM:   "🟡",
    LOW:      "🟢",
 }
 # ─── Pattern banks ───────────────────────────────────────────────────────────
 # Subdomains/services that indicate privileged access
 # All three service banks share the same prefix: a service name only counts
 # at the start of the string, right after 'http://' / 'https://', or right
 # after a dot.
 # NOTE(review): there is no boundary after the name, so a name also matches
 # as the prefix of a longer label (e.g. 'app' in 'application.example.com').
 CRITICAL_SERVICES = re.compile(
    r"(?:^|https?://|\.)"
    r"(admin|vpn|ssh|rdp|ftp|sftp|gitlab|github|bitbucket|jenkins|"
    r"jira|confluence|grafana|kibana|sentry|vault|bastion|jump|"
    r"firewall|router|switch|proxy|ldap|ad\.|activedirectory|"
    r"exchange|mail\.)",
    re.IGNORECASE
 )
 # Internal-facing services (scored HIGH when found in the parsed URL).
 HIGH_SERVICES = re.compile(
    r"(?:^|https?://|\.)"
    r"(intranet|erp|crm|portal|citrix|workspace|webmail|owa|"
    r"sharepoint|teams|slack|zoom|meet|sso|login|auth|oauth|"
    r"accounts?|dashboard|internal|corp|staff|hr|payroll|"
    r"finance|accounting)",
    re.IGNORECASE
 )
 # Client-facing services (scored MEDIUM when found in the parsed URL).
 MEDIUM_SERVICES = re.compile(
    r"(?:^|https?://|\.)"
    r"(app|patient|client|customer|booking|appointment|"
    r"reserva|cita|paciente|user|member|registro|signup|"
    r"support|helpdesk|ticket)",
    re.IGNORECASE
 )
 # Looks like a corporate email (user@domain)
 # Group 1 captures the domain part after the '@'.
 # NOTE(review): no use of this pattern is visible in this module — confirm
 # whether other modules import it.
 EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9._%+\-]+@([a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})")
 # ULP line parser
 # Separator set: colon, semicolon, comma, pipe, tab.
 # URL field: optional scheme (http/https/ftp) consumed first so '://' is never
 # mistaken for a separator; then an optional port group ':\d+/' absorbs port+path
 # (port is digits immediately followed by '/') so 'http://host:88/path:user:pass'
 # yields url='http://host:88/path', not url='http'.
 # The username field may not contain whitespace or a separator; the password
 # field takes the whole rest of the line, separators included.
 ULP_PATTERN = re.compile(
    r"^(?P<url>"
        r"(?:(?:https?|ftp)://)?[^\s:;,|\t]+"  # optional scheme + host/path
        r"(?::\d+/[^\s:;,|\t]*)?"              # optional :port/path (port = digits then /)
    r")"
    r"(?:[:;,|\t])"
    r"(?P<username>[^\s:;,|\t]+)"
    r"(?:[:;,|\t])"
    r"(?P<password>.+)$"
 )
 # ─── Derived from config ──────────────────────────────────────────────────────
 def _kw_to_domain(kw: str) -> str:
    """Reduce a keyword pattern to the bare domain text it stands for."""
    without_at = kw.replace(r"@", "")
    # Turn the regex-escaped dot back into a literal dot.
    unescaped = without_at.replace(r"\.", ".")
    # Drop anchor characters at either end, then any leading dots.
    unanchored = unescaped.strip("^$")
    return unanchored.lstrip(".")
 def _build_employee_domains() -> list[tuple[str, re.Pattern]]:
    """
    Build the employee-email matchers from the '@' keywords in TARGET_KEYWORDS.

    Every keyword containing '@' is reduced to its plain domain and compiled
    into a case-insensitive pattern that requires a literal '@' directly
    before the domain and a non-domain character (or end of input) after it.
    Because of that anchor, an org domain that only appears in the URL cannot
    match on an unrelated address such as one at gmail.com.

    Returns a list of (domain_str, compiled_pattern) tuples.
    """
    matchers = []
    for keyword in TARGET_KEYWORDS:
        if "@" not in keyword:
            continue
        domain = _kw_to_domain(keyword)
        if not domain:
            continue
        anchored = re.compile(
            r"@" + re.escape(domain) + r"(?:[^a-zA-Z0-9.\-]|$)",
            re.IGNORECASE,
        )
        matchers.append((domain, anchored))
    return matchers
 # (domain_str, pattern) pairs, built once when this module is imported.
 EMPLOYEE_DOMAINS = _build_employee_domains()
 def _build_org_domains() -> list[re.Pattern]:
    """
    Compile every keyword in TARGET_KEYWORDS into a plain domain matcher.

    Each keyword is reduced to its domain text and compiled as an escaped,
    case-insensitive pattern; keywords that reduce to an empty string are
    skipped. These patterns back the LOW baseline check, which only asks
    whether the org domain occurs anywhere in the line.
    """
    return [
        re.compile(re.escape(domain), re.IGNORECASE)
        for domain in map(_kw_to_domain, TARGET_KEYWORDS)
        if domain
    ]
 # Compiled domain patterns for all keywords, built once when this module is imported.
 ORG_DOMAINS = _build_org_domains()
 # ─── Scoring logic ────────────────────────────────────────────────────────────
@dataclass
 class ScoredHit:
    raw:      str
    severity: str
    score:    int
    reasons:  list[str] = field(default_factory=list)
    url:      str | None = None
    username: str | None = None
    password: str | None = None
    @property
    def emoji(self) -> str:
        return SEVERITY_EMOJI.get(self.severity, "⚪")
    def __str__(self) -> str:
        return f"{self.emoji} [{self.severity}] {self.raw}"
 def score_hit(line: str) -> ScoredHit:
    """
    Score one credential line and wrap the outcome in a ScoredHit.

    The line is stripped and, when it matches ULP_PATTERN, split into url,
    username and password. Every check that fires records a severity and a
    reason; the most severe recorded level wins and LOW is the fallback.
    Password findings only add reasons and never change the severity.
    """
    line = line.strip()

    # Split into ULP fields when the line has that shape.
    parsed = ULP_PATTERN.match(line)
    if parsed:
        url, username, password = parsed.group("url", "username", "password")
    else:
        url = username = password = None

    levels = []
    reasons = []

    # Check 1: employee email domain, looked for in the username first and
    # then in the whole line. The patterns demand a literal '@' before the
    # domain, so a URL that merely contains the org domain does not count.
    for domain_str, pat in EMPLOYEE_DOMAINS:
        if pat.search(username or "") or pat.search(line):
            levels.append(CRITICAL)
            reasons.append(f"Employee email domain: {domain_str}")
            break

    # Checks 2-4: classify the parsed URL against the three service banks.
    if url:
        if CRITICAL_SERVICES.search(url):
            levels.append(CRITICAL)
            reasons.append(f"Critical service URL: {url}")
        if HIGH_SERVICES.search(url):
            levels.append(HIGH)
            reasons.append(f"High-value internal service: {url}")
        if MEDIUM_SERVICES.search(url):
            levels.append(MEDIUM)
            reasons.append(f"Client-facing service: {url}")

    # Check 5: baseline, only recorded when nothing more specific fired.
    if not levels and any(p.search(line) for p in ORG_DOMAINS):
        levels.append(LOW)
        reasons.append("Org domain match in line")

    # Check 6: password quality notes.
    if password:
        if len(password) <= 6:
            reasons.append(f"⚠ Weak password ({len(password)} chars)")
        if password.lower() in {"123456", "password", "qwerty", "111111", "admin", "letmein"}:
            reasons.append(f"⚠ Common password: {password}")

    # Pick the most severe level that was recorded.
    final_severity = next(
        (level for level in (CRITICAL, HIGH, MEDIUM, LOW) if level in levels),
        LOW,
    )

    if not reasons:
        reasons.append("Pattern match")

    return ScoredHit(
        raw=line,
        severity=final_severity,
        score=SEVERITY_SCORES[final_severity],
        reasons=reasons,
        url=url,
        username=username,
        password=password,
    )
 def score_hits(lines: list[str]) -> list[ScoredHit]:
    """Score each line with score_hit and return the hits ordered from highest to lowest score."""
    return sorted(
        (score_hit(entry) for entry in lines),
        key=lambda hit: hit.score,
        reverse=True,
    )
 def summarize(scored: list[ScoredHit]) -> dict:
    """Tally the hits per severity label; every label is present, starting at zero."""
    tally = dict.fromkeys((CRITICAL, HIGH, MEDIUM, LOW), 0)
    for hit in scored:
        tally[hit.severity] = tally[hit.severity] + 1
    return tally
		`@@ -0,0 +1 @@`
							`"""core — Telegram I/O pipeline (scraper, downloader, processor, notifier)."""`
		`@@ -0,0 +1 @@`
							`"""tui — Textual TUI frontend and event bus."""`
		`@@ -0,0 +1 @@`
							`"""utils — pure logic modules with no Telegram dependencies."""`