From 48f486ac97acdb6ea963cf392d036fad23058d50 Mon Sep 17 00:00:00 2001
From: anti <samuel@securejump.cl>
Date: Thu, 2 Apr 2026 01:58:49 -0300
Subject: [PATCH] Initial commit: ULPgrammer

- Core Telegram monitoring pipeline (scraper, processor, notifier, downloaders)
- Textual TUI frontend with thread-safe event bus
- SQLite persistence, severity scoring, dedup cache
- Fixed ULP parser: handles https:// truncation, port+path URLs, semicolon separator
- Test suite: 88 tests across scorer, cache, database, processor
---
 .claudeignore           |   25 +
 .env.example            |   22 +
 .gitignore              |   28 ++
 QUICK_REF.md            |  182 +++++++
 README.md               |  146 ++++++
 config.py               |  100 ++++
 core/__init__.py        |    1 +
 core/bot_downloader.md  |   68 +++
 core/bot_downloader.py  |  161 +++++++
 core/notifier.md        |   67 +++
 core/notifier.py        |  248 ++++++++++
 core/processor.md       |   69 +++
 core/processor.py       |  233 +++++++++
 core/scraper.md         |   65 +++
 core/scraper.py         |  410 ++++++++++++++++
 core/tdl_downloader.md  |   70 +++
 core/tdl_downloader.py  |  363 ++++++++++++++
 data/.gitkeep           |    0
 logs/monitor.log        |   54 +++
 main.py                 |  142 ++++++
 pytest.ini              |    2 +
 requirements-dev.txt    |    1 +
 requirements.txt        |   16 +
 tests/__init__.py       |    0
 tests/conftest.py       |   31 ++
 tests/test_cache.py     |   55 +++
 tests/test_database.py  |  188 ++++++++
 tests/test_processor.py |  223 +++++++++
 tests/test_scorer.py    |  282 +++++++++++
 tui/__init__.py         |    1 +
 tui/app.md              |  130 +++++
 tui/app.py              | 1016 +++++++++++++++++++++++++++++++++++++++
 tui/events.md           |   66 +++
 tui/events.py           |  114 +++++
 utils/__init__.py       |    1 +
 utils/cache.md          |   32 ++
 utils/cache.py          |   38 ++
 utils/database.md       |   89 ++++
 utils/database.py       |  171 +++++++
 utils/scorer.md         |   87 ++++
 utils/scorer.py         |  273 +++++++++++
 41 files changed, 5270 insertions(+)
 create mode 100644 .claudeignore
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 QUICK_REF.md
 create mode 100644 README.md
 create mode 100644 config.py
 create mode 100644 core/__init__.py
 create mode 100644 core/bot_downloader.md
 create mode 100644 core/bot_downloader.py
 create mode 100644 core/notifier.md
 create mode 100644 core/notifier.py
 create mode 100644 core/processor.md
 create mode 100644 core/processor.py
 create mode 100644 core/scraper.md
 create mode 100644 core/scraper.py
 create mode 100644 core/tdl_downloader.md
 create mode 100644 core/tdl_downloader.py
 create mode 100644 data/.gitkeep
 create mode 100644 logs/monitor.log
 create mode 100644 main.py
 create mode 100644 pytest.ini
 create mode 100644 requirements-dev.txt
 create mode 100644 requirements.txt
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_cache.py
 create mode 100644 tests/test_database.py
 create mode 100644 tests/test_processor.py
 create mode 100644 tests/test_scorer.py
 create mode 100644 tui/__init__.py
 create mode 100644 tui/app.md
 create mode 100644 tui/app.py
 create mode 100644 tui/events.md
 create mode 100644 tui/events.py
 create mode 100644 utils/__init__.py
 create mode 100644 utils/cache.md
 create mode 100644 utils/cache.py
 create mode 100644 utils/database.md
 create mode 100644 utils/database.py
 create mode 100644 utils/scorer.md
 create mode 100644 utils/scorer.py

diff --git a/.claudeignore b/.claudeignore
new file mode 100644
index 0000000..a99e0af
--- /dev/null
+++ b/.claudeignore
@@ -0,0 +1,25 @@
+# Sessions
+*.session
+*.session-journal
+bot_session*
+
+# Data — keep the folder, ignore contents
+data/hits.db
+data/hits.txt
+data/hits.csv
+data/dedup.json
+data/cache.json
+data/tmp/
+data/logs/
+!data/.gitkeep
+
+# Env
+.env
+
+# Python
+__pycache__/
+*.pyc
+*.pyo
+.venv/
+venv/
+
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..6b949bf
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,22 @@
+# ─── Telegram API credentials ──────────────────────────────────────────────
+# Get these from https://my.telegram.org → API development tools
+API_ID=12345678
+API_HASH=your_api_hash_here
+
+# ─── Bot credentials ────────────────────────────────────────────────────────
+# Create a bot via @BotFather and paste the token here
+BOT_TOKEN=123456789:ABCdefGHIjklMNOpqrSTUvwxYZ
+
+# ─── Alert destination ──────────────────────────────────────────────────────
+# Chat ID to send hit notifications to (your personal ID or a group)
+# Tip: message @userinfobot on Telegram to get your ID
+NOTIFY_CHAT_ID=987654321
+
+# ─── Session name (just a filename, no extension needed) ────────────────────
+SESSION_NAME=monitor_session
+
+# ─── tdl (fast Go downloader) — optional but strongly recommended ───────────
+# Install: https://github.com/iyear/tdl
+# After installing, run once: tdl login -n <TDL_NAMESPACE>
+# <TDL_NAMESPACE> is the value of TDL_NAMESPACE in config.py, which may differ from SESSION_NAME.
+# If tdl is not on PATH the bot falls back to Telethon automatically.
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..79805e2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# Sessions
+*.session
+*.session-journal
+bot_session*
+
+# Data — keep the folder, ignore contents
+data/hits.db
+data/hits.txt
+data/hits.csv
+data/dedup.json
+data/cache.json
+data/tmp/
+data/logs/
+!data/.gitkeep
+
+# Env
+.env
+
+# Python
+__pycache__/
+*.pyc
+*.pyo
+.venv/
+venv/
+
+# Claude things
+CLAUDE.md
+.claude/*
diff --git a/QUICK_REF.md b/QUICK_REF.md
new file mode 100644
index 0000000..d9bb89b
--- /dev/null
+++ b/QUICK_REF.md
@@ -0,0 +1,182 @@
+# ULP Monitor — Quick Reference
+
+> For Claude Code: read the per-file `.md` alongside each `.py` before editing.  
+> Full docs in `README.md`.
+
+---
+
+## Project layout
+
+```
+ulp_monitor/
+├── main.py           Entry point (--no-tui flag for CLI mode)
+├── config.py         All settings — edit this for keywords, channels, paths
+│
+├── core/             Telegram I/O pipeline (all async, Telethon-dependent)
+│   ├── scraper.py        Live listener + backfill orchestration
+│   ├── tdl_downloader.py tdl subprocess wrapper + Telethon fallback
+│   ├── bot_downloader.py Inline "DOWNLOAD" button click flow
+│   ├── processor.py      Archive extraction (.zip/.7z/.rar) + line search
+│   └── notifier.py       Scoring → dedup → DB → hits.txt/csv → Telegram alert
+│
+├── utils/            Pure logic, no Telegram deps, no async
+│   ├── scorer.py         Severity scoring (CRITICAL/HIGH/MEDIUM/LOW)
+│   ├── cache.py          Seen file-ID dedup (data/cache.json)
+│   └── database.py       SQLite read/write (data/hits.db)
+│
+├── tui/              Textual TUI — runs in main thread
+│   ├── app.py            MonitorApp + all screens + bot thread launcher
+│   └── events.py         Thread-safe queue.Queue event bus
+│
+└── data/             Runtime output — gitignored
+    ├── hits.db
+    ├── hits.txt
+    ├── hits.csv
+    ├── cache.json
+    ├── dedup.json
+    └── logs/monitor.log
+```
+
+---
+
+## Data flow
+
+```
+Telegram channel
+  └─ new message with file / download button
+       │
+       ├─ core/scraper.py          detects + guards (size, extension, dedup)
+       │
+       ├─ core/tdl_downloader.py   downloads via tdl (batched)
+       │   └─ core/scraper.py      Telethon fallback if tdl fails
+       │
+       ├─ core/bot_downloader.py   handles inline button → bot reply flow
+       │
+       ├─ core/processor.py        extracts archive → searches .txt line by line
+       │
+       └─ core/notifier.py         scores → deduplicates → persists → alerts
+            ├─ utils/scorer.py
+            ├─ utils/database.py
+            └─ tui/events.py       posts EvHit to TUI
+```
+
+---
+
+## Threading architecture
+
+```
+main thread (Textual's event loop)
+  ├─ MonitorApp.on_mount()
+  │   ├─ bus.init_bus()            creates queue.Queue on THIS loop
+  │   ├─ threading.Thread → _run_bot_thread()
+  │   └─ set_interval(0.1, _drain_bus)
+  │
+  ├─ _drain_bus() [every 100ms]
+  │   └─ queue.Queue.get_nowait() → dispatch to widgets
+  │
+  └─ Textual widgets, screens, keybindings
+
+bot thread (own asyncio event loop)
+  └─ _bot_main()
+      ├─ bot_client.connect() + sign_in()
+      ├─ user_client.connect() + is_user_authorized()
+      ├─ warm_entity_cache()
+      ├─ _make_handler() → NewMessage handler registered
+      ├─ backfill_all()
+      └─ run_until_disconnected() + _watch_channels() [gathered]
+
+cross-thread communication
+  bot → TUI:  bus.post(event)              [queue.Queue.put_nowait, always safe]
+  TUI → bot:  loop.call_soon_threadsafe()  [asyncio.Event.set for channel changes]
+```
+
+---
+
+## Config quick reference (`config.py`)
+
+| Setting | Type | Description |
+|---------|------|-------------|
+| `API_ID` | int | From my.telegram.org |
+| `API_HASH` | str | From my.telegram.org |
+| `BOT_TOKEN` | str | From @BotFather |
+| `NOTIFY_CHAT_ID` | int | Your Telegram user/group ID |
+| `SESSION_NAME` | str | Session file name (default: `monitor_session`) |
+| `TARGET_KEYWORDS` | list[str] | Regex patterns. `@`-prefixed → employee email (CRITICAL). Plain → domain match (LOW) |
+| `WATCHED_CHANNELS` | list[str\|int] | Usernames or `-100xxxxxxxxxx` IDs |
+| `BACKFILL_LIMIT` | int | Messages to scan per channel on startup (0 = off) |
+| `ALLOWED_EXTENSIONS` | set | `.txt .zip .7z .rar` |
+| `MAX_FILE_SIZE` | int | Bytes (default 4 GB) |
+| `ARCHIVE_PASSWORDS` | list[bytes] | Tried in order on locked archives |
+| `TDL_NAMESPACE` | str\|None | `tdl login -n <name>` namespace |
+| `TDL_THREADS` | int | Chunk workers per file (`-t`) |
+| `TDL_PERFILE` | int | Concurrent files per tdl call (`-l`) |
+| `TDL_AMOUNT` | int | Messages per batch |
+| `TEMP_DIR` | Path | `data/tmp` |
+| `HITS_FILE` | Path | `data/hits.txt` |
+| `LOG_FILE` | Path | `data/logs/monitor.log` |
+
+---
+
+## Severity scoring summary
+
+| Severity | Score | Triggers |
+|----------|-------|----------|
+| CRITICAL | 40 | Employee email (`@myorg.cl` in username) · Privileged service URL (admin, vpn, rdp, gitlab…) |
+| HIGH | 30 | Internal service URL (intranet, erp, sso, owa…) |
+| MEDIUM | 20 | Client-facing URL (app, booking, helpdesk…) |
+| LOW | 10 | Org domain appears anywhere in line |
+
+`@`-keyword rule: pattern requires literal `@` before domain — `user@gmail.com` on a URL containing `myorg.cl` does **not** trigger CRITICAL.
+
+---
+
+## TUI keybindings
+
+| Key | Action | Screen |
+|-----|--------|--------|
+| `s` | Search hits DB | → SearchScreen |
+| `h` | Browse hits by severity | → HitsDBScreen |
+| `k` | Edit keyword patterns live | → KeywordsScreen |
+| `c` | Clear download + hits logs | main |
+| `r` | Force-refresh stats bar | main |
+| `q` / `ctrl+c` | Quit | any |
+| `Escape` | Back to main | sub-screens |
+| `1`/`2`/`3`/`4` | Filter CRITICAL/HIGH/MEDIUM/LOW | HitsDBScreen |
+| `r` | Load recent 50 | HitsDBScreen |
+
+---
+
+## Per-file reference docs
+
+| File | Reference |
+|------|-----------|
+| `utils/scorer.py` | `utils/scorer.md` |
+| `utils/cache.py` | `utils/cache.md` |
+| `utils/database.py` | `utils/database.md` |
+| `core/scraper.py` | `core/scraper.md` |
+| `core/processor.py` | `core/processor.md` |
+| `core/notifier.py` | `core/notifier.md` |
+| `core/tdl_downloader.py` | `core/tdl_downloader.md` |
+| `core/bot_downloader.py` | `core/bot_downloader.md` |
+| `tui/app.py` | `tui/app.md` |
+| `tui/events.py` | `tui/events.md` |
+
+---
+
+## Common tasks
+
+**Add a new keyword at runtime:** open the TUI → press `k` → add pattern → active immediately. Copy to `config.TARGET_KEYWORDS` to persist.
+
+**Add a channel at runtime:** type username or numeric ID in the Channels panel → ➕ Add. Handler re-registers immediately. Edit `config.WATCHED_CHANNELS` to persist.
+
+**Query hits from CLI:**
+```bash
+sqlite3 data/hits.db "SELECT severity, username, url FROM hits WHERE seen_before=0 ORDER BY score DESC LIMIT 20"
+```
+
+**Re-process all files** (wipe cache):
+```bash
+rm data/cache.json data/dedup.json
+```
+
+**Check what's happening:** `tail -f data/logs/monitor.log`
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c7d0493
--- /dev/null
+++ b/README.md
@@ -0,0 +1,146 @@
+# ULP Credential Monitor
+
+A Telegram-based credential exposure monitor for threat intelligence teams.
+Watches channels for combo/stealer log files and alerts you when your
+organization's credentials appear in them.
+
+---
+
+## How it works
+
+```
+User session (Telethon)
+  └─ watches N channels
+       └─ detects file attachments (.txt, .zip, .7z, .rar)
+            └─ downloads → extracts → searches line by line
+                 └─ hit? → writes to data/ + sends bot alert
+                 └─ no hit? → deletes file, moves on
+```
+
+---
+
+## Project structure
+
+```
+ulp_monitor/
+├── main.py           Entry point
+├── config.py         All settings (keywords, channels, paths)
+│
+├── core/             Telegram I/O pipeline
+│   ├── scraper.py        Live listener + backfill
+│   ├── tdl_downloader.py Fast downloads via tdl (Go MTProto)
+│   ├── bot_downloader.py Inline button / bot-dispatched file flows
+│   ├── processor.py      Archive extraction + line-by-line search
+│   └── notifier.py       hits.txt / hits.csv writer + bot alerts
+│
+├── utils/            Pure logic — no Telegram dependencies
+│   ├── scorer.py         Hit severity scoring
+│   ├── cache.py          Seen-file deduplication
+│   └── database.py       SQLite persistence layer
+│
+├── tui/              Textual TUI frontend
+│   ├── app.py            MonitorApp + all Screen classes
+│   └── events.py         Thread-safe event bus (bot thread → TUI)
+│
+└── data/             Runtime-generated (gitignored)
+    ├── hits.db           SQLite database
+    ├── hits.txt          Human-readable hit log
+    ├── hits.csv          CSV hit log (importable into Excel / pandas)
+    ├── dedup.json        Deduplication hashes
+    ├── cache.json        Seen file-ID cache
+    └── logs/monitor.log
+```
+
+---
+
+## Setup
+
+### 1. Get Telegram API credentials
+- Go to https://my.telegram.org → *API development tools*
+- Create an app → note your `api_id` and `api_hash`
+
+### 2. Create a bot
+- Message [@BotFather](https://t.me/BotFather) → `/newbot`
+- Start a chat with your new bot before running
+
+### 3. Get your chat ID
+- Message [@userinfobot](https://t.me/userinfobot)
+
+### 4. Configure
+
+```bash
+cp .env.example .env
+# fill in API_ID, API_HASH, BOT_TOKEN, NOTIFY_CHAT_ID
+```
+
+Open `config.py` and set:
+
+- **`TARGET_KEYWORDS`** — your org's domains and email patterns.
+  Keywords with `@` (e.g. `r"@myorg\.cl"`) are **employee email domains** → CRITICAL.
+  Keywords without `@` are plain domain matches → LOW baseline.
+- **`WATCHED_CHANNELS`** — channel usernames or numeric IDs
+- **`BACKFILL_LIMIT`** — past messages to scan per channel on startup
+
+### 5. Install dependencies
+
+```bash
+pip install -r requirements.txt
+# rarfile needs the unrar binary:
+# Ubuntu/Debian: sudo apt install unrar
+# macOS:         brew install rar
+```
+
+### 5a. Install tdl (strongly recommended)
+
+```bash
+curl -sSL https://raw.githubusercontent.com/iyear/tdl/main/scripts/install.sh | bash
+tdl login -n ulpmon   # namespace must match TDL_NAMESPACE in config.py
+```
+
+### 6. First run — complete Telegram auth
+
+```bash
+python main.py --no-tui
+# follow the phone + 2FA prompts once
+```
+
+### 7. Run
+
+```bash
+python main.py          # TUI mode (recommended)
+python main.py --no-tui # plain CLI
+```
+
+---
+
+## TUI keybindings
+
+| Key | Action |
+|-----|--------|
+| `s` | Search hits database |
+| `h` | Browse hits by severity |
+| `k` | Edit keyword patterns live |
+| `c` | Clear logs |
+| `r` | Refresh stats |
+| `q` | Quit |
+
+---
+
+## Output
+
+| File | Description |
+|------|-------------|
+| `data/hits.db`  | SQLite — all hits with scores, severity, dedup flag |
+| `data/hits.txt` | Human-readable grouped log |
+| `data/hits.csv` | CSV — easy to pull into Excel / pandas |
+| `data/logs/monitor.log` | Full run log |
+
+Telegram alerts fire for CRITICAL / HIGH / MEDIUM only. LOW is stored silently.
+
+---
+
+## Notes
+
+- **Session files are sensitive** — equivalent to a logged-in account. Gitignored, never share.
+- **Flood limits** — `FloodWaitError` is handled automatically.
+- **Private channels** — your user account must already be a member.
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..260c822
--- /dev/null
+++ b/config.py
@@ -0,0 +1,100 @@
+"""
+config.py — Loads Telegram credentials from .env; all other settings are defined here
+"""
+
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# -- Timeouts --
+BOT_REPLY_TIMEOUT = 10
+
+# ─── Telegram credentials ────────────────────────────────────────────────────
+API_ID   = int(os.environ["API_ID"])
+API_HASH = os.environ["API_HASH"]
+BOT_TOKEN = os.environ["BOT_TOKEN"]
+NOTIFY_CHAT_ID = int(os.environ["NOTIFY_CHAT_ID"])
+SESSION_NAME = os.getenv("SESSION_NAME", "monitor_session")
+
+# ─── Target keywords ─────────────────────────────────────────────────────────
+# Add your org's domains, email patterns, IP ranges, known usernames, etc.
+# All patterns are case-insensitive regex.
+TARGET_KEYWORDS: list[str] = [
+    r"sanatorioaleman\.cl",
+    r"@sanatorioaleman\.cl",
+    # r"192\.168\.10\.",            # internal IP range example
+    # r"specificuser",              # known internal usernames
+]
+
+# ─── Channels to watch ───────────────────────────────────────────────────────
+# Use usernames (without @) or numeric channel IDs (-100xxxxxxxxxx)
+WATCHED_CHANNELS: list[str | int] = [
+    #-1002230225603,
+    "cloudxlog",
+    #-1001967030016, # daisycloud
+    #"berserklogs", # berserklogs
+    #"BorwitaFreeLogs", # borwita
+    -1002748707556, # darkcloud
+    -1001684073398, # BHF Cloud
+    -1003163621939, # Wich Love from R
+    -1003611713618, # Khazan Cloud
+    -1003328682684, # LogsPlanet
+    -1003204260194, # JDP
+    -1002828367761, # HesoyamCloud
+    -1003513974925, # Slurm Logs
+    -1003599300787, # Arhont Corp
+    -1002582513379, # OnlyLogs
+    -1002788333372, # Ickis Cloud
+    #-1001234567890,  # private channel by ID
+]
+
+# ─── File handling ───────────────────────────────────────────────────────────
+TEMP_DIR  = Path("./data/tmp")               # download scratch dir; data/tmp/ is gitignored
+HITS_FILE = Path("./data/hits.txt")          # hit log; notifier derives hits.csv from this path
+LOG_FILE  = Path("./data/logs/monitor.log")  # run log; data/logs/ is gitignored
+
+# Extensions to download and process
+ALLOWED_EXTENSIONS = {".txt", ".zip", ".7z", ".rar"}
+
+# Max file size to download (bytes). Default: 4 GB.
+# Very large files are skipped to avoid abuse of your session.
+MAX_FILE_SIZE = 4 * 1024 * 1024 * 1024  # 4 GB (Telegram Premium max)
+
+# ─── Archive passwords to try ────────────────────────────────────────────────
+ARCHIVE_PASSWORDS: list[bytes] = [
+    b"1234",
+    b"0000",
+    b"infected",
+    b"telegram",
+    b"password",
+    b"12345",
+    b"",
+    b"Borwita",
+    b"@WichLoveFromR",
+]
+
+# ─── Backfill settings ───────────────────────────────────────────────────────
+# How many historical messages to scan per channel on startup (0 = skip backfill)
+BACKFILL_LIMIT = 500
+
+# ─── tdl downloader settings ─────────────────────────────────────────────────
+# Namespace tdl was logged into.  Run `tdl login` with no -n flag → namespace
+# is "default".  Run `tdl login -n foo` → namespace is "foo".
+# Set to None to omit -n entirely (tdl will use "default" anyway).
+TDL_NAMESPACE: str | None = "ulpmon"
+
+# Parallel chunk workers per file (-t / --threads global flag)
+TDL_THREADS = 8
+
+# Max concurrent files per tdl invocation (-l / --limit global flag)
+TDL_PERFILE = 4
+
+# Max messages to batch into a single tdl invocation during backfill.
+# tdl handles the parallelism internally via -l and -t.
+TDL_AMOUNT = 4
+
+# Whether to use a Telegram takeout session for downloads (lower flood limits).
+# Takeout sessions are rate-limited differently — good for bulk backfill.
+TDL_TAKEOUT = True
diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000..e85ef1c
--- /dev/null
+++ b/core/__init__.py
@@ -0,0 +1 @@
+"""core — Telegram I/O pipeline (scraper, downloader, processor, notifier)."""
diff --git a/core/bot_downloader.md b/core/bot_downloader.md
new file mode 100644
index 0000000..185aa48
--- /dev/null
+++ b/core/bot_downloader.md
@@ -0,0 +1,68 @@
+# core/bot_downloader.py
+
+Handles "click to download" inline button flows. Some Telegram channels post files via a bot behind a button rather than directly attaching them.
+
+## Public API
+
+```python
+from core.bot_downloader import (
+    handle_bot_download_message,
+    has_download_button,
+    extract_password,
+)
+```
+
+### `handle_bot_download_message(client, bot, msg, source_name, patterns, password=None)`
+**async.** Full pipeline:
+1. Detect download button
+2. Click it (URL button → `/start payload` to the bot; callback button → `.click()`)
+3. Wait up to `BOT_REPLY_TIMEOUT` seconds for the bot to send a file back
+4. Hand each file response to `core.scraper.handle_message()`
+
+### `has_download_button(msg) -> bool`
+Returns `True` if the message contains a recognisable download button.  
+Checked in live handler and backfill before calling this module.
+
+### `extract_password(msg) -> str | None`
+Scans message text for `Pass: ...` / `Password: ...` / `Contraseña: ...` patterns.  
+Returns the extracted password string, or `None`.
+
+---
+
+## Button detection
+
+Recognised button text keywords (case-insensitive):
+```
+DOWNLOAD, DESCARGAR, GET FILE, GET PACK, ⬇, 📥
+```
+
+---
+
+## URL button flow (most common)
+
+```
+Button URL: https://t.me/SomeBot?start=ABC123
+  → parse bot username + payload
+  → client.send_message(bot_entity, "/start ABC123")
+  → poll get_messages(bot_entity, limit=3) every 1s for BOT_REPLY_TIMEOUT seconds
+  → return file messages found
+```
+
+## Callback button flow (fallback)
+
+```
+btn.click()
+→ sleep 2s
+→ get_messages(sender, limit=5)
+→ return file messages found
+```
+
+---
+
+## Constants
+
+| Name | Value | Description |
+|------|-------|-------------|
+| `BOT_REPLY_TIMEOUT` | `10` | Seconds to wait for bot file reply |
+| `DOWNLOAD_BUTTON_KEYWORDS` | see above | Button text triggers |
+| `PASSWORD_PATTERN` | regex | Matches `Pass[word]: value` in message text |
diff --git a/core/bot_downloader.py b/core/bot_downloader.py
new file mode 100644
index 0000000..b991765
--- /dev/null
+++ b/core/bot_downloader.py
@@ -0,0 +1,161 @@
+"""
+bot_downloader.py — Handles "click to download" inline button flows.
+
+Some Telegram channels post messages with a DOWNLOAD button that triggers
+a bot to send you the actual file. This module simulates that click and
+captures the bot's file response.
+"""
+
+import asyncio
+import re
+import logging
+
+from telethon import TelegramClient
+from telethon.tl.types import MessageMediaDocument, KeyboardButtonUrl
+from telethon.errors import FloodWaitError
+
+log = logging.getLogger(__name__)
+
+DOWNLOAD_BUTTON_KEYWORDS = ["DOWNLOAD", "DESCARGAR", "GET FILE", "GET PACK", "⬇", "📥"]
+BOT_REPLY_TIMEOUT = 10
+
+PASSWORD_PATTERN = re.compile(
+    r"(?:Pass|Password|Contraseña|Contrasena|Clave)[\s]*:[\s]*(.+)$",
+    re.IGNORECASE | re.MULTILINE
+)
+
+
+# ─── Password extraction ──────────────────────────────────────────────────────
+
+def extract_password(msg) -> str | None:
+    """Return the archive password announced in ``msg.text``, or None.
+
+    The text is searched with PASSWORD_PATTERN; nothing else is inspected.
+    """
+    found = PASSWORD_PATTERN.search(msg.text) if msg.text else None
+    if found is None:
+        return None
+    secret = found.group(1).strip().strip("*`_~")  # whitespace, then markdown marks
+    log.info(f"  Found password in message: '{secret}'")
+    return secret
+
+
+# ─── Button detection ─────────────────────────────────────────────────────────
+
+def find_download_button(msg):
+    """
+    Return the first inline button whose upper-cased text contains one of
+    DOWNLOAD_BUTTON_KEYWORDS, scanning rows in order; None when none matches.
+    """
+    rows = msg.buttons or ()  # no keyboard (None/empty) → nothing to scan
+    for row in rows:
+        for candidate in row:
+            label = candidate.text.upper()
+            if any(keyword in label for keyword in DOWNLOAD_BUTTON_KEYWORDS):
+                return candidate
+    return None
+
+
+def has_download_button(msg) -> bool:  # True when find_download_button() returns a button
+    return find_download_button(msg) is not None
+
+
+# ─── Click + wait flow ────────────────────────────────────────────────────────
+
+async def click_download_button(client: TelegramClient, msg) -> list:
+    """
+    Click the download button on ``msg`` and return the document messages
+    received in response ([] when there is no button or on any failure).
+
+    URL buttons are emulated by sending "/start <payload>" to the bot and
+    polling its chat once per second for up to BOT_REPLY_TIMEOUT seconds.
+    Only documents newer than that /start are accepted, so a file left in
+    the chat by an earlier request is never mistaken for this reply.
+    Other buttons are clicked as callbacks; the sender is polled once after 2 s.
+    """
+    btn = find_download_button(msg)
+    if not btn:
+        return []
+    log.info(f"  Clicking button: '{btn.text}'")
+
+    # ── URL button (most common) ───────────────────────────────────────────
+    if isinstance(btn.button, KeyboardButtonUrl):
+        url = btn.button.url  # e.g. https://t.me/SomeBot?start=ABC123
+        match = re.search(r"t\.me/([A-Za-z0-9_]+)\?start=(.+)", url)
+        if not match:
+            log.warning(f"  Unrecognised URL format: {url}")
+            return []
+        bot_username, payload = match.group(1), match.group(2)
+        log.info(f"  → Messaging @{bot_username} with /start {payload}")
+        try:
+            bot_entity = await client.get_entity(bot_username)
+            sent = await client.send_message(bot_entity, f"/start {payload}")
+        except Exception as e:
+            log.error(f"  Failed to message bot: {e}")
+            return []
+
+        # Message ids increase within a chat, so id <= sent.id predates this request.
+        log.info(f"  Waiting up to {BOT_REPLY_TIMEOUT}s for bot reply...")
+        for _ in range(BOT_REPLY_TIMEOUT):
+            await asyncio.sleep(1)
+            try:
+                recent = await client.get_messages(bot_entity, limit=3)
+                files = [m for m in recent if m.id > sent.id
+                         and isinstance(m.media, MessageMediaDocument)]
+                if files:
+                    log.info(f"  ✓ Got file from bot.")
+                    return files
+            except Exception as e:
+                log.warning(f"  Poll error: {e}")
+                break
+        log.warning(f"  Bot did not reply within {BOT_REPLY_TIMEOUT}s.")
+        return []
+
+    # ── Callback button (less common) ─────────────────────────────────────
+    try:
+        await btn.click()
+        await asyncio.sleep(2)
+    except Exception as e:
+        log.error(f"  Callback click failed: {e}")
+        return []
+    try:
+        sender = await msg.get_sender()
+        recent = await client.get_messages(sender, limit=5)
+        return [m for m in recent if m.media and isinstance(m.media, MessageMediaDocument)]
+    except Exception as e:
+        log.warning(f"  Fallback poll failed: {e}")
+        return []
+
+
+# ─── Main entry point ─────────────────────────────────────────────────────────
+
+async def handle_bot_download_message(
+    client: TelegramClient,
+    bot: TelegramClient,
+    msg,
+    source_name: str,
+    patterns,
+    password: str | None = None,
+) -> None:
+    """
+    Resolve a download-button message into files and process each of them.
+
+    Does nothing when ``msg`` has no download button. Otherwise the button
+    is clicked via click_download_button() and every returned message is
+    passed to core.scraper.handle_message() under "<source_name>[bot]".
+    """
+    if find_download_button(msg) is None:
+        return
+
+    log.info(f"[BotDL] Download button detected in {source_name}")
+    replies = await click_download_button(client, msg)
+    if not replies:
+        log.warning(f"[BotDL] No file received for message in {source_name}.")
+        return
+
+    from core.scraper import handle_message  # deferred, function-scope import
+    tagged_source = f"{source_name}[bot]"
+    for reply in replies:
+        attrs = getattr(reply.media.document, 'attributes', []) if hasattr(reply.media, 'document') else 'none'
+        log.info(f"  [BotDL] Response media type: {type(reply.media).__name__}, attrs: {attrs}")
+        await handle_message(client, bot, reply, tagged_source, patterns, password=password)
diff --git a/core/notifier.md b/core/notifier.md
new file mode 100644
index 0000000..9ad4dba
--- /dev/null
+++ b/core/notifier.md
@@ -0,0 +1,67 @@
+# core/notifier.py
+
+Scores hits, deduplicates, persists to disk and DB, sends Telegram alerts.
+
+## Public API
+
+```python
+from core.notifier import notify, send_status
+```
+
+### `notify(bot, hits: list[str], source: str, filename: str)`
+**async.** Full notification pipeline:
+1. `score_hits(hits)` → `list[ScoredHit]`
+2. Deduplicate via SHA-256 hashes (`data/dedup.json`)
+3. `insert_hits()` into SQLite for new + dupes (flagged accordingly)
+4. `write_hits()` → append to `data/hits.txt`
+5. `write_hits_csv()` → append to `data/hits.csv`
+6. `send_alert()` → Telegram message for CRITICAL/HIGH/MEDIUM only
+7. Post `EvHit` events onto the TUI bus for each new hit
+
+### `send_status(bot, message: str)`
+**async.** Sends a plain Markdown message to `config.NOTIFY_CHAT_ID`. Used for startup/status notifications.
+
+---
+
+## Internal functions
+
+| Function | Description |
+|----------|-------------|
+| `deduplicate(hits)` | Returns `(new_hits, dupe_hits)`; updates `data/dedup.json` |
+| `write_hits(scored_hits, source)` | Appends grouped human-readable block to `data/hits.txt` |
+| `write_hits_csv(scored_hits, source, filename)` | Appends rows to `data/hits.csv`; writes header on first call |
+| `send_alert(bot, scored_hits, source, filename)` | Sends Telegram message grouped by severity; skips if all LOW |
+
+---
+
+## Output files
+
+| File | Format | Notes |
+|------|--------|-------|
+| `data/hits.txt` | Plain text, grouped by severity | Human-readable, append-only |
+| `data/hits.csv` | CSV with header | Columns: `timestamp, severity, score, url, username, password, reasons, source, filename` |
+| `data/dedup.json` | JSON array of SHA-256 hex strings | Hashes of `line.strip().lower()` |
+
+---
+
+## Alert behaviour
+
+- CRITICAL / HIGH / MEDIUM → Telegram alert sent immediately
+- LOW → stored in DB + files, **no** Telegram alert
+- Duplicates → stored in DB with `seen_before=1`, no alert, no file write
+
+## Telegram alert format
+
+```
+🚨 Credential hit(s) detected
+📁 `filename`
+📢 `source`
+🕐 `timestamp`
+
+Summary: 🔴 N  🟠 N  🟡 N  🟢 N
+
+🔴 CRITICAL (N)
+`url:user:pass`
+↳ reason | reason
+... (up to 10 per severity; remainder counted)
+```
diff --git a/core/notifier.py b/core/notifier.py
new file mode 100644
index 0000000..710d1ef
--- /dev/null
+++ b/core/notifier.py
@@ -0,0 +1,248 @@
+"""
+notifier.py — Persists hits to disk and sends Telegram bot alerts.
+
+Includes:
+  - Severity scoring via scorer.py
+  - Deduplication: same credential never written or alerted twice
+  - SQLite storage via database.py
+  - hits.txt kept as a human-readable backup
+  - Telegram alerts grouped by severity
+"""
+
+import logging
+import hashlib
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+
+from telethon import TelegramClient
+
+import csv
+
+from config import HITS_FILE, NOTIFY_CHAT_ID
+from utils.scorer import score_hits, summarize, CRITICAL, HIGH, MEDIUM, LOW, SEVERITY_EMOJI
+from utils.database import insert_hits
+from tui import events as bus
+
+HITS_CSV = HITS_FILE.with_suffix(".csv")
+
+log = logging.getLogger(__name__)
+
+MAX_PREVIEW = 10   # hits to show per severity group in alert
+DEDUP_FILE  = Path("./data/dedup.json")
+
+# Only alert immediately for these severities — LOW hits are silent
+ALERT_SEVERITIES = {CRITICAL, HIGH, MEDIUM}
+
+
+# ─── Deduplication ────────────────────────────────────────────────────────────
+
+def _hash(line: str) -> str:
+    """Return the SHA-256 hex digest of *line* after stripping and lowercasing it."""
+    normalized = line.strip().lower()
+    hasher = hashlib.sha256()
+    hasher.update(normalized.encode())
+    return hasher.hexdigest()
+
+
+def _load_seen_hashes() -> set:
+    """
+    Load the set of previously seen hit hashes from DEDUP_FILE.
+
+    Returns an empty set when the file does not exist or cannot be read or
+    parsed. Failures are logged so that a corrupted dedup file does not
+    silently reset deduplication.
+    """
+    if not DEDUP_FILE.exists():
+        return set()
+    try:
+        with open(DEDUP_FILE, "r", encoding="utf-8") as f:
+            return set(json.load(f))
+    except (OSError, ValueError, TypeError) as e:
+        # ValueError covers json.JSONDecodeError and decoding errors;
+        # TypeError covers a payload that is not an iterable of hashables.
+        log.warning(f"Could not load dedup file, starting empty: {e}")
+        return set()
+
+
+def _save_seen_hashes(seen: set) -> None:
+    """
+    Persist the set of seen hit hashes to DEDUP_FILE as a JSON array.
+
+    The data is written to a sibling temporary file and then moved over
+    DEDUP_FILE, so an interrupted write cannot leave a truncated dedup file
+    behind. Failures are logged and otherwise ignored (best effort).
+    """
+    tmp_path = DEDUP_FILE.with_name(DEDUP_FILE.name + ".tmp")
+    try:
+        DEDUP_FILE.parent.mkdir(parents=True, exist_ok=True)
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump(list(seen), f)
+        tmp_path.replace(DEDUP_FILE)
+    except (OSError, TypeError, ValueError) as e:
+        log.warning(f"Could not save dedup file: {e}")
+
+
+def deduplicate(hits: list) -> tuple[list, list]:
+    """
+    Split scored hits into never-seen and already-seen ones.
+
+    Accepts a list of ScoredHit objects (only their ``raw`` attribute is read).
+    Returns (new_hits, dupe_hits). A hit counts as a duplicate when its hash is
+    already stored in the dedup file OR when an identical line appeared
+    earlier in this same call, so one credential is never reported twice.
+    Newly seen hashes are persisted before returning.
+    """
+    seen       = _load_seen_hashes()
+    new_hits   = []
+    dupe_hits  = []
+    new_hashes = set()
+
+    for h in hits:
+        digest = _hash(h.raw)
+        # Check new_hashes as well: `seen` is only updated after the loop,
+        # so repeats inside one batch would otherwise all count as new.
+        if digest in seen or digest in new_hashes:
+            dupe_hits.append(h)
+        else:
+            new_hits.append(h)
+            new_hashes.add(digest)
+
+    if new_hashes:
+        seen.update(new_hashes)
+        _save_seen_hashes(seen)
+
+    log.info(
+        f"  Dedup: {len(hits)} raw hit(s) → "
+        f"{len(new_hits)} new, {len(dupe_hits)} duplicate(s)"
+    )
+    return new_hits, dupe_hits
+
+
+# ─── Helpers ─────────────────────────────────────────────────────────────────
+
+def _timestamp() -> str:
+    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS UTC``."""
+    now_utc = datetime.now(tz=timezone.utc)
+    return f"{now_utc:%Y-%m-%d %H:%M:%S} UTC"
+
+
+# ─── Output ──────────────────────────────────────────────────────────────────
+
+def write_hits(scored_hits: list, source: str) -> None:
+    """
+    Append one human-readable block for *scored_hits* to HITS_FILE.
+
+    The block starts with a header (source, time, per-severity counts) and
+    then lists the hits grouped by severity, from CRITICAL down to LOW. Each
+    hit is followed by a line with its reasons.
+    """
+    HITS_FILE.parent.mkdir(parents=True, exist_ok=True)
+    counts = summarize(scored_hits)
+    rule = "=" * 60
+
+    with open(HITS_FILE, "a", encoding="utf-8") as out:
+        out.write(
+            f"\n{rule}\n"
+            f"Source  : {source}\n"
+            f"Time    : {_timestamp()}\n"
+            f"Hits    : {len(scored_hits)} "
+            f"(CRITICAL={counts[CRITICAL]} HIGH={counts[HIGH]} "
+            f"MEDIUM={counts[MEDIUM]} LOW={counts[LOW]})\n"
+            f"{rule}\n"
+        )
+
+        for level in (CRITICAL, HIGH, MEDIUM, LOW):
+            members = [hit for hit in scored_hits if hit.severity == level]
+            if members:
+                out.write(f"\n{SEVERITY_EMOJI[level]} {level} ({len(members)})\n")
+                for hit in members:
+                    out.write(f"  {hit.raw}\n")
+                    out.write(f"  → {' | '.join(hit.reasons)}\n")
+
+    log.info(f"  Wrote {len(scored_hits)} hit(s) to {HITS_FILE}")
+
+
+def write_hits_csv(scored_hits: list, source: str, filename: str) -> None:
+    """
+    Append new hits to hits.csv — one row per hit, easy to import.
+
+    The header row is written when the CSV does not exist yet or exists but
+    is empty (e.g. after being truncated by hand), so the file always starts
+    with its column names. All rows of one call share the same timestamp.
+    """
+    HITS_CSV.parent.mkdir(parents=True, exist_ok=True)
+    write_header = not HITS_CSV.exists() or HITS_CSV.stat().st_size == 0
+    timestamp = _timestamp()
+    with open(HITS_CSV, "a", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        if write_header:
+            writer.writerow([
+                "timestamp", "severity", "score", "url", "username",
+                "password", "reasons", "source", "filename",
+            ])
+        writer.writerows(
+            [
+                timestamp, h.severity, h.score,
+                h.url or "", h.username or "", h.password or "",
+                " | ".join(h.reasons), source, filename,
+            ]
+            for h in scored_hits
+        )
+    log.info(f"  Wrote {len(scored_hits)} hit(s) to {HITS_CSV}")
+
+
+async def send_alert(
+    bot: TelegramClient,
+    scored_hits: list,
+    source: str,
+    filename: str,
+) -> None:
+    """
+    Send a Telegram alert grouped by severity.
+
+    Only CRITICAL, HIGH and MEDIUM hits are listed (at most MAX_PREVIEW per
+    severity); LOW hits only show up in the summary counts. Nothing is sent
+    when no hit has an alertable severity. The message is cut at a line
+    boundary when it would get too long for a single Telegram message, so an
+    oversized batch still produces an alert. Send failures are logged, not
+    raised.
+    """
+    # Kept below Telegram's 4096-character message limit with some headroom.
+    max_len = 4000
+
+    summary  = summarize(scored_hits)
+    alertable = [h for h in scored_hits if h.severity in ALERT_SEVERITIES]
+
+    if not alertable:
+        log.info("  No alertable hits (all LOW) — skipping Telegram notification.")
+        return
+
+    def code(text) -> str:
+        # A backtick inside an inline-code span would end the span early.
+        return str(text).replace("`", "'")
+
+    lines = [
+        "🚨 *Credential hit(s) detected*",
+        "",
+        f"📁 `{code(filename)}`",
+        f"📢 `{code(source)}`",
+        f"🕐 `{_timestamp()}`",
+        "",
+        "*Summary:*",
+        f"🔴 CRITICAL: `{summary[CRITICAL]}`  "
+        f"🟠 HIGH: `{summary[HIGH]}`  "
+        f"🟡 MEDIUM: `{summary[MEDIUM]}`  "
+        f"🟢 LOW: `{summary[LOW]}`",
+    ]
+
+    for severity in [CRITICAL, HIGH, MEDIUM]:
+        group = [h for h in alertable if h.severity == severity]
+        if not group:
+            continue
+        emoji = SEVERITY_EMOJI[severity]
+        lines.append(f"\n{emoji} *{severity}* ({len(group)})")
+        for h in group[:MAX_PREVIEW]:
+            lines.append(f"`{code(h.raw)}`")
+            lines.append(f"_↳ {' | '.join(h.reasons)}_")
+        if len(group) > MAX_PREVIEW:
+            lines.append(f"_...and {len(group) - MAX_PREVIEW} more_")
+
+    text = "\n".join(lines)
+    if len(text) > max_len:
+        # Drop whole trailing lines rather than slicing mid-line, so no
+        # markdown span is left half open.
+        notice = "_...message truncated_"
+        budget = max_len - len(notice) - 1
+        kept: list[str] = []
+        used = 0
+        for line in lines:
+            cost = len(line) + 1
+            if used + cost > budget:
+                break
+            kept.append(line)
+            used += cost
+        kept.append(notice)
+        text = "\n".join(kept)
+
+    try:
+        await bot.send_message(NOTIFY_CHAT_ID, text, parse_mode="markdown")
+    except Exception as e:
+        log.error(f"Failed to send Telegram alert: {e}")
+
+
+# ─── Main entry point ────────────────────────────────────────────────────────
+
+async def notify(bot: TelegramClient, hits: list[str], source: str, filename: str) -> None:
+    """
+    Full notification pipeline for a batch of raw hit lines:
+      1. Score all hits
+      2. Deduplicate
+      3. Insert all hits into SQLite (new + dupes, flagged accordingly)
+      4. Post an EvHit event to the TUI bus for each new hit
+      5. Append new hits to hits.txt and hits.csv
+      6. Send a Telegram alert for the new hits
+
+    Does nothing for an empty *hits* list. Steps 4-6 are skipped when every
+    hit was already seen.
+    """
+    if not hits:
+        return
+
+    scored = score_hits(hits)
+    log.info(f"  Scored {len(scored)} hit(s) — {summarize(scored)}")
+
+    fresh, repeats = deduplicate(scored)
+
+    # Both groups are stored; only the seen_before flag differs.
+    for group, already_seen in ((fresh, False), (repeats, True)):
+        if group:
+            insert_hits(group, source, filename, seen_before=already_seen)
+
+    if not fresh:
+        log.info("  All hits already seen before — no alert sent.")
+        return
+
+    for hit in fresh:
+        event = bus.EvHit(
+            severity=hit.severity,
+            raw=hit.raw,
+            source=source,
+            filename=filename,
+            reasons=hit.reasons,
+        )
+        bus.post(event)
+
+    write_hits(fresh, source)
+    write_hits_csv(fresh, source, filename)
+    await send_alert(bot, fresh, source, filename)
+
+
+async def send_status(bot: TelegramClient, message: str) -> None:
+    """
+    Send *message* to NOTIFY_CHAT_ID using markdown parsing.
+
+    Any send error is logged and swallowed, so a failed status message never
+    propagates to the caller.
+    """
+    try:
+        await bot.send_message(NOTIFY_CHAT_ID, message, parse_mode="markdown")
+    except Exception as err:
+        log.error(f"Failed to send status message: {err}")
diff --git a/core/processor.md b/core/processor.md
new file mode 100644
index 0000000..29c4e87
--- /dev/null
+++ b/core/processor.md
@@ -0,0 +1,69 @@
+# core/processor.py
+
+Archive extraction and hit searching. No Telegram deps, no async.
+
+## Public API
+
+```python
+from core.processor import compile_patterns, process_file
+```
+
+### `compile_patterns(keywords: list[str]) -> list[re.Pattern]`
+Compiles a list of keyword strings into case-insensitive regex patterns.  
+Call once at startup; pass the result everywhere patterns are needed.
+
+```python
+patterns = compile_patterns(config.TARGET_KEYWORDS)
+```
+
+### `process_file(filepath: Path, patterns, password=None) -> list[str]`
+Full pipeline: unpack → search each `.txt` → recurse into nested archives → clean up everything.  
+Returns list of matching raw lines (hits). Deletes the original file and all extracted contents on completion.
+
+```python
+hits = process_file(Path("data/tmp/combo.zip"), patterns, password="infected")
+```
+
+---
+
+## Internal functions
+
+| Function | Signature | Description |
+|----------|-----------|-------------|
+| `search_file` | `(filepath, patterns) -> list[str]` | Stream-reads `.txt` line by line; ignores encoding errors |
+| `unpack` | `(filepath, extra_password) -> (files, extract_dir\|None)` | Dispatches to correct extractor; plain `.txt` returned as-is |
+| `extract_zip` | `(filepath, dest, extra_password)` | Tries no password first, then `ARCHIVE_PASSWORDS` list |
+| `extract_7z` | `(filepath, dest, extra_password)` | Requires `py7zr`; skips if not installed |
+| `extract_rar` | `(filepath, dest, extra_password)` | Requires `rarfile` + `unrar` binary |
+| `_try_passwords` | `(extract_fn, passwords)` | Iterates password list, stops on first success |
+
+---
+
+## Supported formats
+
+| Extension | Library | Notes |
+|-----------|---------|-------|
+| `.txt` | built-in | Stream-read, no load into memory |
+| `.zip` | `zipfile` | stdlib |
+| `.7z` | `py7zr` | optional; skipped if not installed |
+| `.rar` | `rarfile` | optional; requires `unrar` system binary |
+
+Nested archives are recursed **one level** only.
+
+---
+
+## Password order
+
+1. `extra_password` (from message/channel carry-forward) — tried first
+2. `config.ARCHIVE_PASSWORDS` — tried in order
+
+---
+
+## Cleanup guarantee
+
+`process_file` always deletes:
+- Extracted individual files
+- Extract subdirectory
+- Original downloaded file
+
+Even if no hits are found.
diff --git a/core/processor.py b/core/processor.py
new file mode 100644
index 0000000..4f844dc
--- /dev/null
+++ b/core/processor.py
@@ -0,0 +1,233 @@
+"""
+processor.py — Archive extraction and hit searching logic.
+
+Supports: .txt, .zip, .7z, .rar
+Stream-processes files line by line — safe for large combo lists.
+"""
+
+# rarfile is optional: guard this early import so a missing package does not
+# crash module import before the HAS_RAR fallback below gets a chance to run.
+try:
+    import rarfile
+    rarfile.UNRAR_TOOL = "unrar"
+except ImportError:
+    pass
+
+import re
+import zipfile
+import logging
+import shutil
+from pathlib import Path
+
+try:
+    import py7zr
+    HAS_7Z = True
+except ImportError:
+    HAS_7Z = False
+
+try:
+    import rarfile
+    HAS_RAR = True
+except ImportError:
+    HAS_RAR = False
+
+from config import ARCHIVE_PASSWORDS
+
+log = logging.getLogger(__name__)
+
+
+# ─── Searching ───────────────────────────────────────────────────────────────
+
+def compile_patterns(keywords: list[str]) -> list[re.Pattern]:
+    """Compile each keyword as a case-insensitive regular expression, in order."""
+    compiled: list[re.Pattern] = []
+    for keyword in keywords:
+        compiled.append(re.compile(keyword, flags=re.IGNORECASE))
+    return compiled
+
+
+def search_file(filepath: Path, patterns: list[re.Pattern]) -> list[str]:
+    """
+    Return the stripped, non-empty lines of *filepath* that match any pattern.
+
+    The file is read as UTF-8 one line at a time, with undecodable bytes
+    dropped. If reading fails, a warning is logged and the matches collected
+    so far are returned.
+    """
+    matched: list[str] = []
+    try:
+        with open(filepath, "r", encoding="utf-8", errors="ignore") as handle:
+            for raw_line in handle:
+                text = raw_line.strip()
+                if not text:
+                    continue
+                for pattern in patterns:
+                    if pattern.search(text):
+                        matched.append(text)
+                        break  # one match is enough; record the line once
+    except Exception as err:
+        log.warning(f"Could not read {filepath.name}: {err}")
+    return matched
+
+
+# ─── Extraction ──────────────────────────────────────────────────────────────
+
+def _try_passwords(extract_fn, passwords: list[bytes]) -> bool:
+    """
+    Call *extract_fn* with each password in order until one call succeeds.
+
+    A call that raises any Exception counts as a failed attempt. Returns True
+    as soon as one attempt succeeds (later passwords are not tried), and
+    False when every attempt failed or the list is empty.
+    """
+    def attempt(candidate: bytes) -> bool:
+        try:
+            extract_fn(candidate)
+        except Exception:
+            return False
+        return True
+
+    # any() short-circuits, so iteration stops at the first success.
+    return any(attempt(candidate) for candidate in passwords)
+
+
+def extract_zip(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
+    """
+    Extract a ZIP archive into *dest* and return every file found under *dest*.
+
+    Extraction is first attempted without a password. If that raises
+    RuntimeError, *extra_password* (when given) is tried first, followed by
+    the entries of ARCHIVE_PASSWORDS in order. Returns an empty list when the
+    archive cannot be unlocked, is not a valid ZIP, or extraction fails.
+    """
+    # Candidate list: the per-message password takes priority over defaults.
+    passwords = ARCHIVE_PASSWORDS.copy()
+    if extra_password:
+        passwords.insert(0, extra_password.encode())
+    extracted: list[Path] = []
+    try:
+        with zipfile.ZipFile(filepath) as zf:
+            def try_extract(pwd: bytes):
+                zf.extractall(dest, pwd=pwd or None)
+
+            try:
+                zf.extractall(dest)
+            except RuntimeError:
+                log.info("  ZIP is password-protected, trying common passwords...")
+                # Use the merged list, not ARCHIVE_PASSWORDS, so that
+                # extra_password is actually attempted.
+                if not _try_passwords(try_extract, passwords):
+                    log.warning(f"  Could not unlock {filepath.name} — skipping.")
+                    return []
+
+            extracted = [p for p in dest.rglob("*") if p.is_file()]
+    except zipfile.BadZipFile:
+        log.warning(f"  {filepath.name} is not a valid ZIP.")
+    except Exception as e:
+        log.warning(f"  ZIP extraction error on {filepath.name}: {e}")
+    return extracted
+
+
+def extract_7z(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
+    """
+    Extract a 7z archive into *dest* and return every file found under *dest*.
+
+    Returns an empty list when py7zr is not installed. Extraction is first
+    attempted without a password; on PasswordRequired, *extra_password* (when
+    given) is tried first, followed by the entries of ARCHIVE_PASSWORDS in
+    order. Returns an empty list when the archive cannot be unlocked or
+    extraction fails.
+    """
+    if not HAS_7Z:
+        log.warning("py7zr not installed — skipping .7z file.")
+        return []
+    extracted: list[Path] = []
+    # Candidate list: the per-message password takes priority over defaults.
+    passwords = ARCHIVE_PASSWORDS.copy()
+    if extra_password:
+        passwords.insert(0, extra_password.encode())
+
+    def try_extract(pwd: bytes):
+        # The archive is reopened for every attempt because the password is
+        # supplied when the SevenZipFile is constructed.
+        with py7zr.SevenZipFile(filepath, mode="r", password=pwd.decode()) as z:
+            z.extractall(dest)
+
+    try:
+        # Try without password first
+        try:
+            with py7zr.SevenZipFile(filepath, mode="r") as z:
+                z.extractall(dest)
+        except py7zr.exceptions.PasswordRequired:
+            log.info("  7z is password-protected, trying common passwords...")
+            # Use the merged list, not ARCHIVE_PASSWORDS, so that
+            # extra_password is actually attempted.
+            if not _try_passwords(try_extract, passwords):
+                log.warning(f"  Could not unlock {filepath.name} — skipping.")
+                return []
+
+        extracted = [p for p in dest.rglob("*") if p.is_file()]
+    except Exception as e:
+        log.warning(f"  7z extraction error on {filepath.name}: {e}")
+    return extracted
+
+
+def extract_rar(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
+    """
+    Extract a RAR archive into *dest* and return every file found under *dest*.
+
+    Returns an empty list when rarfile is not installed. Extraction is first
+    attempted without a password; if that fails with anything other than
+    BadRarFile, *extra_password* (when given) is tried first, followed by the
+    entries of ARCHIVE_PASSWORDS in order. Returns an empty list when the
+    archive is invalid, cannot be unlocked, or extraction fails.
+    """
+    if not HAS_RAR:
+        log.warning("rarfile not installed — skipping .rar file.")
+        return []
+
+    # Candidate list: the per-message password takes priority over defaults.
+    passwords = ARCHIVE_PASSWORDS.copy()
+    if extra_password:
+        passwords.insert(0, extra_password.encode())
+    extracted: list[Path] = []
+    try:
+        with rarfile.RarFile(filepath) as rf:
+            def try_extract(pwd: bytes):
+                rf.extractall(dest, pwd=pwd.decode() if pwd else None)
+
+            try:
+                rf.extractall(dest)
+            except rarfile.BadRarFile:
+                log.warning(f"  {filepath.name} is not a valid RAR.")
+                return []
+            except Exception:
+                log.info("  RAR may be password-protected, trying common passwords...")
+                # Use the merged list, not ARCHIVE_PASSWORDS, so that
+                # extra_password is actually attempted.
+                if not _try_passwords(try_extract, passwords):
+                    log.warning(f"  Could not unlock {filepath.name} — skipping.")
+                    return []
+
+        extracted = [p for p in dest.rglob("*") if p.is_file()]
+    except Exception as e:
+        log.warning(f"  RAR extraction error on {filepath.name}: {e}")
+    return extracted
+
+
+def unpack(filepath: Path, extra_password: str | None = None) -> tuple[list[Path], Path | None]:
+    """
+    Unpack *filepath* when its extension marks it as a supported archive.
+
+    For .zip, .7z and .rar (case-insensitive) the archive is extracted into a
+    sibling directory named after the file's stem, and the result is
+    (extracted files, that directory). Any other file is returned untouched
+    as ([filepath], None), meaning there is no extract directory to clean up.
+    """
+    kind = filepath.suffix.lower()
+    if kind not in (".zip", ".7z", ".rar"):
+        # Plain file: nothing to extract.
+        return [filepath], None
+
+    target_dir = filepath.parent / filepath.stem
+    target_dir.mkdir(exist_ok=True)
+
+    if kind == ".zip":
+        extractor = extract_zip
+    elif kind == ".7z":
+        extractor = extract_7z
+    else:
+        extractor = extract_rar
+
+    return extractor(filepath, target_dir, extra_password), target_dir
+
+
+# ─── Main entry point ────────────────────────────────────────────────────────
+
+def process_file(
+    filepath: Path,
+    patterns,
+    password: str | None = None,
+    *,
+    max_depth: int = 1,
+) -> list[str]:
+    """
+    Full pipeline: unpack → search each .txt → recurse into nested archives
+    → clean up everything.
+
+    Args:
+        filepath:  downloaded file (archive or plain text) to process.
+        patterns:  compiled patterns handed to search_file().
+        password:  optional archive password; also passed on to nested
+                   archives.
+        max_depth: how many levels of nested archives to descend into. The
+                   default of 1 matches the documented "one level" limit and
+                   stops archives that contain themselves from recursing
+                   without end.
+
+    Returns the list of matching lines (hits). The extracted files, the
+    extract directory and *filepath* itself are deleted before returning.
+    """
+    log.info(f"  Processing: {filepath.name}")
+    all_hits: list[str] = []
+
+    files, extract_dir = unpack(filepath, extra_password=password)
+
+    for f in files:
+        suffix = f.suffix.lower()
+        if suffix == ".txt":
+            hits = search_file(f, patterns)
+            if hits:
+                log.info(f"    ✓ {len(hits)} hit(s) in {f.name}")
+            all_hits.extend(hits)
+
+        elif suffix in {".zip", ".7z", ".rar"} and f != filepath:
+            if max_depth > 0:
+                log.info(f"    → Nested archive: {f.name}")
+                nested_hits = process_file(
+                    f, patterns, password=password, max_depth=max_depth - 1
+                )
+                all_hits.extend(nested_hits)
+                continue  # the nested call already cleaned up f
+            log.warning(f"    Nested archive {f.name} exceeds depth limit — skipping.")
+
+        # Clean up extracted file
+        try:
+            f.unlink(missing_ok=True)
+        except OSError as e:
+            log.debug(f"    Could not delete {f.name}: {e}")
+
+    # Clean up extract dir
+    if extract_dir and extract_dir.exists():
+        shutil.rmtree(extract_dir, ignore_errors=True)
+
+    # Clean up original download
+    try:
+        filepath.unlink(missing_ok=True)
+    except OSError as e:
+        log.debug(f"    Could not delete {filepath.name}: {e}")
+
+    return all_hits
diff --git a/core/scraper.md b/core/scraper.md
new file mode 100644
index 0000000..9ade2be
--- /dev/null
+++ b/core/scraper.md
@@ -0,0 +1,65 @@
+# core/scraper.py
+
+Telethon user-client layer. Handles live listening, backfill, and the single-message download pipeline.
+
+## Public API
+
+```python
+from core.scraper import handle_message, backfill_all, register_handlers, warm_entity_cache
+```
+
+### `handle_message(client, bot, msg, source_name, patterns, password=None)`
+**async.** Full pipeline for one document message:
+1. Extract filename + size, check allowlist + size guard
+2. Check `utils.cache` — skip if already seen
+3. Try `tdl` download → Telethon fallback
+4. `core.processor.process_file()` → hits
+5. `core.notifier.notify()` if hits found
+6. `utils.cache.mark_seen()`
+
+Called by: live handler, `bot_downloader`, backfill fallback path.
+
+### `backfill_all(client, bot, patterns)`
+**async.** Iterates `config.WATCHED_CHANNELS`, calls `backfill_channel()` for each.  
+No-op if `config.BACKFILL_LIMIT <= 0`.
+
+### `register_handlers(client, bot, patterns)`
+Registers a `NewMessage` Telethon event handler on `config.WATCHED_CHANNELS`.  
+Used in **CLI mode only** (`--no-tui`). The TUI manages its own handler via `_make_handler()` in `tui/app.py`.
+
+### `warm_entity_cache(client)`
+**async.** Iterates `client.iter_dialogs()` so Telethon caches entity mappings.  
+Must be called before using raw numeric channel IDs.
+
+---
+
+## Internal functions
+
+| Function | Description |
+|----------|-------------|
+| `get_filename(msg)` | Extracts filename from `MessageMediaDocument`; falls back to `{msg_id}{ext}` from MIME |
+| `get_filesize(msg)` | Returns document size in bytes |
+| `is_processable(filename, size)` | Checks extension allowlist + size limit; returns `(bool, reason)` |
+| `_make_dest(msg, filename)` | Resolves temp path, handles collision with `{msg_id}_{filename}` |
+| `_telethon_download(client, msg, dest, ...)` | Telethon fallback with tqdm progress + flood-wait handling. Posts `EvDownload*` bus events |
+| `backfill_channel(client, bot, channel, patterns, limit)` | Scans history with password carry-forward; batches via tdl |
+| `_process_batch(client, bot, batch, patterns)` | One tdl invocation for up to `TDL_AMOUNT` messages; per-file Telethon fallback |
+
+---
+
+## Password carry-forward (backfill)
+
+Channels often post the archive password as a separate text message.  
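+`backfill_channel` iterates newest→oldest, carrying `last_password` forward so that file messages reached later in the scan (i.e. older ones) pick it up; files already queued or processed before the password message is seen keep whatever password they had.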
+
+---
+
+## Download strategy
+
+```
+is_tdl_available()?
+  yes → download_single_with_tdl() / download_batch_with_tdl()
+          ↓ failed?
+        _telethon_download()
+  no  → _telethon_download() directly
+```
diff --git a/core/scraper.py b/core/scraper.py
new file mode 100644
index 0000000..e95821b
--- /dev/null
+++ b/core/scraper.py
@@ -0,0 +1,410 @@
+"""
+scraper.py — Telethon user client.
+
+Handles:
+  - Listening for new file messages in watched channels
+  - Listening for messages with inline download buttons (bot-dispatched files)
+  - Backfilling recent channel history on startup (batched via tdl)
+  - Downloading files safely (size guard, flood wait)
+"""
+
+import asyncio
+import logging
+import time
+from pathlib import Path
+
+from tqdm import tqdm
+from telethon import TelegramClient, events
+from telethon.errors import FloodWaitError, ChannelPrivateError, UsernameNotOccupiedError
+from telethon.tl.types import (
+    MessageMediaDocument,
+    DocumentAttributeFilename,
+    InputDocumentFileLocation,
+)
+
+from config import (
+    ALLOWED_EXTENSIONS,
+    BACKFILL_LIMIT,
+    MAX_FILE_SIZE,
+    TEMP_DIR,
+    WATCHED_CHANNELS,
+    TDL_AMOUNT,
+)
+from core.bot_downloader import handle_bot_download_message, has_download_button, extract_password
+from utils.cache import is_seen, mark_seen
+from core.processor import process_file
+from core.notifier import notify
+from core.tdl_downloader import (
+    BatchEntry,
+    download_batch_with_tdl,
+    download_single_with_tdl,
+    is_tdl_available,
+)
+from tui import events as bus
+
+log = logging.getLogger(__name__)
+
+
+# ─── Helpers ──────────────────────────────────────────────────────────────────
+
+def get_filename(msg) -> str | None:
+    """
+    Return a filename for a document message, or None for non-document media.
+
+    The name of the first DocumentAttributeFilename attribute is used when
+    one is present. Otherwise a name is built from the message id plus an
+    extension derived from the MIME type ('.bin' for unknown types).
+    """
+    media = msg.media
+    if not isinstance(media, MessageMediaDocument):
+        return None
+
+    document = media.document
+    named = [a for a in document.attributes if isinstance(a, DocumentAttributeFilename)]
+    if named:
+        return named[0].file_name
+
+    # No explicit name: derive an extension from the MIME type.
+    mime_type = getattr(document, "mime_type", "") or ""
+    known_extensions = (
+        ("application/x-rar-compressed", ".rar"),
+        ("application/vnd.rar", ".rar"),
+        ("application/zip", ".zip"),
+        ("application/x-7z-compressed", ".7z"),
+        ("text/plain", ".txt"),
+    )
+    extension = dict(known_extensions).get(mime_type, ".bin")
+    return f"{msg.id}{extension}"
+
+
+def get_filesize(msg) -> int:
+    """Return the document size in bytes; 0 for non-documents or a missing size."""
+    if isinstance(msg.media, MessageMediaDocument):
+        return msg.media.document.size or 0
+    return 0
+
+
+def is_processable(filename: str, size: int) -> tuple[bool, str]:
+    """
+    Decide whether a file should be downloaded.
+
+    Returns (True, "") when the lowercased extension is in ALLOWED_EXTENSIONS
+    and *size* does not exceed MAX_FILE_SIZE; otherwise (False, reason) with
+    a human-readable explanation.
+    """
+    extension = Path(filename).suffix.lower()
+    if extension not in ALLOWED_EXTENSIONS:
+        return False, f"extension {extension!r} not in allowlist"
+
+    if size <= MAX_FILE_SIZE:
+        return True, ""
+
+    bytes_per_mb = 1024 * 1024
+    size_mb = size / bytes_per_mb
+    return False, f"too large ({size_mb:.1f} MB > {MAX_FILE_SIZE // bytes_per_mb} MB limit)"
+
+
+def _make_dest(msg, filename: str) -> Path:
+    """
+    Resolve the destination path inside TEMP_DIR, avoiding name collisions.
+
+    *filename* comes from the remote message, so only its final path
+    component is used: directory parts (with either slash style) are dropped
+    so the result can never point outside TEMP_DIR. If nothing usable is
+    left, '<msg.id>.bin' is used. When the path already exists, the message
+    id is prepended to the name.
+    """
+    TEMP_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Normalise backslashes first so Windows-style paths are stripped too.
+    safe_name = Path(filename.replace("\\", "/")).name
+    if safe_name in ("", ".", ".."):
+        safe_name = f"{msg.id}.bin"
+
+    dest = TEMP_DIR / safe_name
+    if dest.exists():
+        dest = TEMP_DIR / f"{msg.id}_{safe_name}"
+    return dest
+
+
+# ─── Telethon fallback download ───────────────────────────────────────────────
+
+async def _telethon_download(client: TelegramClient, msg, dest: Path, filename: str, size: int, batch_id: str | None = None) -> bool:
+    """
+    Download a single file via Telethon. Returns True on success.
+
+    Posts EvDownloadStarted and then EvDownloadDone or EvDownloadFailed on
+    the TUI bus. When *batch_id* is None the call is standalone, so an id is
+    generated and an EvDownloadQueued event is posted first. On
+    FloodWaitError the function sleeps for the requested time and retries
+    once with download_media(). Every failure, including a failed retry, is
+    reported as False instead of being raised, and a partial file at *dest*
+    is removed.
+    """
+    _bid = batch_id or f"telethon_{int(time.monotonic_ns())}"
+    if batch_id is None:
+        # Standalone call (not already queued by tdl path) — post queued event
+        bus.post(bus.EvDownloadQueued(
+            batch_id=_bid, filename=filename,
+            size_mb=round(size / (1024 * 1024), 2),
+            source="telethon", password=None,
+        ))
+    bus.post(bus.EvDownloadStarted(batch_id=_bid, filename=filename))
+
+    def _fail(err: Exception) -> bool:
+        # Log, notify the TUI and drop whatever partial file was written.
+        log.error(f"  Telethon download failed for {filename}: {err}")
+        bus.post(bus.EvDownloadFailed(batch_id=_bid, filename=filename, reason=str(err)))
+        try:
+            dest.unlink(missing_ok=True)
+        except OSError:
+            pass
+        return False
+
+    try:
+        with tqdm(
+            total=size,
+            unit="B",
+            unit_scale=True,
+            unit_divisor=1024,
+            desc=filename[:40],
+            colour="cyan",
+        ) as pbar:
+            async def progress(current, total):
+                pbar.n = current
+                pbar.refresh()
+
+            doc = msg.media.document
+            location = InputDocumentFileLocation(
+                id=doc.id,
+                access_hash=doc.access_hash,
+                file_reference=doc.file_reference,
+                thumb_size="",
+            )
+            await client.download_file(
+                location,
+                file=dest,
+                part_size_kb=512,
+                progress_callback=progress,
+            )
+    except FloodWaitError as e:
+        log.warning(f"  Flood wait: sleeping {e.seconds}s...")
+        await asyncio.sleep(e.seconds)
+        # The retry runs inside an except block, where the sibling
+        # `except Exception` below does not apply — guard it explicitly.
+        try:
+            await client.download_media(msg, file=dest)
+        except Exception as retry_err:
+            return _fail(retry_err)
+    except Exception as e:
+        return _fail(e)
+
+    bus.post(bus.EvDownloadDone(batch_id=_bid, filename=filename, via="telethon"))
+    return True
+
+
+# ─── Single-message pipeline (live handler + bot_downloader) ──────────────────
+
+async def handle_message(
+    client: TelegramClient,
+    bot: TelegramClient,
+    msg,
+    source_name: str,
+    patterns,
+    password: str | None = None,
+) -> None:
+    """
+    Download and process a single file message.
+
+    Steps: resolve filename and size, apply the allowlist/size guard, skip
+    documents already marked as seen, download (tdl first when available,
+    then Telethon), run process_file(), mark the document as seen and call
+    notify() when there are hits.
+    """
+    filename = get_filename(msg)
+    if not filename:
+        log.warning("  handle_message: could not extract filename, skipping.")
+        return
+
+    size = get_filesize(msg)
+    accepted, why_not = is_processable(filename, size)
+    if not accepted:
+        log.warning(f"  handle_message: skipping '{filename}' — {why_not}")
+        return
+
+    doc_id = msg.media.document.id
+    if is_seen(doc_id):
+        log.info(f"  Skipping {filename} — already processed.")
+        return
+
+    dest = _make_dest(msg, filename)
+    log.info(f"↓ Downloading: {filename} ({size / 1024:.1f} KB) from {source_name}")
+
+    # Preferred path: tdl. Anything it cannot fetch falls back to Telethon.
+    fetched = False
+    if is_tdl_available():
+        fetched = await download_single_with_tdl(msg, dest)
+    if not fetched:
+        if is_tdl_available():
+            log.warning("  [tdl] failed — falling back to Telethon")
+        fetched = await _telethon_download(client, msg, dest, filename, size)
+
+    if not fetched:
+        log.error(f"  All download attempts failed for {filename}")
+        return
+
+    found = process_file(dest, patterns, password=password)
+    mark_seen(doc_id)
+
+    if not found:
+        log.info(f"  No hits in {filename}")
+        return
+    await notify(bot, found, source_name, filename)
+
+
+# ─── Batch pipeline (backfill only) ───────────────────────────────────────────
+
+async def _process_batch(
+    client: TelegramClient,
+    bot: TelegramClient,
+    batch: list[tuple],   # list of (msg, source_name, password)
+    patterns,
+) -> int:
+    """
+    Download up to TDL_AMOUNT messages in one tdl invocation, then process
+    each. Falls back to Telethon per-file for anything tdl missed.
+
+    Every entry gets a distinct destination path, even when several messages
+    in the batch share a filename. Returns the number of files successfully
+    processed.
+    """
+    if not batch:
+        return 0
+
+    # Build BatchEntry list
+    entries: list[BatchEntry] = []
+    claimed: set[Path] = set()
+    for msg, source_name, password in batch:
+        filename = get_filename(msg)
+        if not filename:
+            continue
+        dest = _make_dest(msg, filename)
+        # _make_dest only checks the disk, and nothing is downloaded yet, so
+        # two same-named files in this batch would get the same path.
+        if dest in claimed:
+            dest = dest.with_name(f"{msg.id}_{dest.name}")
+        claimed.add(dest)
+        entries.append(BatchEntry(
+            msg=msg,
+            filename=filename,
+            dest=dest,
+            doc_id=msg.media.document.id,
+            source_name=source_name,
+            password=password,
+        ))
+
+    if not entries:
+        # Nothing usable in the batch — do not invoke tdl at all.
+        return 0
+
+    names = ", ".join(e.filename for e in entries)
+    log.info(f"[Batch] {len(entries)} file(s): {names}")
+
+    # One tdl call for the whole batch
+    results = await download_batch_with_tdl(entries)
+
+    processed = 0
+    for entry in entries:
+        downloaded = results.get(entry.doc_id, False)
+
+        if not downloaded:
+            # Per-file Telethon fallback
+            log.info(f"  [Batch] Telethon fallback: {entry.filename}")
+            size = get_filesize(entry.msg)
+            downloaded = await _telethon_download(client, entry.msg, entry.dest, entry.filename, size)
+
+        if not downloaded:
+            log.error(f"  [Batch] All attempts failed: {entry.filename}")
+            continue
+
+        hits = process_file(entry.dest, patterns, password=entry.password)
+        mark_seen(entry.doc_id)
+
+        if hits:
+            await notify(bot, hits, entry.source_name, entry.filename)
+        else:
+            log.info(f"  No hits in {entry.filename}")
+
+        processed += 1
+
+    return processed
+
+
+# ─── Backfill ─────────────────────────────────────────────────────────────────
+
+async def backfill_channel(
+    client: TelegramClient,
+    bot: TelegramClient,
+    channel: str | int,
+    patterns,
+    limit: int,
+) -> None:
+    """
+    Scan the last `limit` messages of a channel for file attachments.
+
+    Document messages that pass the allowlist/size guard and are not yet
+    marked as seen are queued for batched tdl download when tdl is available,
+    or handled one by one otherwise. Messages with a download button are
+    handled individually. Access and scan errors are logged, not raised.
+    """
+    log.info(f"[Backfill] Scanning history: {channel} (last {limit} messages)")
+    total = 0
+    pending: list[tuple] = []   # (msg, source_name, password)
+    last_password: str | None = None
+    source_name = str(channel)
+
+    async def flush_batch():
+        nonlocal total
+        if pending:
+            total += await _process_batch(client, bot, pending, patterns)
+            pending.clear()
+
+    try:
+        async for msg in client.iter_messages(channel, limit=limit):
+            # Password carry-forward: remember the most recent password found
+            # in the scan. A message's own password wins; otherwise the last
+            # remembered one is used. The password only reaches messages
+            # iterated after the one that carried it — files queued or
+            # handled earlier keep the password they were given.
+            found = extract_password(msg)
+            if found:
+                last_password = found
+            password = last_password
+
+            if msg.media and isinstance(msg.media, MessageMediaDocument):
+                filename = get_filename(msg)
+                size = get_filesize(msg)
+
+                if not filename:
+                    continue
+
+                accepted, why_not = is_processable(filename, size)
+                if not accepted:
+                    log.warning(f"  [Backfill] Skipping '{filename}' — {why_not}")
+                    continue
+
+                if is_seen(msg.media.document.id):
+                    log.info(f"  [Backfill] Already seen: {filename}")
+                    continue
+
+                if not is_tdl_available():
+                    # No tdl — process this message on its own right away.
+                    await handle_message(client, bot, msg, source_name, patterns, password=password)
+                    total += 1
+                    await asyncio.sleep(0.5)
+                    continue
+
+                pending.append((msg, source_name, password))
+                if len(pending) >= TDL_AMOUNT:
+                    await flush_batch()
+
+            elif msg.buttons and has_download_button(msg):
+                # Bot-button messages can't be batched: flush what is queued,
+                # then handle this one individually.
+                await flush_batch()
+                await handle_bot_download_message(client, bot, msg, source_name, patterns, password=password)
+                total += 1
+                await asyncio.sleep(1.5)
+
+        # Process whatever is still queued once the scan is over.
+        await flush_batch()
+
+    except (ChannelPrivateError, UsernameNotOccupiedError) as err:
+        log.error(f"[Backfill] Cannot access {channel}: {err}")
+    except Exception as err:
+        log.error(f"[Backfill] Error scanning {channel}: {err}")
+
+    log.info(f"[Backfill] Done: {channel} — {total} file(s) processed")
+
+
+async def backfill_all(
+    client: TelegramClient,
+    bot: TelegramClient,
+    patterns,
+) -> None:
+    """Backfill all watched channels sequentially."""
+    if BACKFILL_LIMIT <= 0:
+        log.info("[Backfill] Disabled (BACKFILL_LIMIT=0)")
+        return
+    log.info(f"[Backfill] Starting for {len(WATCHED_CHANNELS)} channel(s)...")
+    for ch in WATCHED_CHANNELS:
+        await backfill_channel(client, bot, ch, patterns, BACKFILL_LIMIT)
+    log.info("[Backfill] Complete.")
+
+
+# ─── Entity cache warmup ──────────────────────────────────────────────────────
+
+async def warm_entity_cache(client: TelegramClient) -> None:
+    """
+    Fetches your dialog list so Telethon caches all entity mappings.
+    Required before using raw numeric IDs.
+    """
+    log.info("Warming entity cache (fetching dialogs)...")
+    async for _ in client.iter_dialogs():
+        pass
+    log.info("Entity cache ready.")
+
+
+# ─── Live listener ────────────────────────────────────────────────────────────
+
+def register_handlers(
+    client: TelegramClient,
+    bot: TelegramClient,
+    patterns,
+) -> None:
+    """Register the NewMessage event handler for all watched channels."""
+
+    # Per-channel password cache for the live handler.  Channels often post
+    # the password in a text message separate from the file message, so the
+    # last seen password per channel is applied to file messages that arrive
+    # AFTER it.  A file handled before its password is posted does not get it.
+    _channel_passwords: dict[int, str] = {}
+
+    @client.on(events.NewMessage(chats=WATCHED_CHANNELS))
+    async def on_new_message(event):
+        msg = event.message
+        try:
+            source = event.chat.username or str(event.chat_id)
+        except Exception:
+            source = str(event.chat_id)
+
+        chat_id = event.chat_id
+        log.info(f"[Live] New message in {source}")
+
+        # Update cache if this message carries a password
+        msg_password = extract_password(msg)
+        if msg_password:
+            _channel_passwords[chat_id] = msg_password
+            log.debug(f"[Live] Password cached for {source}: '{msg_password}'")
+
+        password = msg_password or _channel_passwords.get(chat_id)
+
+        if msg.media and isinstance(msg.media, MessageMediaDocument):
+            await handle_message(client, bot, msg, source, patterns, password=password)
+        elif msg.buttons and has_download_button(msg):
+            await handle_bot_download_message(client, bot, msg, source, patterns, password=password)
diff --git a/core/tdl_downloader.md b/core/tdl_downloader.md
new file mode 100644
index 0000000..74efc5b
--- /dev/null
+++ b/core/tdl_downloader.md
@@ -0,0 +1,70 @@
+# core/tdl_downloader.py
+
+Fast file downloads via `tdl` (Go MTProto). Falls back gracefully if tdl is not installed.
+
+## Public API
+
+```python
+from core.tdl_downloader import (
+    is_tdl_available,
+    download_single_with_tdl,
+    download_batch_with_tdl,
+    BatchEntry,
+)
+```
+
+### `is_tdl_available() -> bool`
+Returns `True` if `tdl` binary is on PATH.
+
+### `download_single_with_tdl(msg, dest: Path) -> bool`
+**async.** Downloads one message's document. Returns `True` on success.  
+Used by the live handler and `bot_downloader`.
+
+### `download_batch_with_tdl(entries: list[BatchEntry]) -> dict[int, bool]`
+**async.** Downloads up to `TDL_AMOUNT` messages in a single `tdl dl` invocation.  
+Returns `{doc_id: True|False}` — `False` means Telethon fallback needed.
+
+---
+
+## BatchEntry dataclass
+
+```python
+@dataclass
+class BatchEntry:
+    msg:         object       # Telethon Message
+    filename:    str
+    dest:        Path         # final destination path in TEMP_DIR
+    doc_id:      int          # msg.media.document.id
+    source_name: str
+    password:    str | None
+```
+
+---
+
+## TUI output pipeline
+
+In TUI mode (`bus.tui_active == True`), `_run_tdl` pipes stdout+stderr and relays lines as `EvTdlOutput` events in real time.  
+**Reads raw 256-byte chunks** (not line-by-line) and splits on `\r` and `\n`, because tdl uses `\r` to overwrite its progress bar in place.
+
+In CLI mode: subprocess inherits the terminal, progress bars render natively.
+
+---
+
+## Staging directory isolation
+
+Each batch/single download gets a unique `data/tmp/_tdl_{monotonic_ns}/` staging dir.  
+After `tdl` exits, files are matched by name (with fuzzy stem fallback for `filenamify()` mangling) and moved to final `dest`. Staging dir is removed regardless of outcome.
+
+`--template '{{ filenamify .FileName }}'` — tdl uses the original Telegram filename, not its default `DialogID_MessageID_filename` format.
+
+---
+
+## Config knobs (`config.py`)
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `TDL_NAMESPACE` | `"default"` | `-n` flag; `None` omits it |
+| `TDL_THREADS` | `8` | `-t` chunk workers per file |
+| `TDL_PERFILE` | `4` | `-l` concurrent files per invocation |
+| `TDL_AMOUNT` | `4` | Max messages per batch |
+| `TDL_TAKEOUT` | `False` | `--takeout` session flag |
diff --git a/core/tdl_downloader.py b/core/tdl_downloader.py
new file mode 100644
index 0000000..eea963f
--- /dev/null
+++ b/core/tdl_downloader.py
@@ -0,0 +1,363 @@
+"""
+tdl_downloader.py — Fast file downloads via tdl (Go MTProto implementation).
+
+Install: https://github.com/iyear/tdl
+    curl -sSL https://raw.githubusercontent.com/iyear/tdl/main/scripts/install.sh | bash
+
+First-time setup — log in once:
+    tdl login               # saves to namespace "default"
+    tdl login -n myns       # saves to a named namespace
+
+Relevant config.py knobs:
+    TDL_NAMESPACE  str|None  Session namespace (default "default"; None omits -n)
+    TDL_THREADS    int       Chunk workers per file  (-t, default 4)
+    TDL_PERFILE    int       Concurrent files        (-l, default 4)
+    TDL_AMOUNT     int       Messages per tdl batch  (default 4)
+    TDL_TAKEOUT    bool      Use takeout session      (--takeout)
+
+Flag reference:
+  Global (BEFORE subcommand): -n --ns, -t --threads, -l --limit
+  dl-specific:                -u --url, -d --dir, --template, --continue, --takeout
+
+Download isolation strategy:
+  Each batch gets its own staging subdirectory (TEMP_DIR/_tdl_<monotonic_ns>/)
+  so that concurrent downloads and homoglyph filename collisions can never cause
+  tdl's internal .tmp → final rename to fail.  Files are moved to their final
+  dest paths after tdl exits and the staging dir is removed.
+"""
+
+import asyncio
+import logging
+import shutil
+import time
+from dataclasses import dataclass
+from pathlib import Path
+
+from config import TDL_NAMESPACE, TDL_THREADS, TDL_PERFILE, TDL_TAKEOUT, TEMP_DIR
+from tui import events as bus
+
+log = logging.getLogger(__name__)
+
+
+# ─── Availability ─────────────────────────────────────────────────────────────
+
+def is_tdl_available() -> bool:
+    return shutil.which("tdl") is not None
+
+
+# ─── Message → URL ────────────────────────────────────────────────────────────
+
+def _build_message_url(msg) -> str:
+    """
+    Build a t.me/c/<channel_id>/<msg_id> link from a Telethon Message.
+    Works for public and private channels alike.
+    """
+    peer = msg.peer_id
+    if hasattr(peer, "channel_id"):
+        return f"https://t.me/c/{peer.channel_id}/{msg.id}"
+    elif hasattr(peer, "chat_id"):
+        return f"https://t.me/c/{peer.chat_id}/{msg.id}"
+    elif hasattr(peer, "user_id"):
+        return f"https://t.me/c/{peer.user_id}/{msg.id}"
+    raise ValueError(f"Cannot build message URL from peer: {peer!r}")
+
+
+# ─── Command builder ──────────────────────────────────────────────────────────
+
+def _build_cmd(urls: list[str], staging_dir: Path) -> list[str]:
+    """
+    Build the full tdl dl command.
+
+    Global flags (-n, -t, -l) MUST precede the subcommand.
+    staging_dir is always an absolute path to a fresh per-batch directory,
+    so tdl's internal .tmp → final rename can never collide with an existing
+    file of the same name.
+
+    --template '{{ filenamify .FileName }}' keeps just the original filename
+    (no DialogID_MessageID_ prefix).
+
+    --continue is kept so interrupted downloads resume rather than restart.
+    --skip-same is intentionally omitted — deduplication is handled upstream
+    by is_seen(), and --skip-same can cause the .tmp rename to fail when a
+    same-named file already exists in the directory.
+    """
+    global_flags: list[str] = []
+    if TDL_NAMESPACE:
+        global_flags += ["-n", str(TDL_NAMESPACE)]
+    global_flags += ["-t", str(TDL_THREADS), "-l", str(TDL_PERFILE)]
+
+    url_flags: list[str] = []
+    for url in urls:
+        url_flags += ["-u", url]
+
+    dl_flags = [
+        "-d", str(staging_dir),
+        "--template", "{{ filenamify .FileName }}",
+        "--continue",
+    ]
+    if TDL_TAKEOUT:
+        dl_flags.append("--takeout")
+
+    return ["tdl", *global_flags, "dl", *url_flags, *dl_flags]
+
+
+# ─── Runner ───────────────────────────────────────────────────────────────────
+
+# ANSI escape stripper — tdl emits colour codes even when not a TTY
+import re as _re
+_ANSI_RE = _re.compile(r"\x1b\[[0-9;]*[mGKHFJA-Z]|\x1b=|\x1b>|\x1b\[\?[0-9]+[hl]")
+
+def _strip_ansi(text: str) -> str:
+    return _ANSI_RE.sub("", text)
+
+
+async def _run_tdl(cmd: list[str], label: str) -> bool:
+    """
+    Spawn tdl and handle output based on whether the TUI is running:
+      - TUI mode:  pipe stdout+stderr, read raw chunks (NOT line-by-line),
+                   split on both \\r and \\n, strip ANSI, post non-empty
+                   segments immediately as EvTdlOutput.
+                   tdl uses \\r to overwrite its progress bar in place, so
+                   async-for-line on the stream would block until EOF.
+                   Chunk-reading + manual split delivers progress live.
+      - CLI mode:  inherit the terminal so tdl's progress bars render natively.
+    Returns True on exit code 0, False otherwise.
+    """
+    log.debug(f"[tdl] cmd: {' '.join(cmd)}")
+    try:
+        if bus.tui_active:
+            proc = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+
+            async def _relay(stream):
+                import codecs
+                # Decode incrementally: a multi-byte UTF-8 character split
+                # across two 256-byte reads must not turn into U+FFFD.
+                decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+                buf = ""
+                eof = False
+                while not eof:
+                    chunk = await stream.read(256)
+                    eof = not chunk
+                    buf += decoder.decode(chunk, final=eof)
+                    # Split on both \r and \n; process all complete segments
+                    parts = _re.split(r"[\r\n]", buf)
+                    # Until EOF the last element may be incomplete — keep it
+                    # buffered; at EOF it is flushed together with the rest.
+                    buf = "" if eof else parts.pop()
+                    for part in parts:
+                        clean = _strip_ansi(part).strip()
+                        if clean:
+                            bus.post(bus.EvTdlOutput(line=clean))
+
+            await asyncio.gather(_relay(proc.stdout), _relay(proc.stderr))
+            await proc.wait()
+        else:
+            proc = await asyncio.create_subprocess_exec(*cmd)
+            await proc.wait()
+
+        if proc.returncode == 0:
+            log.info(f"[tdl] ✓ {label}")
+            return True
+        else:
+            log.error(f"[tdl] ✗ exit {proc.returncode} — {label}")
+            return False
+    except FileNotFoundError:
+        log.error("[tdl] binary not found at runtime")
+        return False
+    except Exception as e:
+        log.error(f"[tdl] Unexpected error: {e}")
+        return False
+
+
+# ─── Staging dir helpers ──────────────────────────────────────────────────────
+
+def _make_staging_dir() -> Path:
+    """Create a unique staging subdirectory under TEMP_DIR for one batch."""
+    staging = TEMP_DIR.resolve() / f"_tdl_{int(time.monotonic_ns())}"
+    staging.mkdir(parents=True, exist_ok=True)
+    return staging
+
+
+def _find_in_staging(staging: Path, expected_name: str) -> Path | None:
+    """
+    Locate a downloaded file in the staging dir by matching its name.
+    filenamify() can munge characters (strips @, collapses unicode, etc.)
+    so we do a normalised stem comparison as a fallback.
+    """
+    # Exact match first
+    exact = staging / expected_name
+    if exact.exists():
+        return exact
+
+    expected_stem = Path(expected_name).stem.lower().lstrip("@").replace(" ", "")
+    expected_suffix = Path(expected_name).suffix.lower()
+
+    for candidate in staging.iterdir():
+        if not candidate.is_file():
+            continue
+        if candidate.suffix.lower() != expected_suffix:
+            continue
+        cand_stem = candidate.stem.lower().lstrip("@").replace(" ", "")
+        if cand_stem == expected_stem:
+            return candidate
+
+    return None
+
+
+def _move_from_staging(staging: Path, expected_name: str, final_dest: Path) -> bool:
+    """
+    Find the file in staging, move it to final_dest, return True on success.
+    """
+    found = _find_in_staging(staging, expected_name)
+    if not found:
+        log.warning(f"[tdl] Not found in staging: '{expected_name}' (staging: {staging})")
+        return False
+
+    try:
+        found.rename(final_dest)
+        log.debug(f"[tdl] Moved: {found.name} → {final_dest}")
+        return True
+    except Exception as e:
+        log.error(f"[tdl] Move failed {found} → {final_dest}: {e}")
+        return False
+
+
+def _cleanup_staging(staging: Path) -> None:
+    try:
+        shutil.rmtree(staging, ignore_errors=True)
+    except Exception:
+        pass
+
+
+# ─── Public API ───────────────────────────────────────────────────────────────
+
+@dataclass
+class BatchEntry:
+    """Carries everything needed to process one file after a batch download."""
+    msg: object          # Telethon Message
+    filename: str
+    dest: Path
+    doc_id: int
+    source_name: str
+    password: str | None
+
+
+async def download_batch_with_tdl(entries: list[BatchEntry]) -> dict[int, bool]:
+    """
+    Download a batch of messages in a single tdl invocation.
+
+    Each batch gets its own staging subdirectory so filenames can never
+    collide with existing files in TEMP_DIR.  After tdl exits, files are
+    moved from staging to their final dest paths.
+
+    Returns dict mapping doc_id → True (ready at entry.dest) / False (fallback needed).
+    """
+    if not entries:
+        return {}
+
+    if not is_tdl_available():
+        log.warning("[tdl] not available — all entries need Telethon fallback")
+        return {e.doc_id: False for e in entries}
+
+    urls: list[str] = []
+    for entry in entries:
+        try:
+            urls.append(_build_message_url(entry.msg))
+        except ValueError as exc:
+            log.error(f"[tdl] Skipping {entry.filename}: {exc}")
+            urls.append("")
+
+    valid_entries = [(e, u) for e, u in zip(entries, urls) if u]
+    if not valid_entries:
+        return {e.doc_id: False for e in entries}
+
+    batch_id = f"batch_{int(time.monotonic_ns())}"
+    names = ", ".join(e.filename for e, _ in valid_entries)
+    log.info(f"[tdl] Batch ({len(valid_entries)} files): {names}")
+
+    # Notify TUI: all files in this batch are queued
+    for entry, _ in valid_entries:
+        size_mb = (entry.msg.media.document.size or 0) / (1024 * 1024)
+        bus.post(bus.EvDownloadQueued(
+            batch_id=batch_id,
+            filename=entry.filename,
+            size_mb=round(size_mb, 2),
+            source=entry.source_name,
+            password=entry.password,
+        ))
+
+    staging = _make_staging_dir()
+    cmd = _build_cmd([u for _, u in valid_entries], staging)
+
+    # Signal batch started
+    for entry, _ in valid_entries:
+        bus.post(bus.EvDownloadStarted(batch_id=batch_id, filename=entry.filename))
+
+    tdl_ok = await _run_tdl(cmd, f"batch of {len(valid_entries)}")
+
+    results: dict[int, bool] = {}
+    for entry in entries:
+        if not any(e.doc_id == entry.doc_id for e, _ in valid_entries):
+            results[entry.doc_id] = False
+            continue
+
+        if tdl_ok:
+            moved = _move_from_staging(staging, entry.filename, entry.dest)
+            results[entry.doc_id] = moved
+            if moved:
+                bus.post(bus.EvDownloadDone(batch_id=batch_id, filename=entry.filename, via="tdl"))
+            else:
+                log.warning(f"[tdl] Fallback needed: {entry.filename}")
+                bus.post(bus.EvDownloadFailed(batch_id=batch_id, filename=entry.filename, reason="staging mismatch"))
+        else:
+            results[entry.doc_id] = False
+            bus.post(bus.EvDownloadFailed(batch_id=batch_id, filename=entry.filename, reason="tdl exit error"))
+
+    _cleanup_staging(staging)
+    return results
+
+
+async def download_single_with_tdl(msg, dest: Path) -> bool:
+    """
+    Download a single message with tdl. Used by the live handler and
+    bot_downloader where batching doesn't apply.
+    """
+    if not is_tdl_available():
+        log.warning("[tdl] not available — falling back to Telethon")
+        return False
+
+    try:
+        url = _build_message_url(msg)
+    except ValueError as e:
+        log.error(f"[tdl] Cannot build URL: {e}")
+        return False
+
+    batch_id = f"single_{int(time.monotonic_ns())}"
+    size_mb = (msg.media.document.size or 0) / (1024 * 1024) if hasattr(msg, "media") and msg.media else 0
+    bus.post(bus.EvDownloadQueued(
+        batch_id=batch_id, filename=dest.name,
+        size_mb=round(size_mb, 2), source="live", password=None,
+    ))
+    bus.post(bus.EvDownloadStarted(batch_id=batch_id, filename=dest.name))
+
+    staging = _make_staging_dir()
+    cmd = _build_cmd([url], staging)
+    log.info(f"[tdl] Single: {dest.name}  ({url})")
+    tdl_ok = await _run_tdl(cmd, dest.name)
+
+    if tdl_ok:
+        result = _move_from_staging(staging, dest.name, dest)
+    else:
+        result = False
+
+    _cleanup_staging(staging)
+
+    if result:
+        bus.post(bus.EvDownloadDone(batch_id=batch_id, filename=dest.name, via="tdl"))
+    else:
+        bus.post(bus.EvDownloadFailed(batch_id=batch_id, filename=dest.name, reason="tdl failed"))
+    return result
diff --git a/data/.gitkeep b/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/logs/monitor.log b/logs/monitor.log
new file mode 100644
index 0000000..0580406
--- /dev/null
+++ b/logs/monitor.log
@@ -0,0 +1,54 @@
+2026-04-02 00:45:48,909 [INFO] utils.database: Database ready: data/hits.db
+2026-04-02 00:45:49,119 [INFO] telethon.crypto.aes: libssl detected, it will be used for encryption
+2026-04-02 00:45:49,156 [INFO] utils.database: Database ready: data/hits.db
+2026-04-02 00:45:49,159 [INFO] tui.app: [bot] Connecting bot_client...
+2026-04-02 00:45:49,159 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
+2026-04-02 00:45:49,203 [INFO] tui.app: [bus] EvStatus: Starting — 12 channel(s), 2 pattern(s)
+2026-04-02 00:45:49,281 [INFO] telethon.network.mtprotosender: Connection to 149.154.175.59:443/TcpFull complete!
+2026-04-02 00:45:49,900 [INFO] tui.app: [bot] bot_client connected, authorizing...
+2026-04-02 00:45:49,901 [INFO] tui.app: [bot] bot_client ready
+2026-04-02 00:45:49,901 [INFO] tui.app: [bot] Connecting user_client...
+2026-04-02 00:45:49,901 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
+2026-04-02 00:45:49,908 [INFO] __main__: Cleaning up tmp/...
+2026-04-02 00:54:16,429 [INFO] utils.database: Database ready: data/hits.db
+2026-04-02 00:54:16,638 [INFO] telethon.crypto.aes: libssl detected, it will be used for encryption
+2026-04-02 00:54:16,666 [ERROR] tui.app: [bot-thread] Unhandled exception: cannot import name 'HITS_CSV' from 'config' (/home/anti/Tools/sj/telegrammer/config.py)
+Traceback (most recent call last):
+  File "/home/anti/Tools/sj/telegrammer/tui/app.py", line 848, in _run_bot_thread
+    loop.run_until_complete(self._bot_main())
+    ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^
+  File "/usr/lib64/python3.14/asyncio/base_events.py", line 719, in run_until_complete
+    return future.result()
+           ~~~~~~~~~~~~~^^
+  File "/home/anti/Tools/sj/telegrammer/tui/app.py", line 865, in _bot_main
+    from core.notifier import send_status
+  File "/home/anti/Tools/sj/telegrammer/core/notifier.py", line 22, in <module>
+    from config import HITS_FILE, HITS_CSV, NOTIFY_CHAT_ID
+ImportError: cannot import name 'HITS_CSV' from 'config' (/home/anti/Tools/sj/telegrammer/config.py)
+2026-04-02 00:54:16,716 [INFO] tui.app: [bus] EvStatus: Bot thread crashed: cannot import name 'HITS_CSV' from 'config' (/home/anti/Tools/sj/telegrammer/config.py)
+2026-04-02 00:54:22,624 [INFO] __main__: Cleaning up tmp/...
+2026-04-02 00:54:34,773 [INFO] utils.database: Database ready: data/hits.db
+2026-04-02 00:54:34,983 [INFO] telethon.crypto.aes: libssl detected, it will be used for encryption
+2026-04-02 00:54:35,015 [INFO] utils.database: Database ready: data/hits.db
+2026-04-02 00:54:35,015 [INFO] tui.app: [bot] Connecting bot_client...
+2026-04-02 00:54:35,015 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
+2026-04-02 00:54:35,063 [INFO] tui.app: [bus] EvStatus: Starting — 12 channel(s), 2 pattern(s)
+2026-04-02 00:54:35,120 [INFO] telethon.network.mtprotosender: Connection to 149.154.175.59:443/TcpFull complete!
+2026-04-02 00:54:35,698 [INFO] tui.app: [bot] bot_client connected, authorizing...
+2026-04-02 00:54:35,698 [INFO] tui.app: [bot] bot_client ready
+2026-04-02 00:54:35,698 [INFO] tui.app: [bot] Connecting user_client...
+2026-04-02 00:54:35,698 [INFO] telethon.network.mtprotosender: Connecting to 149.154.175.59:443/TcpFull...
+2026-04-02 00:54:35,810 [INFO] telethon.network.mtprotosender: Connection to 149.154.175.59:443/TcpFull complete!
+2026-04-02 00:54:36,420 [INFO] tui.app: [bot] user_client connected, checking auth...
+2026-04-02 00:54:36,420 [INFO] tui.app: [bot] user_client ready
+2026-04-02 00:54:36,563 [INFO] tui.app: [bus] EvStatus: Connected as 4n (@clp_c)
+2026-04-02 00:54:36,653 [INFO] core.scraper: Warming entity cache (fetching dialogs)...
+2026-04-02 00:54:38,437 [INFO] core.scraper: Entity cache ready.
+2026-04-02 00:54:38,437 [INFO] tui.app: [bot] Handler registered for 12 channel(s)
+2026-04-02 00:54:38,437 [INFO] core.scraper: [Backfill] Starting for 12 channel(s)...
+2026-04-02 00:54:38,437 [INFO] core.scraper: [Backfill] Scanning history: cloudxlog (last 500 messages)
+2026-04-02 00:54:38,463 [INFO] tui.app: [bus] EvStatus: Watching 12 channel(s)
+2026-04-02 00:54:38,463 [INFO] tui.app: [bus] EvStatus: Live listener active
+2026-04-02 00:54:38,585 [INFO] core.scraper: [Batch] 4 file(s): @cloud t13.txt, @cloud t12.txt, @cloud t11.txt, @cloud t10.txt
+2026-04-02 00:54:38,585 [INFO] core.tdl_downloader: [tdl] Batch (4 files): @cloud t13.txt, @cloud t12.txt, @cloud t11.txt, @cloud t10.txt
+2026-04-02 00:54:40,248 [INFO] __main__: Cleaning up tmp/...
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..04393e9
--- /dev/null
+++ b/main.py
@@ -0,0 +1,142 @@
+"""
+main.py — Entry point for the ULP credential monitor.
+
+Usage:
+    python main.py          # TUI mode (default, requires textual)
+    python main.py --no-tui # Plain CLI mode
+
+First run will prompt for your Telegram phone number and 2FA code
+to create a session file. Subsequent runs are fully automatic.
+"""
+
+import asyncio
+import logging
+import sys
+import shutil
+import argparse
+
+import config
+from utils.database import init_db
+
+
+# ─── Logging setup ────────────────────────────────────────────────────────────
+
+config.LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
+config.TEMP_DIR.mkdir(parents=True, exist_ok=True)
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    handlers=[
+        logging.FileHandler(config.LOG_FILE, encoding="utf-8"),
+    ],
+)
+log = logging.getLogger(__name__)
+
+init_db()
+
+
+# ─── Plain CLI mode ───────────────────────────────────────────────────────────
+
+async def _cli_main():
+    """Run the monitor in plain CLI mode: connect both clients, backfill, then listen."""
+    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
+
+    from telethon import TelegramClient
+    from core.processor import compile_patterns
+    from core.notifier import send_status
+    from core.scraper import backfill_all, register_handlers, warm_entity_cache
+
+    log.info("=" * 60)
+    log.info("  ULP Credential Monitor — CLI mode")
+    log.info("=" * 60)
+
+    patterns = compile_patterns(config.TARGET_KEYWORDS)
+    log.info(f"Loaded {len(patterns)} keyword pattern(s)")
+    log.info(f"Watching {len(config.WATCHED_CHANNELS)} channel(s)")
+
+    user_client = TelegramClient(
+        config.SESSION_NAME, config.API_ID, config.API_HASH,
+        connection_retries=5, auto_reconnect=True, request_retries=5,
+    )
+    bot_client = TelegramClient(
+        "bot_session", config.API_ID, config.API_HASH,
+    )
+
+    # Start the bot with its token BEFORE `async with`: entering the context
+    # calls start() with no arguments, which would prompt for the bot's login.
+    await bot_client.start(bot_token=config.BOT_TOKEN)
+    log.info("Bot client connected.")
+    async with user_client, bot_client:
+        me = await user_client.get_me()
+        log.info(f"User client connected as: {me.first_name} (@{me.username})")
+
+        await send_status(
+            bot_client,
+            f"✅ *Monitor started*\n"
+            f"User: `{me.first_name}`\n"
+            f"Channels: `{len(config.WATCHED_CHANNELS)}`\n"
+            f"Patterns: `{len(patterns)}`\n"
+            f"Backfill: `{config.BACKFILL_LIMIT} msg/channel`",
+        )
+
+        await warm_entity_cache(user_client)
+        register_handlers(user_client, bot_client, patterns)
+        log.info("Live listener registered.")
+
+        await backfill_all(user_client, bot_client, patterns)
+
+        log.info("Listening for new messages... (Ctrl+C to stop)")
+        await user_client.run_until_disconnected()
+
+    log.info("Monitor stopped.")
+
+
+# ─── Entry point ──────────────────────────────────────────────────────────────
+
+def main():
+    """Parse CLI flags, run the monitor (TUI unless --no-tui), then reset TEMP_DIR."""
+    parser = argparse.ArgumentParser(description="ULP Credential Monitor")
+    parser.add_argument(
+        "--no-tui",
+        action="store_true",
+        help="Run in plain CLI mode (no Textual TUI)",
+    )
+    args = parser.parse_args()
+
+    if args.no_tui:
+        try:
+            asyncio.run(_cli_main())
+        except KeyboardInterrupt:
+            log.info("Interrupted by user.")
+        finally:
+            log.info("Cleaning up tmp/...")
+            # rmtree(ignore_errors=True) can leave TEMP_DIR behind; mkdir must tolerate that.
+            shutil.rmtree(config.TEMP_DIR, ignore_errors=True)
+            config.TEMP_DIR.mkdir(parents=True, exist_ok=True)
+            log.info("Done.")
+    else:
+        try:
+            from tui.app import run_tui
+        except ImportError:
+            print(
+                "⚠  Textual is not installed. Install it with:\n"
+                "     pip install textual\n"
+                "Or run in plain CLI mode:\n"
+                "     python main.py --no-tui",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+        try:
+            run_tui()
+        except KeyboardInterrupt:
+            pass
+        finally:
+            log.info("Cleaning up tmp/...")
+            shutil.rmtree(config.TEMP_DIR, ignore_errors=True)
+            config.TEMP_DIR.mkdir(parents=True, exist_ok=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..5ee6477
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+testpaths = tests
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..e079f8a
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1 @@
+pytest
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9fdadb0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,16 @@
+# Telegram
+telethon
+tgcrypto
+
+# TUI
+textual
+
+# Config
+python-dotenv
+
+# Progress bars (CLI mode)
+tqdm
+
+# Archive extraction
+py7zr
+rarfile
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..f2d8d56
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,31 @@
+import os
+
+# Must be set before config.py is imported by any module.
+# load_dotenv() runs at import time; these setdefaults fill the gap when .env is absent.
+os.environ.setdefault("API_ID", "12345")
+os.environ.setdefault("API_HASH", "dummy_hash_for_tests")
+os.environ.setdefault("BOT_TOKEN", "0:dummy_bot_token")
+os.environ.setdefault("NOTIFY_CHAT_ID", "99999")
+
+import pytest
+import config
+import utils.scorer as scorer
+
+# Two test keywords:
+#   @testcorp\.com  — employee email domain (triggers CRITICAL)
+#   testcorp\.com   — plain domain match   (triggers LOW baseline)
+TEST_KEYWORDS = [r"@testcorp\.com", r"testcorp\.com"]
+
+
+@pytest.fixture
+def patched_keywords(monkeypatch):
+    """
+    Override TARGET_KEYWORDS for the duration of a test and rebuild the
+    scorer's module-level globals so scoring logic uses known test patterns.
+    """
+    monkeypatch.setattr(config, "TARGET_KEYWORDS", TEST_KEYWORDS)
+    # scorer.py uses `from config import TARGET_KEYWORDS` — a local binding that
+    # doesn't update when config.TARGET_KEYWORDS is patched. Patch it directly.
+    monkeypatch.setattr(scorer, "TARGET_KEYWORDS", TEST_KEYWORDS)
+    monkeypatch.setattr(scorer, "EMPLOYEE_DOMAINS", scorer._build_employee_domains())
+    monkeypatch.setattr(scorer, "ORG_DOMAINS", scorer._build_org_domains())
diff --git a/tests/test_cache.py b/tests/test_cache.py
new file mode 100644
index 0000000..301b2d9
--- /dev/null
+++ b/tests/test_cache.py
@@ -0,0 +1,55 @@
+"""
+Tests for utils/cache.py — file-ID deduplication cache.
+
+Each test gets an isolated cache file via the `isolated_cache` fixture
+so tests never touch data/cache.json.
+"""
+
+import pytest
+import utils.cache as cache_module
+
+
+@pytest.fixture(autouse=True)
+def isolated_cache(tmp_path, monkeypatch):
+    monkeypatch.setattr(cache_module, "CACHE_FILE", tmp_path / "cache.json")
+
+
+def test_unseen_id_returns_false():
+    assert cache_module.is_seen(12345) is False
+
+
+def test_mark_seen_makes_id_seen():
+    cache_module.mark_seen(12345)
+    assert cache_module.is_seen(12345) is True
+
+
+def test_multiple_ids_stored_independently():
+    cache_module.mark_seen(1)
+    cache_module.mark_seen(2)
+    cache_module.mark_seen(3)
+    assert cache_module.is_seen(1)
+    assert cache_module.is_seen(2)
+    assert cache_module.is_seen(3)
+    assert not cache_module.is_seen(4)
+
+
+def test_persists_to_disk_between_calls():
+    """
+    is_seen() and mark_seen() each load from disk independently.
+    This verifies the persist-on-write / load-on-read contract
+    (simulating what happens across separate function calls in the bot loop).
+    """
+    cache_module.mark_seen(999)
+    assert cache_module.is_seen(999) is True
+
+
+def test_missing_cache_file_handled_gracefully(tmp_path, monkeypatch):
+    monkeypatch.setattr(cache_module, "CACHE_FILE", tmp_path / "nonexistent.json")
+    assert cache_module.is_seen(42) is False
+
+
+def test_mark_seen_is_idempotent():
+    cache_module.mark_seen(7)
+    cache_module.mark_seen(7)
+    cache_module.mark_seen(7)
+    assert cache_module.is_seen(7) is True
diff --git a/tests/test_database.py b/tests/test_database.py
new file mode 100644
index 0000000..10bb543
--- /dev/null
+++ b/tests/test_database.py
@@ -0,0 +1,188 @@
+"""
+Tests for utils/database.py — SQLite persistence layer.
+
+Each test gets an isolated temporary SQLite file (under pytest's tmp_path)
+via the `isolated_db` fixture, so tests never touch data/hits.db.
+"""
+
+import pytest
+import utils.database as db_module
+from utils.scorer import ScoredHit, CRITICAL, HIGH, MEDIUM, LOW
+
+
+def make_hit(severity=LOW, url="testcorp.com", username="user", password="pass", raw=None):
+    """Build a minimal ScoredHit for insertion tests."""
+    scores = {CRITICAL: 40, HIGH: 30, MEDIUM: 20, LOW: 10}
+    return ScoredHit(
+        raw=raw or f"{url}|{username}|{password}",
+        severity=severity,
+        score=scores[severity],
+        reasons=["Test reason"],
+        url=url,
+        username=username,
+        password=password,
+    )
+
+
+@pytest.fixture(autouse=True)
+def isolated_db(tmp_path, monkeypatch):
+    monkeypatch.setattr(db_module, "DB_FILE", tmp_path / "test_hits.db")
+    db_module.init_db()
+
+
+# ─── init_db ─────────────────────────────────────────────────────────────────
+
+def test_init_db_is_idempotent():
+    db_module.init_db()
+    db_module.init_db()  # must not raise
+
+
+# ─── insert_hits ──────────────────────────────────────────────────────────────
+
+def test_insert_returns_correct_row_count():
+    hits = [make_hit(), make_hit(severity=CRITICAL)]
+    count = db_module.insert_hits(hits, source="testchan", filename="combo.txt")
+    assert count == 2
+
+
+def test_insert_stores_all_fields():
+    hit = make_hit(severity=HIGH, url="intranet.testcorp.com", username="jdoe", password="s3cr3t")
+    db_module.insert_hits([hit], source="mychan", filename="creds.zip")
+    rows = db_module.search("jdoe")
+    assert len(rows) == 1
+    row = rows[0]
+    assert row["url"] == "intranet.testcorp.com"
+    assert row["username"] == "jdoe"
+    assert row["password"] == "s3cr3t"
+    assert row["severity"] == HIGH
+    assert row["score"] == 30
+    assert row["source"] == "mychan"
+    assert row["filename"] == "creds.zip"
+    assert row["seen_before"] == 0
+
+
+def test_insert_seen_before_flag():
+    hit = make_hit()
+    db_module.insert_hits([hit], source="chan", filename="f.txt", seen_before=True)
+    rows = db_module.search("testcorp")
+    assert rows[0]["seen_before"] == 1
+
+
+# ─── search ───────────────────────────────────────────────────────────────────
+
+def test_search_finds_by_username():
+    db_module.insert_hits([make_hit(username="jdoe@testcorp.com")], source="c", filename="f.txt")
+    results = db_module.search("jdoe")
+    assert len(results) == 1
+    assert results[0]["username"] == "jdoe@testcorp.com"
+
+
+def test_search_finds_by_url():
+    db_module.insert_hits([make_hit(url="admin.testcorp.com")], source="c", filename="f.txt")
+    results = db_module.search("admin.testcorp")
+    assert len(results) == 1
+
+
+def test_search_finds_by_raw():
+    db_module.insert_hits([make_hit(raw="raw_unique_token_xyz")], source="c", filename="f.txt")
+    results = db_module.search("unique_token")
+    assert len(results) == 1
+
+
+def test_search_returns_empty_for_no_match():
+    db_module.insert_hits([make_hit()], source="c", filename="f.txt")
+    assert db_module.search("zzznomatch_xyz") == []
+
+
+def test_search_sorted_by_score_descending():
+    """search() must return every matching row, highest score first (LOW is inserted first on purpose)."""
+    db_module.insert_hits([make_hit(severity=LOW), make_hit(severity=CRITICAL, url="admin.testcorp.com")], source="c", filename="f.txt")
+    scores = [row["score"] for row in db_module.search("testcorp")]
+    assert scores == [40, 10]  # make_hit scores: CRITICAL=40, LOW=10 — pins both row count and order
+
+
+# ─── by_severity ──────────────────────────────────────────────────────────────
+
+def test_by_severity_returns_correct_severity():
+    db_module.insert_hits([make_hit(severity=CRITICAL, url="admin.testcorp.com")], source="c", filename="f.txt")
+    db_module.insert_hits([make_hit(severity=LOW)], source="c", filename="f.txt")
+    results = db_module.by_severity(CRITICAL)
+    assert len(results) == 1
+    assert results[0]["severity"] == CRITICAL
+
+
+def test_by_severity_excludes_duplicates():
+    """seen_before=1 rows must be invisible to by_severity — they are stored for stats only."""
+    hit = make_hit(severity=HIGH, url="intranet.testcorp.com")
+    db_module.insert_hits([hit], source="c", filename="f.txt", seen_before=True)
+    assert db_module.by_severity(HIGH) == []
+
+
+def test_by_severity_returns_empty_when_none():
+    assert db_module.by_severity(CRITICAL) == []
+
+
+# ─── stats ───────────────────────────────────────────────────────────────────
+
+def test_stats_counts_by_severity():
+    db_module.insert_hits([make_hit(severity=CRITICAL, url="admin.testcorp.com")], source="c", filename="f.txt")
+    db_module.insert_hits([make_hit(severity=HIGH, url="intranet.testcorp.com")], source="c", filename="f.txt")
+    db_module.insert_hits([make_hit(severity=MEDIUM, url="app.testcorp.com")], source="c", filename="f.txt")
+    db_module.insert_hits([make_hit(severity=LOW)], source="c", filename="f.txt")
+    s = db_module.stats()
+    assert s["critical"] == 1
+    assert s["high"] == 1
+    assert s["medium"] == 1
+    assert s["low"] == 1
+    assert s["total"] == 4
+    assert s["unique"] == 4
+    assert s["duplicates"] == 0
+
+
+def test_stats_separates_duplicates():
+    hit = make_hit()
+    db_module.insert_hits([hit], source="c", filename="f.txt", seen_before=False)
+    db_module.insert_hits([hit], source="c", filename="f.txt", seen_before=True)
+    s = db_module.stats()
+    assert s["total"] == 2
+    assert s["unique"] == 1
+    assert s["duplicates"] == 1
+
+
+def test_stats_severity_counts_exclude_duplicates():
+    hit = make_hit(severity=CRITICAL, url="admin.testcorp.com")
+    db_module.insert_hits([hit], source="c", filename="f.txt", seen_before=False)
+    db_module.insert_hits([hit], source="c", filename="f.txt", seen_before=True)
+    s = db_module.stats()
+    assert s["critical"] == 1  # only the unique one
+
+
+def test_stats_empty_db():
+    s = db_module.stats()
+    assert s["total"] == 0
+    assert s["unique"] == 0
+    assert s["top_source"] is None
+
+
+def test_stats_top_source():
+    db_module.insert_hits([make_hit()], source="channelA", filename="f.txt")
+    db_module.insert_hits([make_hit()], source="channelA", filename="f.txt")
+    db_module.insert_hits([make_hit()], source="channelB", filename="f.txt")
+    s = db_module.stats()
+    assert s["top_source"]["source"] == "channelA"
+
+
+# ─── recent ───────────────────────────────────────────────────────────────────
+
+def test_recent_respects_limit():
+    for i in range(5):
+        db_module.insert_hits([make_hit(raw=f"testcorp.com|user{i}|pass")], source="c", filename="f.txt")
+    rows = db_module.recent(limit=3)
+    assert len(rows) == 3
+
+
+def test_recent_returns_all_when_under_limit():
+    db_module.insert_hits([make_hit()], source="c", filename="f.txt")
+    db_module.insert_hits([make_hit()], source="c", filename="f.txt")
+    rows = db_module.recent(limit=50)
+    assert len(rows) == 2
diff --git a/tests/test_processor.py b/tests/test_processor.py
new file mode 100644
index 0000000..108586c
--- /dev/null
+++ b/tests/test_processor.py
@@ -0,0 +1,223 @@
+"""
+Tests for core/processor.py — archive extraction and line-by-line search.
+
+No Telegram deps, no async. Tests create real archive fixtures in tmp_path
+so process_file's cleanup guarantee can be verified against actual disk state.
+"""
+
+import zipfile
+import pytest
+from pathlib import Path
+
+from core.processor import compile_patterns, search_file, process_file
+
+
+@pytest.fixture
+def patterns():
+    return compile_patterns([r"testcorp\.com"])
+
+
+# ─── compile_patterns ─────────────────────────────────────────────────────────
+
+class TestCompilePatterns:
+    def test_returns_case_insensitive_patterns(self):
+        pats = compile_patterns([r"hello"])
+        assert pats[0].search("HELLO") is not None
+        assert pats[0].search("Hello") is not None
+
+    def test_multiple_patterns(self):
+        pats = compile_patterns([r"alpha", r"beta"])
+        assert len(pats) == 2
+        assert pats[0].search("alpha_line")
+        assert pats[1].search("beta_line")
+
+    def test_empty_list(self):
+        assert compile_patterns([]) == []
+
+
+# ─── search_file ──────────────────────────────────────────────────────────────
+
+class TestSearchFile:
+    def test_returns_matching_lines(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text("testcorp.com|user|pass\nothersite.com|user|pass\n")
+        assert search_file(f, patterns) == ["testcorp.com|user|pass"]
+
+    def test_returns_empty_when_no_match(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text("nomatch.com|user|pass\nanother.net|x|y\n")
+        assert search_file(f, patterns) == []
+
+    def test_strips_whitespace_from_returned_lines(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text("  testcorp.com|user|pass  \n")
+        hits = search_file(f, patterns)
+        assert hits[0] == "testcorp.com|user|pass"
+
+    def test_skips_blank_lines(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text("\n\ntestcorp.com|user|pass\n\n")
+        assert search_file(f, patterns) == ["testcorp.com|user|pass"]
+
+    def test_handles_encoding_errors_gracefully(self, tmp_path, patterns):
+        """Combo files are often messy — invalid bytes must not crash the search."""
+        f = tmp_path / "combo.txt"
+        f.write_bytes(
+            b"testcorp.com|user1|pass\n"
+            b"\xff\xfe invalid bytes here\n"
+            b"testcorp.com|user2|pass\n"
+        )
+        hits = search_file(f, patterns)
+        assert len(hits) == 2
+
+    def test_multiple_matching_lines_all_returned(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text(
+            "testcorp.com|alice|pass1\n"
+            "nomatch.com|bob|pass2\n"
+            "testcorp.com|carol|pass3\n"
+        )
+        hits = search_file(f, patterns)
+        assert len(hits) == 2
+
+
+# ─── process_file — plain .txt ────────────────────────────────────────────────
+
+class TestProcessFilePlainText:
+    def test_returns_hits(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text("testcorp.com|user|pass\nnomatch.com|x|y\n")
+        hits = process_file(f, patterns)
+        assert hits == ["testcorp.com|user|pass"]
+
+    def test_deletes_file_after_processing(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text("testcorp.com|user|pass\n")
+        process_file(f, patterns)
+        assert not f.exists()
+
+    def test_deletes_file_even_with_no_hits(self, tmp_path, patterns):
+        f = tmp_path / "combo.txt"
+        f.write_text("nomatch.com|x|y\n")
+        hits = process_file(f, patterns)
+        assert hits == []
+        assert not f.exists()
+
+
+# ─── process_file — .zip extraction ──────────────────────────────────────────
+
+class TestProcessFileZip:
+    def _make_zip(self, tmp_path: Path, content: str, filename="content.txt") -> Path:
+        txt = tmp_path / filename
+        txt.write_text(content)
+        zf = tmp_path / "combo.zip"
+        with zipfile.ZipFile(zf, "w") as z:
+            z.write(txt, filename)
+        txt.unlink()
+        return zf
+
+    def test_extracts_and_returns_hits(self, tmp_path, patterns):
+        zf = self._make_zip(tmp_path, "testcorp.com|user|pass\nnomatch.com|x|y\n")
+        hits = process_file(zf, patterns)
+        assert hits == ["testcorp.com|user|pass"]
+
+    def test_deletes_zip_after_processing(self, tmp_path, patterns):
+        zf = self._make_zip(tmp_path, "testcorp.com|user|pass\n")
+        process_file(zf, patterns)
+        assert not zf.exists()
+
+    def test_deletes_extract_dir_after_processing(self, tmp_path, patterns):
+        zf = self._make_zip(tmp_path, "testcorp.com|user|pass\n")
+        extract_dir = tmp_path / "combo"  # sibling dir named after zip stem
+        process_file(zf, patterns)
+        assert not extract_dir.exists()
+
+    def test_no_hits_still_cleans_up(self, tmp_path, patterns):
+        zf = self._make_zip(tmp_path, "nomatch.com|x|y\n")
+        extract_dir = tmp_path / "combo"
+        process_file(zf, patterns)
+        assert not zf.exists()
+        assert not extract_dir.exists()
+
+    def test_zip_with_multiple_txt_files(self, tmp_path, patterns):
+        txt1 = tmp_path / "a.txt"
+        txt1.write_text("testcorp.com|alice|pass\n")
+        txt2 = tmp_path / "b.txt"
+        txt2.write_text("testcorp.com|bob|pass\n")
+        zf = tmp_path / "combo.zip"
+        with zipfile.ZipFile(zf, "w") as z:
+            z.write(txt1, "a.txt")
+            z.write(txt2, "b.txt")
+        txt1.unlink()
+        txt2.unlink()
+
+        hits = process_file(zf, patterns)
+        assert len(hits) == 2
+
+
+# ─── process_file — nested archives ──────────────────────────────────────────
+
+class TestProcessFileNested:
+    def test_nested_zip_is_recursed(self, tmp_path, patterns):
+        inner_txt = tmp_path / "inner.txt"
+        inner_txt.write_text("testcorp.com|user|pass\n")
+        inner_zip = tmp_path / "inner.zip"
+        with zipfile.ZipFile(inner_zip, "w") as z:
+            z.write(inner_txt, "inner.txt")
+        inner_txt.unlink()
+
+        outer_zip = tmp_path / "outer.zip"
+        with zipfile.ZipFile(outer_zip, "w") as z:
+            z.write(inner_zip, "inner.zip")
+        inner_zip.unlink()
+
+        hits = process_file(outer_zip, patterns)
+        assert hits == ["testcorp.com|user|pass"]
+        assert not outer_zip.exists()
+        assert not (tmp_path / "outer").exists()
+
+
+# ─── process_file — password-protected .7z ───────────────────────────────────
+
+class TestProcessFile7zPassword:
+    def test_unlocks_with_correct_password(self, tmp_path, patterns, monkeypatch):
+        try:
+            import py7zr
+        except ImportError:
+            pytest.skip("py7zr not installed")
+
+        import core.processor as proc_module
+
+        # Isolate to a single known password so the test doesn't depend on config
+        monkeypatch.setattr(proc_module, "ARCHIVE_PASSWORDS", [b"secretpwd"])
+
+        txt = tmp_path / "content.txt"
+        txt.write_text("testcorp.com|user|pass\n")
+        szf = tmp_path / "combo.7z"
+        with py7zr.SevenZipFile(szf, "w", password="secretpwd") as z:
+            z.write(txt, "content.txt")
+        txt.unlink()
+
+        hits = process_file(szf, patterns)
+        assert hits == ["testcorp.com|user|pass"]
+        assert not szf.exists()
+
+    def test_skips_when_no_password_matches(self, tmp_path, patterns, monkeypatch):
+        try:
+            import py7zr
+        except ImportError:
+            pytest.skip("py7zr not installed")
+
+        import core.processor as proc_module
+        monkeypatch.setattr(proc_module, "ARCHIVE_PASSWORDS", [b"wrongpwd"])
+
+        txt = tmp_path / "content.txt"
+        txt.write_text("testcorp.com|user|pass\n")
+        szf = tmp_path / "combo.7z"
+        with py7zr.SevenZipFile(szf, "w", password="correctpwd") as z:
+            z.write(txt, "content.txt")
+        txt.unlink()
+
+        # No hits — archive could not be opened
+        hits = process_file(szf, patterns)
+        assert hits == []
diff --git a/tests/test_scorer.py b/tests/test_scorer.py
new file mode 100644
index 0000000..54d0912
--- /dev/null
+++ b/tests/test_scorer.py
@@ -0,0 +1,282 @@
+"""
+Tests for utils/scorer.py — severity scoring and ULP line parsing.
+
+All tests use the `patched_keywords` fixture (see conftest.py) which
+replaces TARGET_KEYWORDS with two entries:
+  @testcorp.com  — employee email domain (CRITICAL trigger)
+  testcorp.com   — plain domain match    (LOW baseline)
+"""
+
+import pytest
+from utils.scorer import score_hit, score_hits, summarize, CRITICAL, HIGH, MEDIUM, LOW
+
+
+# ─── ULP line parsing ─────────────────────────────────────────────────────────
+
+class TestULPParsing:
+    def test_parses_pipe_separated_fields(self, patched_keywords):
+        hit = score_hit("site.com|jdoe@testcorp.com|pass123")
+        assert hit.url == "site.com"
+        assert hit.username == "jdoe@testcorp.com"
+        assert hit.password == "pass123"
+
+    def test_parses_colon_separated_fields(self, patched_keywords):
+        # 'site.com' has no colon, so url field captures it cleanly
+        hit = score_hit("site.com:jdoe@testcorp.com:pass123")
+        assert hit.url == "site.com"
+        assert hit.username == "jdoe@testcorp.com"
+        assert hit.password == "pass123"
+
+    def test_malformed_line_yields_none_fields(self, patched_keywords):
+        hit = score_hit("justaplaindomainmatch_testcorp.com")
+        assert hit.url is None
+        assert hit.username is None
+        assert hit.password is None
+
+    def test_raw_field_preserved_exactly(self, patched_keywords):
+        line = "site.com|jdoe@testcorp.com|pass123"
+        hit = score_hit(line)
+        assert hit.raw == line
+
+
+# ─── Real-world ULP format coverage ──────────────────────────────────────────
+
+class TestULPParsingRealWorld:
+    """
+    Parametrized against real stealer-log lines.
+    Only field extraction is asserted (url/username/password), not severity,
+    so no patched_keywords fixture is needed.
+    """
+
+    @pytest.mark.parametrize("line,exp_url,exp_user,exp_pass", [
+        # ── Protocol + port + path, colon separator ──────────────────────────
+        # Port is digits followed by '/' — must be consumed as part of the URL.
+        (
+            "http://portal.fakehosp.example.com:88/:55512309-1:hunter2",
+            "http://portal.fakehosp.example.com:88/", "55512309-1", "hunter2",
+        ),
+        (
+            "http://portal.fakehosp.example.com:8085/app/booking/:3:letmein",
+            "http://portal.fakehosp.example.com:8085/app/booking/", "3", "letmein",
+        ),
+        (
+            "https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx:30219876-K:Spr!ng22@",
+            "https://portal.fakehosp.example.com:81/app/FrmResetPassword.aspx",
+            "30219876-K", "Spr!ng22@",
+        ),
+
+        # ── Protocol + no port, ID-style username looks like port but has hyphen ──
+        # ':\d+-' must NOT be consumed as a port (no '/' after the digits).
+        (
+            "https://booking.fakehosp.example.com:40293817-6:Summ3r99..",
+            "https://booking.fakehosp.example.com", "40293817-6", "Summ3r99..",
+        ),
+        (
+            "https://booking.fakehosp.example.com/:40293817-6:Summ3r99..",
+            "https://booking.fakehosp.example.com/", "40293817-6", "Summ3r99..",
+        ),
+
+        # ── Protocol + email username directly after host (no trailing slash) ─
+        (
+            "https://booking.fakehosp.example.com:carlos.gomez@gmail.com:Qwerty99",
+            "https://booking.fakehosp.example.com", "carlos.gomez@gmail.com", "Qwerty99",
+        ),
+        (
+            "https://accounts.saas-vendor.example.com/signin:jdoe@fakehosp.example.com:W1nter20",
+            "https://accounts.saas-vendor.example.com/signin", "jdoe@fakehosp.example.com", "W1nter20",
+        ),
+        (
+            "https://login.sso-provider.example.com/common/oauth2/authorize:jdoe@fakehosp.example.com:Passw0rd!",
+            "https://login.sso-provider.example.com/common/oauth2/authorize",
+            "jdoe@fakehosp.example.com", "Passw0rd!",
+        ),
+
+        # ── Pipe separator (unambiguous — port stays in URL) ──────────────────
+        (
+            "http://portal.fakehosp.example.com:88/|22.987.654-3|florida88",
+            "http://portal.fakehosp.example.com:88/", "22.987.654-3", "florida88",
+        ),
+        (
+            "https://booking.fakehosp.example.com/|77341209-0|Ninja42",
+            "https://booking.fakehosp.example.com/", "77341209-0", "Ninja42",
+        ),
+
+        # ── Mixed separators: pipe after URL, colon between user/password ─────
+        (
+            "http://portal.fakehosp.example.com:8085/app/booking/|Z:wd1980wd",
+            "http://portal.fakehosp.example.com:8085/app/booking/", "Z", "wd1980wd",
+        ),
+
+        # ── No protocol, port in URL ─────────────────────────────────────────
+        (
+            "portal.fakehosp.example.com:88/:22.987.654-3:florida88",
+            "portal.fakehosp.example.com:88/", "22.987.654-3", "florida88",
+        ),
+
+        # ── No protocol, no port — plain colon separators ────────────────────
+        (
+            "booking.fakehosp.example.com:66778899-7:correcthorse",
+            "booking.fakehosp.example.com", "66778899-7", "correcthorse",
+        ),
+        (
+            "booking.fakehosp.example.com/:smithjohnathan:Bb881955",
+            "booking.fakehosp.example.com/", "smithjohnathan", "Bb881955",
+        ),
+
+        # ── Password with special characters ─────────────────────────────────
+        (
+            "https://booking.fakehosp.example.com/:11223344-5:dragonball99*",
+            "https://booking.fakehosp.example.com/", "11223344-5", "dragonball99*",
+        ),
+        (
+            "https://booking.fakehosp.example.com/:9988776-65:abc.456#",
+            "https://booking.fakehosp.example.com/", "9988776-65", "abc.456#",
+        ),
+
+        # ── Semicolon separator ───────────────────────────────────────────────
+        (
+            "booking.fakehosp.example.com;smithjohnathan;Bb881955",
+            "booking.fakehosp.example.com", "smithjohnathan", "Bb881955",
+        ),
+    ])
+    def test_real_world_ulp_parsing(self, line, exp_url, exp_user, exp_pass):
+        hit = score_hit(line)
+        assert hit.url == exp_url,      f"URL mismatch for: {line!r}"
+        assert hit.username == exp_user, f"Username mismatch for: {line!r}"
+        assert hit.password == exp_pass, f"Password mismatch for: {line!r}"
+
+
+# ─── Severity classification ──────────────────────────────────────────────────
+
+class TestSeverityClassification:
+    def test_employee_email_in_username_is_critical(self, patched_keywords):
+        hit = score_hit("site.com|jdoe@testcorp.com|pass123")
+        assert hit.severity == CRITICAL
+
+    def test_gmail_on_org_url_is_not_critical(self, patched_keywords):
+        """
+        Core documented footgun: org domain appears in the URL, but the
+        credential username is a gmail address. Must NOT be CRITICAL.
+        The employee-domain pattern requires a literal '@' before the domain,
+        so 'testcorp.com' in the URL field never triggers it.
+        """
+        hit = score_hit("testcorp.com|user@gmail.com|pass123")
+        assert hit.severity != CRITICAL
+
+    def test_critical_service_subdomain_is_critical(self, patched_keywords):
+        hit = score_hit("admin.testcorp.com|user|pass123")
+        assert hit.severity == CRITICAL
+
+    def test_vpn_subdomain_is_critical(self, patched_keywords):
+        hit = score_hit("vpn.testcorp.com|user|pass123")
+        assert hit.severity == CRITICAL
+
+    def test_gitlab_subdomain_is_critical(self, patched_keywords):
+        hit = score_hit("gitlab.testcorp.com|user|pass123")
+        assert hit.severity == CRITICAL
+
+    def test_intranet_subdomain_is_high(self, patched_keywords):
+        hit = score_hit("intranet.testcorp.com|user|pass123")
+        assert hit.severity == HIGH
+
+    def test_sso_subdomain_is_high(self, patched_keywords):
+        hit = score_hit("sso.testcorp.com|user|pass123")
+        assert hit.severity == HIGH
+
+    def test_app_subdomain_is_medium(self, patched_keywords):
+        hit = score_hit("app.testcorp.com|user|pass123")
+        assert hit.severity == MEDIUM
+
+    def test_booking_subdomain_is_medium(self, patched_keywords):
+        hit = score_hit("booking.testcorp.com|user|pass123")
+        assert hit.severity == MEDIUM
+
+    def test_plain_domain_match_is_low(self, patched_keywords):
+        hit = score_hit("testcorp.com|user|pass123")
+        assert hit.severity == LOW
+
+    def test_employee_email_beats_high_service(self, patched_keywords):
+        """Employee email domain must win over a HIGH service classification."""
+        hit = score_hit("intranet.testcorp.com|jdoe@testcorp.com|pass")
+        assert hit.severity == CRITICAL
+
+    def test_employee_email_beats_medium_service(self, patched_keywords):
+        hit = score_hit("app.testcorp.com|jdoe@testcorp.com|pass")
+        assert hit.severity == CRITICAL
+
+    def test_multiple_checks_accumulate_reasons(self, patched_keywords):
+        """A line matching both employee email and a critical service URL collects both reasons."""
+        hit = score_hit("admin.testcorp.com|jdoe@testcorp.com|pass")
+        assert hit.severity == CRITICAL
+        assert len(hit.reasons) >= 2
+
+    def test_score_matches_severity(self, patched_keywords):
+        from utils.scorer import SEVERITY_SCORES
+        for line, expected_severity in [
+            ("admin.testcorp.com|user|pass", CRITICAL),
+            ("intranet.testcorp.com|user|pass", HIGH),
+            ("app.testcorp.com|user|pass", MEDIUM),
+            ("testcorp.com|user|pass", LOW),
+        ]:
+            hit = score_hit(line)
+            assert hit.score == SEVERITY_SCORES[expected_severity]
+
+
+# ─── Weak password flags ──────────────────────────────────────────────────────
+
+class TestWeakPasswordFlags:
+    def test_short_password_adds_reason(self, patched_keywords):
+        hit = score_hit("testcorp.com|user|abc")
+        assert any("Weak password" in r for r in hit.reasons)
+
+    def test_common_password_adds_reason(self, patched_keywords):
+        hit = score_hit("testcorp.com|user|password")
+        assert any("Common password" in r for r in hit.reasons)
+
+    def test_weak_password_does_not_escalate_severity(self, patched_keywords):
+        """Weak password flags are informational — they must not change severity."""
+        hit = score_hit("testcorp.com|user|abc")
+        assert hit.severity == LOW
+
+    def test_strong_password_adds_no_warning(self, patched_keywords):
+        hit = score_hit("testcorp.com|user|Xk9#mP2qLrTv")
+        assert not any("password" in r.lower() for r in hit.reasons if "Employee" not in r and "domain" not in r.lower() and "service" not in r.lower())
+
+
+# ─── score_hits and summarize ─────────────────────────────────────────────────
+
+class TestScoreHitsAndSummarize:
+    def test_score_hits_sorted_descending(self, patched_keywords):
+        lines = [
+            "testcorp.com|user|pass",           # LOW
+            "admin.testcorp.com|user|pass",     # CRITICAL
+            "intranet.testcorp.com|user|pass",  # HIGH
+            "app.testcorp.com|user|pass",       # MEDIUM
+        ]
+        hits = score_hits(lines)
+        scores = [h.score for h in hits]
+        assert scores == sorted(scores, reverse=True)
+
+    def test_summarize_counts_each_severity(self, patched_keywords):
+        lines = [
+            "admin.testcorp.com|user|pass",     # CRITICAL
+            "intranet.testcorp.com|user|pass",  # HIGH
+            "app.testcorp.com|user|pass",       # MEDIUM
+            "testcorp.com|user|pass",           # LOW
+        ]
+        summary = summarize(score_hits(lines))
+        assert summary[CRITICAL] == 1
+        assert summary[HIGH] == 1
+        assert summary[MEDIUM] == 1
+        assert summary[LOW] == 1
+
+    def test_summarize_zero_for_absent_severities(self, patched_keywords):
+        hits = score_hits(["testcorp.com|user|pass"])  # LOW only
+        summary = summarize(hits)
+        assert summary[CRITICAL] == 0
+        assert summary[HIGH] == 0
+        assert summary[MEDIUM] == 0
+        assert summary[LOW] == 1
+
+    def test_score_hits_empty_list(self, patched_keywords):
+        assert score_hits([]) == []
diff --git a/tui/__init__.py b/tui/__init__.py
new file mode 100644
index 0000000..58754af
--- /dev/null
+++ b/tui/__init__.py
@@ -0,0 +1 @@
+"""tui — Textual TUI frontend and event bus."""
diff --git a/tui/app.md b/tui/app.md
new file mode 100644
index 0000000..bb79ded
--- /dev/null
+++ b/tui/app.md
@@ -0,0 +1,130 @@
+# tui/app.py
+
+Textual TUI frontend. Entry point: `run_tui()`.
+
+## Entry point
+
+```python
+from tui.app import run_tui
+run_tui()   # called by main.py
+```
+
+---
+
+## Screen hierarchy
+
+```
+MonitorApp (App)
+├── [default screen]
+│   ├── Header
+│   ├── #top-row (Horizontal)
+│   │   ├── DownloadPanel  #dl-panel
+│   │   └── HitsPanel      #hits-panel
+│   ├── StatsPanel         #stats-panel
+│   ├── ChannelPanel       #ch-panel
+│   └── Footer
+├── SearchScreen     (push/pop via 's')
+├── HitsDBScreen     (push/pop via 'h')
+└── KeywordsScreen   (push/pop via 'k')
+```
+
+---
+
+## MonitorApp
+
+### Threading model
+- **Bot backend** → `threading.Thread(daemon=True)` with its own `asyncio.new_event_loop()`  
+  Runs `_bot_main()` — Telethon is completely isolated from Textual's loop.
+- **TUI drain** → `set_interval(0.1, _drain_bus)` — polls `queue.Queue` every 100ms on Textual's loop.
+
+### Key methods
+
+| Method | Description |
+|--------|-------------|
+| `on_mount()` | Calls `bus.init_bus()`, starts bot thread, sets drain interval |
+| `_drain_bus()` | Drains all pending events from `queue.Queue`, dispatches to widgets |
+| `_run_bot_thread()` | Thread entry: creates event loop, runs `_bot_main()` |
+| `_bot_main()` | Async bot backend: connect, auth, backfill, live handler loop |
+| `_signal_channel_changed()` | Thread-safely sets the bot loop's `asyncio.Event` via `call_soon_threadsafe` |
+
+### Keybindings
+
+| Key | Action |
+|-----|--------|
+| `s` | Push `SearchScreen` |
+| `h` | Push `HitsDBScreen` |
+| `k` | Push `KeywordsScreen` |
+| `c` | Clear download + hits logs |
+| `r` | Force-refresh stats bar |
+| `q` / `ctrl+c` | Quit |
+
+---
+
+## Widgets
+
+### DownloadPanel
+Left panel. Two `RichLog` widgets separated by a dashed line:
+- **top** (`#tdl-out`): raw tdl output lines (ANSI stripped)
+- **bottom** (`#dl-log`): structured download status entries
+
+Methods: `tdl_line(line)`, `queued(filename, size_mb, source, password)`, `status(filename, state, via)`, `clear_logs()`
+
+States for `status()`: `queued` · `downloading` · `done_tdl` · `done_tel` · `failed`
+
+### HitsPanel
+Right panel. Single `RichLog` with color-coded hit entries.  
+Reactive `hit_count` updates the panel title badge automatically.
+
+Methods: `add_hit(severity, raw, source, filename, reasons)`, `clear_log()`
+
+### StatsPanel
+Slim horizontal bar. Polls `utils.database.stats()` every 10s via `set_interval`.  
+Also refreshed immediately on each `EvHit` event.
+
+### ChannelPanel
+Bottom panel. `ListView` + `Input` + buttons.  
+Add/remove posts `EvChannelAdded` / `EvChannelRemoved` onto the bus.  
+Changes apply immediately (handler re-registered). Not persisted to `config.py` automatically.
+
+---
+
+## Screens
+
+### SearchScreen (`s`)
+- Text input → queries `utils.database.search(keyword)`
+- Results in a `DataTable` with columns: Sev, Time, URL, Username, Password, Source, File
+- Submit with `↵` or Search button; `Escape` to dismiss
+
+### HitsDBScreen (`h`)
+- Toolbar buttons + number keys filter by severity
+- `r` → recent 50, `1`→CRITICAL, `2`→HIGH, `3`→MEDIUM, `4`→LOW
+- Calls `utils.database.recent()` / `by_severity()`
+
+### KeywordsScreen (`k`)
+- Live-edit `config.TARGET_KEYWORDS`
+- Validates regex before adding
+- On change: rebuilds `utils.scorer.EMPLOYEE_DOMAINS` and `ORG_DOMAINS`
+- Bot handler recompiles patterns on the next incoming message automatically
+- **Changes are in-memory only** — copy to `config.py` to persist
+
+---
+
+## Bot auth flow (`_bot_main`)
+
+```
+await bot_client.connect()
+await bot_client.is_user_authorized()? → sign_in(bot_token=...)
+await user_client.connect()
+await user_client.is_user_authorized()? → log error + return (must run --no-tui first)
+warm_entity_cache()
+_make_handler(channels)       ← registers NewMessage handler
+backfill_all()
+run_until_disconnected()  ┐
+_watch_channels()         ┘  gathered
+```
+
+Channel-change signal path:
+```
+ChannelPanel button → EvChannel* on bus → _drain_bus → _signal_channel_changed()
+  → call_soon_threadsafe(asyncio.Event.set) → _watch_channels() wakes → _make_handler()
+```
diff --git a/tui/app.py b/tui/app.py
new file mode 100644
index 0000000..0413862
--- /dev/null
+++ b/tui/app.py
@@ -0,0 +1,1016 @@
+"""
+tui/app.py — Textual TUI for the ULP credential monitor.
+
+Layout (main screen):
+  ┌──────────────────────────────────┬──────────────────────────────────┐
+  │  📥 Downloads                    │  🎯 Hits  [N]                    │
+  │  (live tdl output + status log)  │  (color-coded hit log)           │
+  ├──────────────────────────────────┴──────────────────────────────────┤
+  │  📊 Stats bar  (live DB counters, auto-refresh every 10 s)          │
+  ├─────────────────────────────────────────────────────────────────────┤
+  │  📡 Channels  (add / remove entries; applied immediately)           │
+  ├─────────────────────────────────────────────────────────────────────┤
+  │  Footer (keybindings)                                               │
+  └─────────────────────────────────────────────────────────────────────┘
+
+Additional screens (push/pop via keybindings):
+  • SearchScreen   — full-text search across hits DB        [s]
+  • HitsDBScreen   — paginated recent / severity viewer     [h]
+  • KeywordsScreen — live-edit TARGET_KEYWORDS regex list   [k]
+
+Architecture:
+  - The bot backend runs in a dedicated daemon thread with its own asyncio
+    event loop, isolated from Textual's loop (see MonitorApp.on_mount).
+  - The TUI polls the thread-safe event queue every 100 ms via set_interval()
+    (_drain_bus) and dispatches each event to the right panel.
+  - Channel add/remove from the UI immediately re-registers Telethon handlers
+    via an asyncio.Event set on the bot loop with call_soon_threadsafe().
+  - tdl output is piped (not terminal-inherited) and relayed via EvTdlOutput
+    into the download panel's RichLog.
+  - StatsPanel polls database.stats() every 10 s via set_interval().
+  - Keyword changes are applied in-memory immediately (scorer caches rebuilt);
+    NOT auto-persisted to config.py — a notice banner reminds the user.
+  - Live patterns are recompiled from config.TARGET_KEYWORDS on every message
+    so keyword changes take effect without a handler restart.
+"""
+
+import asyncio
+import logging
+import queue
+import shutil
+import threading
+from datetime import datetime, timezone
+
+from textual.app import App, ComposeResult, Screen
+from textual.binding import Binding
+from textual.containers import Horizontal, Vertical
+from textual.widgets import (
+    Footer, Header, Label, Input, Button,
+    ListView, ListItem, RichLog, DataTable, Static,
+)
+from textual.reactive import reactive
+
+from . import events as bus
+from config import WATCHED_CHANNELS, SESSION_NAME
+import config
+
+log = logging.getLogger(__name__)
+
+# ─── Colour maps ──────────────────────────────────────────────────────────────
+
+SEV_COLOUR = {
+    "CRITICAL": "bold red",
+    "HIGH":     "bold orange1",
+    "MEDIUM":   "bold yellow",
+    "LOW":      "bold green",
+}
+SEV_EMOJI = {
+    "CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢",
+}
+DL_COLOUR = {
+    "queued":      "dim white",
+    "downloading": "bold cyan",
+    "done_tdl":    "bold green",
+    "done_tel":    "green",
+    "failed":      "bold red",
+}
+DL_ICON = {
+    "queued": "⏳", "downloading": "⬇️ ",
+    "done_tdl": "✅", "done_tel": "✅", "failed": "❌",
+}
+
+
+def _now() -> str:
+    return datetime.now(timezone.utc).strftime("%H:%M:%S")
+
+
+# ─── Download panel ───────────────────────────────────────────────────────────
+
+class DownloadPanel(Vertical):
+    """
+    Left panel — two sub-logs stacked vertically:
+      • top:    tdl raw output (stripped ANSI), scrolling
+      • bottom: our own structured status entries
+    """
+
+    DEFAULT_CSS = """
+    DownloadPanel {
+        border: solid $accent;
+        height: 100%;
+        width: 1fr;
+    }
+    DownloadPanel Label.panel-title {
+        background: $accent;
+        color: $text;
+        padding: 0 1;
+        width: 100%;
+    }
+    DownloadPanel Label.sub-title {
+        background: $surface;
+        color: $text-muted;
+        padding: 0 1;
+        width: 100%;
+    }
+    DownloadPanel RichLog {
+        padding: 0 1;
+    }
+    #tdl-out {
+        height: 1fr;
+        border-bottom: dashed $accent-darken-2;
+    }
+    #dl-log {
+        height: 1fr;
+    }
+    """
+
+    def compose(self) -> ComposeResult:
+        yield Label("📥  Downloads", classes="panel-title")
+        yield Label("  tdl output", classes="sub-title")
+        yield RichLog(highlight=False, markup=False, wrap=True, id="tdl-out")
+        yield Label("  status", classes="sub-title")
+        yield RichLog(highlight=True, markup=True, wrap=True, id="dl-log")
+
+    def tdl_line(self, line: str) -> None:
+        self.query_one("#tdl-out", RichLog).write(line)
+
+    def queued(self, filename: str, size_mb: float, source: str,
+               password: str | None) -> None:
+        pw = f"  🔑 [dim]{password}[/dim]" if password else ""
+        self.query_one("#dl-log", RichLog).write(
+            f"[{DL_COLOUR['queued']}]{DL_ICON['queued']} {_now()}  "
+            f"{filename}[/{DL_COLOUR['queued']}]"
+            f"  [dim]{size_mb:.1f} MB  {source}[/dim]{pw}"
+        )
+
+    def status(self, filename: str, state: str, via: str = "") -> None:
+        colour = DL_COLOUR.get(state, "white")
+        icon   = DL_ICON.get(state, "•")
+        suffix = f" [dim]via {via}[/dim]" if via else ""
+        self.query_one("#dl-log", RichLog).write(
+            f"  [dim]↳[/dim] [{colour}]{icon}  {filename}[/{colour}]{suffix}"
+        )
+
+    def clear_logs(self) -> None:
+        self.query_one("#tdl-out", RichLog).clear()
+        self.query_one("#dl-log", RichLog).clear()
+
+
+# ─── Hits panel ───────────────────────────────────────────────────────────────
+
+class HitsPanel(Vertical):
+    """Right panel — scrollable color-coded hit log with live counter badge."""
+
+    hit_count: reactive[int] = reactive(0)
+
+    DEFAULT_CSS = """
+    HitsPanel {
+        border: solid $error;
+        height: 100%;
+        width: 1fr;
+    }
+    HitsPanel Label.panel-title {
+        background: $error;
+        color: $text;
+        padding: 0 1;
+        width: 100%;
+    }
+    HitsPanel RichLog {
+        height: 1fr;
+        padding: 0 1;
+    }
+    """
+
+    def compose(self) -> ComposeResult:
+        yield Label("🎯  Hits", classes="panel-title")
+        yield RichLog(highlight=True, markup=True, wrap=True, id="hits-log")
+
+    def watch_hit_count(self, count: int) -> None:
+        self.query_one(".panel-title", Label).update(f"🎯  Hits  [{count}]")
+
+    def add_hit(self, severity: str, raw: str, source: str,
+                filename: str, reasons: list[str]) -> None:
+        colour = SEV_COLOUR.get(severity, "white")
+        emoji  = SEV_EMOJI.get(severity, "⚪")
+        self.query_one("#hits-log", RichLog).write(
+            f"{emoji} [{colour}]{severity}[/{colour}]  [dim]{_now()}[/dim]\n"
+            f"  [bold]{raw}[/bold]\n"
+            f"  [dim]↳ {' | '.join(reasons)}[/dim]\n"
+            f"  [dim]📁 {filename}   📢 {source}[/dim]"
+        )
+        self.hit_count += 1
+
+    def clear_log(self) -> None:
+        self.query_one("#hits-log", RichLog).clear()
+        self.hit_count = 0
+
+
+# ─── Stats panel ──────────────────────────────────────────────────────────────
+
+class StatsPanel(Horizontal):
+    """
+    Slim bar — shows live DB stats, refreshed every 10 s.
+    Also refreshed immediately whenever a new hit arrives.
+    """
+
+    DEFAULT_CSS = """
+    StatsPanel {
+        border: solid $primary-darken-2;
+        height: 3;
+        width: 100%;
+        padding: 0 1;
+        background: $surface;
+    }
+    StatsPanel Static {
+        width: 1fr;
+        content-align: center middle;
+        color: $text-muted;
+    }
+    StatsPanel Static.stat-critical { color: red; }
+    StatsPanel Static.stat-high     { color: orange; }
+    StatsPanel Static.stat-medium   { color: yellow; }
+    StatsPanel Static.stat-low      { color: green; }
+    """
+
+    def compose(self) -> ComposeResult:
+        yield Static("📊  DB Stats", id="stat-label")
+        yield Static("🔴 —", classes="stat-critical", id="stat-critical")
+        yield Static("🟠 —", classes="stat-high",     id="stat-high")
+        yield Static("🟡 —", classes="stat-medium",   id="stat-medium")
+        yield Static("🟢 —", classes="stat-low",      id="stat-low")
+        yield Static("total: —",   id="stat-total")
+        yield Static("unique: —",  id="stat-unique")
+        yield Static("dupes: —",   id="stat-dupes")
+        yield Static("sources: —", id="stat-sources")
+
+    def on_mount(self) -> None:
+        self.set_interval(10, self.refresh_stats)
+        self.refresh_stats()
+
+    def refresh_stats(self) -> None:
+        try:
+            from utils.database import stats
+            s = stats()
+            self.query_one("#stat-critical", Static).update(f"🔴 {s['critical']}")
+            self.query_one("#stat-high",     Static).update(f"🟠 {s['high']}")
+            self.query_one("#stat-medium",   Static).update(f"🟡 {s['medium']}")
+            self.query_one("#stat-low",      Static).update(f"🟢 {s['low']}")
+            self.query_one("#stat-total",    Static).update(f"total: {s['total']}")
+            self.query_one("#stat-unique",   Static).update(f"unique: {s['unique']}")
+            self.query_one("#stat-dupes",    Static).update(f"dupes: {s['duplicates']}")
+            self.query_one("#stat-sources",  Static).update(f"sources: {s['sources']}")
+        except Exception:
+            pass  # DB not ready yet on first paint
+
+
+# ─── Channel panel ────────────────────────────────────────────────────────────
+
+class ChannelPanel(Vertical):
+    """
+    Bottom panel — live-editable channel list.
+
+    Changes are applied immediately (Telethon handlers are re-registered).
+    To make them permanent, edit config.py's WATCHED_CHANNELS manually.
+    """
+
+    DEFAULT_CSS = """
+    ChannelPanel {
+        border: solid $warning;
+        height: 14;
+        width: 100%;
+    }
+    ChannelPanel Label.panel-title {
+        background: $warning;
+        color: $text;
+        padding: 0 1;
+        width: 100%;
+    }
+    ChannelPanel Horizontal.controls {
+        height: 3;
+        padding: 0 1;
+    }
+    ChannelPanel Horizontal.controls Input {
+        width: 1fr;
+    }
+    ChannelPanel Horizontal.controls Button {
+        width: auto;
+        margin-left: 1;
+    }
+    ChannelPanel Horizontal.list-row {
+        height: 1fr;
+    }
+    ChannelPanel Horizontal.list-row ListView {
+        width: 1fr;
+        height: 100%;
+    }
+    ChannelPanel Horizontal.list-row Button {
+        width: 14;
+        margin: 0 1;
+    }
+    """
+
+    def __init__(self, initial_channels: list, **kwargs):
+        super().__init__(**kwargs)
+        self._channels: list[str | int] = list(initial_channels)
+
+    def compose(self) -> ComposeResult:
+        yield Label(
+            "📡  Channels  —  changes apply immediately  |  edit config.py to persist",
+            classes="panel-title",
+        )
+        with Horizontal(classes="controls"):
+            yield Input(placeholder="channel username  or  -100xxxxxxxxxx", id="ch-input")
+            yield Button("➕ Add", id="ch-add", variant="success")
+        with Horizontal(classes="list-row"):
+            yield ListView(id="ch-list")
+            yield Button("🗑  Remove", id="ch-remove", variant="error")
+
+    def on_mount(self) -> None:
+        self._refresh_list()
+
+    def _refresh_list(self) -> None:
+        lv = self.query_one("#ch-list", ListView)
+        lv.clear()
+        for ch in self._channels:
+            lv.append(ListItem(Label(str(ch))))
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        if event.button.id == "ch-add":
+            inp = self.query_one("#ch-input", Input)
+            raw = inp.value.strip()
+            if not raw:
+                return
+            channel: str | int = int(raw) if raw.lstrip("-").isdigit() else raw
+            if channel not in self._channels:
+                self._channels.append(channel)
+                self._refresh_list()
+                bus.post(bus.EvChannelAdded(channel=channel))
+                self.app.notify(f"Added: {channel}", severity="information")
+            inp.value = ""
+
+        elif event.button.id == "ch-remove":
+            lv  = self.query_one("#ch-list", ListView)
+            idx = lv.index
+            if idx is None or not (0 <= idx < len(self._channels)):
+                self.app.notify("Select a channel first", severity="warning")
+                return
+            removed = self._channels.pop(idx)
+            self._refresh_list()
+            bus.post(bus.EvChannelRemoved(channel=removed))
+            self.app.notify(f"Removed: {removed}", severity="warning")
+
+    @property
+    def channels(self) -> list[str | int]:
+        return list(self._channels)
+
+
+# ─── Search screen ────────────────────────────────────────────────────────────
+
+class SearchScreen(Screen):
+    """Full-text search across the hits database (url, username, raw line)."""
+
+    BINDINGS = [Binding("escape", "dismiss", "Back")]
+
+    DEFAULT_CSS = """
+    SearchScreen { background: $background; }
+    SearchScreen Label.screen-title {
+        background: $primary;
+        color: $text;
+        padding: 0 1;
+        width: 100%;
+    }
+    SearchScreen #search-bar {
+        height: 3;
+        padding: 0 1;
+    }
+    SearchScreen #search-bar Input  { width: 1fr; }
+    SearchScreen #search-bar Button { width: 14; margin-left: 1; }
+    SearchScreen #result-count { padding: 0 1; color: $text-muted; }
+    SearchScreen #results-table { height: 1fr; margin: 0 1 1 1; }
+    """
+
+    def compose(self) -> ComposeResult:
+        yield Header()
+        yield Label("🔍  Search Hits Database", classes="screen-title")
+        with Horizontal(id="search-bar"):
+            yield Input(placeholder="keyword, domain, username, IP…", id="search-input")
+            yield Button("Search", id="search-btn", variant="primary")
+        yield Label("Enter a keyword and press Search or ↵", id="result-count")
+        yield DataTable(id="results-table", zebra_stripes=True, cursor_type="row")
+        yield Footer()
+
+    def on_mount(self) -> None:
+        t = self.query_one("#results-table", DataTable)
+        t.add_columns("Sev", "Time", "URL", "Username", "Password", "Source", "File")
+        self.query_one("#search-input", Input).focus()
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        if event.button.id == "search-btn":
+            self._run_search()
+
+    def on_input_submitted(self, event: Input.Submitted) -> None:
+        if event.input.id == "search-input":
+            self._run_search()
+
+    def _run_search(self) -> None:
+        kw = self.query_one("#search-input", Input).value.strip()
+        if not kw:
+            return
+        try:
+            from utils.database import search
+            rows = search(kw)
+        except Exception as e:
+            self.app.notify(f"Search error: {e}", severity="error")
+            return
+
+        t = self.query_one("#results-table", DataTable)
+        t.clear()
+        for row in rows:
+            emoji = SEV_EMOJI.get(row["severity"], "⚪")
+            t.add_row(
+                f"{emoji} {row['severity']}",
+                row["timestamp"],
+                (row["url"]      or "")[:45],
+                (row["username"] or "")[:30],
+                (row["password"] or "")[:20],
+                (row["source"]   or "")[:20],
+                (row["filename"] or "")[:25],
+            )
+        self.query_one("#result-count", Label).update(
+            f"  {len(rows)} result(s) for '{kw}'"
+        )
+
+    def action_dismiss(self) -> None:
+        self.app.pop_screen()
+
+
+# ─── Hits DB viewer screen ────────────────────────────────────────────────────
+
+class HitsDBScreen(Screen):
+    """
+    Paginated viewer for DB hits.
+    Toolbar buttons + number-key bindings filter by severity.
+    """
+
+    BINDINGS = [
+        Binding("escape", "dismiss",         "Back"),
+        Binding("r",      "load_recent",     "Recent 50"),
+        Binding("1",      "filter_critical", "CRITICAL"),
+        Binding("2",      "filter_high",     "HIGH"),
+        Binding("3",      "filter_medium",   "MEDIUM"),
+        Binding("4",      "filter_low",      "LOW"),
+    ]
+
+    DEFAULT_CSS = """
+    HitsDBScreen { background: $background; }
+    HitsDBScreen Label.screen-title {
+        background: $error;
+        color: $text;
+        padding: 0 1;
+        width: 100%;
+    }
+    HitsDBScreen #toolbar {
+        height: 3;
+        padding: 0 1;
+        background: $surface;
+    }
+    HitsDBScreen #toolbar Button { margin-right: 1; width: auto; }
+    HitsDBScreen #db-status { padding: 0 1; color: $text-muted; }
+    HitsDBScreen #hits-db-table { height: 1fr; margin: 0 1 1 1; }
+    """
+
+    def compose(self) -> ComposeResult:
+        yield Header()
+        yield Label("📋  Hits Database Viewer", classes="screen-title")
+        with Horizontal(id="toolbar"):
+            yield Button("Recent 50",  id="btn-recent",   variant="default")
+            yield Button("🔴 CRITICAL", id="btn-critical", variant="error")
+            yield Button("🟠 HIGH",     id="btn-high",     variant="warning")
+            yield Button("🟡 MEDIUM",   id="btn-medium",   variant="default")
+            yield Button("🟢 LOW",      id="btn-low",      variant="success")
+        yield Label("", id="db-status")
+        yield DataTable(id="hits-db-table", zebra_stripes=True, cursor_type="row")
+        yield Footer()
+
+    def on_mount(self) -> None:
+        t = self.query_one("#hits-db-table", DataTable)
+        t.add_columns("ID", "Sev", "Timestamp", "URL", "Username", "Source", "Status")
+        self._load_recent()
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        dispatch = {
+            "btn-recent":   self._load_recent,
+            "btn-critical": lambda: self._load_severity("CRITICAL"),
+            "btn-high":     lambda: self._load_severity("HIGH"),
+            "btn-medium":   lambda: self._load_severity("MEDIUM"),
+            "btn-low":      lambda: self._load_severity("LOW"),
+        }
+        fn = dispatch.get(event.button.id)
+        if fn:
+            fn()
+
+    def _populate(self, rows, label: str) -> None:
+        t = self.query_one("#hits-db-table", DataTable)
+        t.clear()
+        for row in rows:
+            emoji  = SEV_EMOJI.get(row["severity"], "⚪")
+            status = "dup" if row["seen_before"] else "new"
+            t.add_row(
+                str(row["id"]),
+                f"{emoji} {row['severity']}",
+                row["timestamp"],
+                (row["url"]      or "")[:45],
+                (row["username"] or "")[:30],
+                (row["source"]   or "")[:20],
+                status,
+            )
+        self.query_one("#db-status", Label).update(
+            f"  {len(rows)} row(s) — {label}"
+        )
+
+    def _load_recent(self) -> None:
+        try:
+            from utils.database import recent
+            self._populate(recent(50), "most recent 50")
+        except Exception as e:
+            self.app.notify(f"DB error: {e}", severity="error")
+
+    def _load_severity(self, sev: str) -> None:
+        try:
+            from utils.database import by_severity
+            self._populate(by_severity(sev), f"severity = {sev} (unique only)")
+        except Exception as e:
+            self.app.notify(f"DB error: {e}", severity="error")
+
+    def action_dismiss(self)        : self.app.pop_screen()
+    def action_load_recent(self)    : self._load_recent()
+    def action_filter_critical(self): self._load_severity("CRITICAL")
+    def action_filter_high(self)    : self._load_severity("HIGH")
+    def action_filter_medium(self)  : self._load_severity("MEDIUM")
+    def action_filter_low(self)     : self._load_severity("LOW")
+
+
+# ─── Keywords screen ──────────────────────────────────────────────────────────
+
+class KeywordsScreen(Screen):
+    """
+    Live-edit TARGET_KEYWORDS regex patterns.
+
+    Additions / removals apply immediately:
+      • config.TARGET_KEYWORDS is mutated in place
+      • scorer's domain caches are rebuilt
+      • The bot handler recompiles patterns on the next message automatically
+
+    Changes are NOT written back to config.py — a notice banner says so.
+    """
+
+    BINDINGS = [Binding("escape", "dismiss", "Back")]
+
+    DEFAULT_CSS = """
+    KeywordsScreen { background: $background; }
+    KeywordsScreen Label.screen-title {
+        background: $success;
+        color: $text;
+        padding: 0 1;
+        width: 100%;
+    }
+    KeywordsScreen Label.notice {
+        background: $warning;
+        color: $text;
+        padding: 0 1;
+        width: 100%;
+    }
+    KeywordsScreen #kw-controls {
+        height: 3;
+        padding: 0 1;
+    }
+    KeywordsScreen #kw-controls Input  { width: 1fr; }
+    KeywordsScreen #kw-controls Button { width: auto; margin-left: 1; }
+    KeywordsScreen #kw-list-row {
+        height: 1fr;
+        padding: 0 1;
+    }
+    KeywordsScreen #kw-list {
+        width: 1fr;
+        height: 100%;
+        border: solid $primary;
+    }
+    KeywordsScreen #kw-list-row Button { width: 16; margin-left: 1; }
+    """
+
+    def compose(self) -> ComposeResult:
+        yield Header()
+        yield Label("🔑  Keyword / Pattern Editor", classes="screen-title")
+        yield Label(
+            "⚠  Changes are in-memory only — copy patterns to config.py to persist across restarts.",
+            classes="notice",
+        )
+        with Horizontal(id="kw-controls"):
+            yield Input(
+                placeholder="regex  e.g.  @myorg\\.com   or   192\\.168\\.10\\.",
+                id="kw-input",
+            )
+            yield Button("➕ Add", id="kw-add", variant="success")
+        with Horizontal(id="kw-list-row"):
+            yield ListView(id="kw-list")
+            yield Button("🗑  Remove", id="kw-remove", variant="error")
+        yield Footer()
+
+    def on_mount(self) -> None:
+        self._refresh_list()
+        self.query_one("#kw-input", Input).focus()
+
+    def _refresh_list(self) -> None:
+        lv = self.query_one("#kw-list", ListView)
+        lv.clear()
+        for kw in config.TARGET_KEYWORDS:
+            lv.append(ListItem(Label(kw)))
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        if event.button.id == "kw-add":
+            inp = self.query_one("#kw-input", Input)
+            raw = inp.value.strip()
+            if not raw:
+                return
+            import re
+            try:
+                re.compile(raw, re.IGNORECASE)
+            except re.error as e:
+                self.app.notify(f"Invalid regex: {e}", severity="error")
+                return
+            if raw not in config.TARGET_KEYWORDS:
+                config.TARGET_KEYWORDS.append(raw)
+                self._rebuild_scorer()
+                self._refresh_list()
+                self.app.notify(f"Pattern added: {raw}", severity="information")
+            inp.value = ""
+
+        elif event.button.id == "kw-remove":
+            lv  = self.query_one("#kw-list", ListView)
+            idx = lv.index
+            if idx is None or not (0 <= idx < len(config.TARGET_KEYWORDS)):
+                self.app.notify("Select a pattern first", severity="warning")
+                return
+            removed = config.TARGET_KEYWORDS.pop(idx)
+            self._rebuild_scorer()
+            self._refresh_list()
+            self.app.notify(f"Pattern removed: {removed}", severity="warning")
+
+    def on_input_submitted(self, event: Input.Submitted) -> None:
+        if event.input.id == "kw-input":
+            # Simulate Add button press
+            self.on_button_pressed(
+                Button.Pressed(self.query_one("#kw-add", Button))
+            )
+
+    def _rebuild_scorer(self) -> None:
+        """Rebuild scorer's cached domain patterns after a keyword change."""
+        try:
+            import scorer
+            scorer.EMPLOYEE_DOMAINS = scorer._build_employee_domains()
+            scorer.ORG_DOMAINS      = scorer._build_org_domains()
+        except Exception as e:
+            log.warning(f"Could not rebuild scorer caches: {e}")
+        bus.post(bus.EvStatus(
+            f"Keywords updated — {len(config.TARGET_KEYWORDS)} pattern(s) active"
+        ))
+
+    def action_dismiss(self) -> None:
+        self.app.pop_screen()
+
+
+# ─── Main application ─────────────────────────────────────────────────────────
+
+class MonitorApp(App):
+
+    CSS = """
+    Screen { layout: vertical; }
+    #top-row { layout: horizontal; height: 1fr; }
+    """
+
+    BINDINGS = [
+        Binding("q",      "quit",           "Quit",         priority=True),
+        Binding("ctrl+c", "quit",           "Quit",         priority=True),
+        Binding("s",      "push_search",    "Search DB"),
+        Binding("h",      "push_hits_db",   "Hits DB"),
+        Binding("k",      "push_keywords",  "Keywords"),
+        Binding("c",      "clear_logs",     "Clear Logs"),
+        Binding("r",      "refresh_stats",  "Refresh Stats"),
+    ]
+
+    TITLE     = "ULP Credential Monitor"
+    SUB_TITLE = f"session: {SESSION_NAME}"
+
+    def __init__(self):
+        super().__init__()
+        self._live_channels: list[str | int] = list(WATCHED_CHANNELS)
+        # Set by _drain_bus (Textual loop), read by _bot_main (bot loop)
+        # via call_soon_threadsafe so the asyncio.Event is set on the right loop.
+        self._bot_loop_channel_event: asyncio.Event | None = None
+        self._bot_loop: asyncio.AbstractEventLoop | None = None
+
+    def compose(self) -> ComposeResult:
+        yield Header()
+        with Horizontal(id="top-row"):
+            yield DownloadPanel(id="dl-panel")
+            yield HitsPanel(id="hits-panel")
+        yield StatsPanel(id="stats-panel")
+        yield ChannelPanel(initial_channels=WATCHED_CHANNELS, id="ch-panel")
+        yield Footer()
+
+    def on_mount(self) -> None:
+        # The bot backend runs in its own thread with its own asyncio event
+        # loop, completely isolated from Textual.  Telethon spawns background
+        # tasks via asyncio.ensure_future() and calls connect() which returns
+        # only after its receiver loop is scheduled — both of these deadlock
+        # inside Textual's managed loop.  Running in a dedicated thread
+        # sidesteps all of that.
+        #
+        # Communication uses a thread-safe queue.Queue (see tui/events.py).
+        # The TUI polls it every 100 ms via set_interval().
+        bus.init_bus()
+        self._bot_thread = threading.Thread(
+            target=self._run_bot_thread,
+            name="bot-thread",
+            daemon=True,
+        )
+        self._bot_thread.start()
+        # Poll the thread-safe queue and dispatch to widgets
+        self.set_interval(0.1, self._drain_bus)
+
+    # ── Screen navigation ─────────────────────────────────────────────────────
+
+    def action_push_search(self)   : self.push_screen(SearchScreen())
+    def action_push_hits_db(self)  : self.push_screen(HitsDBScreen())
+    def action_push_keywords(self) : self.push_screen(KeywordsScreen())
+
+    def action_clear_logs(self) -> None:
+        self.query_one("#dl-panel",   DownloadPanel).clear_logs()
+        self.query_one("#hits-panel", HitsPanel).clear_log()
+        self.notify("Logs cleared", severity="information")
+
+    def action_refresh_stats(self) -> None:
+        self.query_one("#stats-panel", StatsPanel).refresh_stats()
+        self.notify("Stats refreshed", severity="information")
+
+    # ── Event bus consumer ────────────────────────────────────────────────────
+
+    def _signal_channel_changed(self) -> None:
+        """Thread-safely set the channel-change event on the bot loop."""
+        ev = self._bot_loop_channel_event
+        loop = self._bot_loop
+        if ev is not None and loop is not None and loop.is_running():
+            loop.call_soon_threadsafe(ev.set)
+
+    # ── Bus drain (runs on Textual's loop via set_interval) ──────────────────
+
+    def _drain_bus(self) -> None:
+        """
+        Called every 100 ms by set_interval().  Drains all pending events
+        from the thread-safe queue and dispatches them to the right widget.
+        Runs on Textual's event loop — safe to call widget methods directly.
+        """
+        q = bus.get_bus()
+        if q is None:
+            return
+
+        try:
+            dl    = self.query_one("#dl-panel",    DownloadPanel)
+            hit   = self.query_one("#hits-panel",  HitsPanel)
+            stats = self.query_one("#stats-panel", StatsPanel)
+        except Exception:
+            return  # widgets not mounted yet
+
+        # Drain everything currently in the queue in one pass
+        while True:
+            try:
+                ev = q.get_nowait()
+            except queue.Empty:
+                break
+
+            try:
+                if isinstance(ev, bus.EvTdlOutput):
+                    dl.tdl_line(ev.line)
+
+                elif isinstance(ev, bus.EvDownloadQueued):
+                    dl.queued(ev.filename, ev.size_mb, ev.source, ev.password)
+
+                elif isinstance(ev, bus.EvDownloadStarted):
+                    dl.status(ev.filename, "downloading")
+
+                elif isinstance(ev, bus.EvDownloadDone):
+                    dl.status(ev.filename,
+                              "done_tdl" if ev.via == "tdl" else "done_tel",
+                              via=ev.via)
+
+                elif isinstance(ev, bus.EvDownloadFailed):
+                    dl.status(ev.filename, "failed")
+
+                elif isinstance(ev, bus.EvHit):
+                    hit.add_hit(ev.severity, ev.raw, ev.source, ev.filename, ev.reasons)
+                    stats.refresh_stats()
+
+                elif isinstance(ev, bus.EvChannelAdded):
+                    if ev.channel not in self._live_channels:
+                        self._live_channels.append(ev.channel)
+                    self._signal_channel_changed()
+
+                elif isinstance(ev, bus.EvChannelRemoved):
+                    self._live_channels = [
+                        c for c in self._live_channels if c != ev.channel
+                    ]
+                    self._signal_channel_changed()
+
+                elif isinstance(ev, bus.EvStatus):
+                    log.info(f"[bus] EvStatus: {ev.text}")
+                    severity = {"error": "error", "warning": "warning"}.get(
+                        ev.level, "information"
+                    )
+                    self.notify(ev.text, severity=severity)
+
+                else:
+                    log.warning(f"[bus] Unknown event type: {type(ev)}")
+
+            except Exception as e:
+                log.error(f"[bus] Dispatch error for {type(ev).__name__}: {e}", exc_info=True)
+
+    # ── Bot thread ────────────────────────────────────────────────────────────
+
+    def _run_bot_thread(self) -> None:
+        """
+        Entry point for the bot background thread.
+        Creates a brand-new asyncio event loop for Telethon to use,
+        completely isolated from Textual's loop.
+        """
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        self._bot_loop = loop
+        try:
+            loop.run_until_complete(self._bot_main())
+        except Exception as e:
+            log.error(f"[bot-thread] Unhandled exception: {e}", exc_info=True)
+            bus.post(bus.EvStatus(f"Bot thread crashed: {e}", level="error"))
+        finally:
+            loop.close()
+
+    async def _bot_main(self) -> None:
+        """
+        Full bot backend — runs inside the bot thread's own event loop.
+        Telethon is free to schedule background tasks without interfering
+        with Textual's loop.
+
+        Steps: initialise the DB, connect the bot and user clients, register
+        the live NewMessage handler for ``self._live_channels``, run the
+        backfill, then stay alive until the user client disconnects while a
+        watcher task re-registers the handler on every channel-list change.
+
+        Connection and runtime errors are caught, logged and reported through
+        ``bus.EvStatus`` instead of being raised.  Both clients are
+        disconnected and ``config.TEMP_DIR`` (if present) is emptied on every
+        exit path, including the early "not authorized" return.
+        """
+        import shutil as _shutil
+        from telethon import TelegramClient
+        from telethon import events as tl_events
+        from core.processor import compile_patterns
+        from core.notifier import send_status
+        from core.scraper import backfill_all, warm_entity_cache
+        from utils.database import init_db
+
+        init_db()
+        patterns = compile_patterns(config.TARGET_KEYWORDS)
+
+        bus.post(bus.EvStatus(
+            f"Starting — {len(config.WATCHED_CHANNELS)} channel(s), "
+            f"{len(patterns)} pattern(s)"
+        ))
+
+        user_client = TelegramClient(
+            config.SESSION_NAME, config.API_ID, config.API_HASH,
+            connection_retries=5, auto_reconnect=True, request_retries=5,
+        )
+        bot_client = TelegramClient(
+            "bot_session", config.API_ID, config.API_HASH,
+        )
+
+        try:
+            log.info("[bot] Connecting bot_client...")
+            await bot_client.connect()
+            log.info("[bot] bot_client connected, authorizing...")
+            if not await bot_client.is_user_authorized():
+                await bot_client.sign_in(bot_token=config.BOT_TOKEN)
+            log.info("[bot] bot_client ready")
+
+            log.info("[bot] Connecting user_client...")
+            await user_client.connect()
+            log.info("[bot] user_client connected, checking auth...")
+            if not await user_client.is_user_authorized():
+                log.error("[bot] user_client not authorized — run: python main.py --no-tui")
+                bus.post(bus.EvStatus(
+                    "Not authorized — run --no-tui once to complete login",
+                    level="error",
+                ))
+                # The outer ``finally`` below still disconnects both clients.
+                return
+            log.info("[bot] user_client ready")
+
+            try:
+                me = await user_client.get_me()
+                bus.post(bus.EvStatus(f"Connected as {me.first_name} (@{me.username})"))
+                await send_status(
+                    bot_client,
+                    f"✅ *Monitor started* (TUI)\n"
+                    f"User: `{me.first_name}`\n"
+                    f"Channels: `{len(config.WATCHED_CHANNELS)}`\n"
+                    f"Patterns: `{len(patterns)}`",
+                )
+
+                await warm_entity_cache(user_client)
+
+                # One-element list so _make_handler can rebind the "current
+                # handler" from inside the closure.
+                _current_handler = [None]
+
+                def _make_handler(channels):
+                    # Replace, never stack: drop the previous handler first.
+                    if _current_handler[0] is not None:
+                        user_client.remove_event_handler(_current_handler[0])
+
+                    from core.bot_downloader import (
+                        handle_bot_download_message,
+                        has_download_button,
+                        extract_password,
+                    )
+                    from core.scraper import handle_message
+                    from telethon.tl.types import MessageMediaDocument
+
+                    # Last password seen per chat; reused for later messages
+                    # from the same chat that carry no password themselves.
+                    _channel_passwords: dict[int, str] = {}
+
+                    @user_client.on(tl_events.NewMessage(chats=channels))
+                    async def _handler(event):
+                        msg = event.message
+                        try:
+                            source = event.chat.username or str(event.chat_id)
+                        except Exception:
+                            source = str(event.chat_id)
+
+                        chat_id = event.chat_id
+                        msg_pw  = extract_password(msg)
+                        if msg_pw:
+                            _channel_passwords[chat_id] = msg_pw
+                        password = msg_pw or _channel_passwords.get(chat_id)
+
+                        # Recompiled per message from the current keyword list.
+                        live_patterns = compile_patterns(config.TARGET_KEYWORDS)
+
+                        if msg.media and isinstance(msg.media, MessageMediaDocument):
+                            await handle_message(
+                                user_client, bot_client, msg,
+                                source, live_patterns, password=password,
+                            )
+                        elif msg.buttons and has_download_button(msg):
+                            await handle_bot_download_message(
+                                user_client, bot_client, msg,
+                                source, live_patterns, password=password,
+                            )
+
+                    _current_handler[0] = _handler
+                    log.info(f"[bot] Handler registered for {len(channels)} channel(s)")
+                    bus.post(bus.EvStatus(f"Watching {len(channels)} channel(s)"))
+
+                # Channel-change event — lives on this (bot) loop.
+                # Textual signals it thread-safely via _signal_channel_changed().
+                _ch_changed = asyncio.Event()
+                self._bot_loop_channel_event = _ch_changed
+
+                _make_handler(list(self._live_channels))
+                bus.post(bus.EvStatus("Live listener active"))
+
+                await backfill_all(user_client, bot_client, patterns)
+                bus.post(bus.EvStatus("Backfill complete — monitoring live"))
+
+                async def _watch_channels():
+                    while True:
+                        await _ch_changed.wait()
+                        _ch_changed.clear()
+                        new_channels = list(self._live_channels)
+                        log.info(f"[bot] Channel list changed → {new_channels}")
+                        _make_handler(new_channels)
+
+                # Run the client and the channel watcher side by side and stop
+                # as soon as EITHER finishes.  _watch_channels() never returns
+                # on its own, so a plain gather() would wait forever after a
+                # clean client disconnect.
+                tasks = [
+                    asyncio.ensure_future(user_client.run_until_disconnected()),
+                    asyncio.ensure_future(_watch_channels()),
+                ]
+                try:
+                    done, _ = await asyncio.wait(
+                        tasks, return_when=asyncio.FIRST_COMPLETED
+                    )
+                    for task in done:
+                        task.result()  # re-raise the error that ended a task
+                finally:
+                    for task in tasks:
+                        task.cancel()
+                    # Let the cancellations land before the loop is closed.
+                    await asyncio.gather(*tasks, return_exceptions=True)
+
+            except Exception as e:
+                bus.post(bus.EvStatus(f"Bot error: {e}", level="error"))
+                log.error("[bot] Bot main crashed", exc_info=True)
+
+        except Exception as e:
+            bus.post(bus.EvStatus(f"Bot connect error: {e}", level="error"))
+            log.error("[bot] Connection failed", exc_info=True)
+        finally:
+            # Disconnect on EVERY exit path (connect failure, "not authorized"
+            # return, runtime crash, normal shutdown).  Each client is guarded
+            # separately so one failing disconnect cannot skip the other or
+            # the temp-dir cleanup.
+            log.info("[bot] Disconnecting clients...")
+            for client in (user_client, bot_client):
+                try:
+                    await client.disconnect()
+                except Exception:
+                    log.warning("[bot] Client disconnect failed", exc_info=True)
+            if config.TEMP_DIR.exists():
+                _shutil.rmtree(config.TEMP_DIR, ignore_errors=True)
+                config.TEMP_DIR.mkdir(exist_ok=True)
+
+    def action_quit(self) -> None:
+        """Exit the app by delegating to ``self.exit()``."""
+        # NOTE(review): the ``action_`` prefix suggests a Textual action
+        # handler; the key binding that triggers it is not visible here.
+        self.exit()
+
+
+# ─── Entry point ──────────────────────────────────────────────────────────────
+
+def run_tui() -> None:
+    """Entry point for TUI mode: instantiate ``MonitorApp`` and call its ``run()``."""
+    # bus.init_bus() is deliberately NOT called here; per tui/events.md it is
+    # called from MonitorApp.on_mount().
+    # NOTE(review): the earlier rationale ("binds the Queue to the outer
+    # loop") does not hold for the bus as written — tui/events.py creates a
+    # plain thread-safe queue.Queue, which is not tied to any event loop.
+    # Confirm whether the ordering constraint is still required.
+    MonitorApp().run()
diff --git a/tui/events.md b/tui/events.md
new file mode 100644
index 0000000..674117e
--- /dev/null
+++ b/tui/events.md
@@ -0,0 +1,66 @@
+# tui/events.py
+
+Thread-safe event bus between the bot backend thread and the Textual TUI.  
+The bot thread calls `post()`. The TUI drains the queue every 100ms via `_drain_bus()`.
+
+## Public API
+
+```python
+from tui import events as bus   # from core/ and tui/app.py
+from tui.events import post, init_bus, get_bus, tui_active
+```
+
+### `init_bus() -> queue.Queue`
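+Creates the `queue.Queue` and sets `tui_active = True`. Called once from `MonitorApp.on_mount()`, not before `App.run()`; until it has run, `post()` silently drops events. (The queue is a plain thread-safe `queue.Queue`, so it is not bound to any event loop.)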
+
+### `post(event: Any) -> None`
+Fire-and-forget from any thread. Silently drops if bus not initialised.  
+Uses `queue.Queue.put_nowait()` — never blocks.
+
+### `get_bus() -> queue.Queue | None`
+Returns the queue for the TUI consumer to drain.
+
+### `tui_active: bool`
+Set to `True` by `init_bus()`. Checked by `core/tdl_downloader.py` to decide whether to pipe tdl output or inherit the terminal.
+
+---
+
+## Event types
+
+| Class | Fields | Posted by | Consumed by |
+|-------|--------|-----------|-------------|
+| `EvDownloadQueued` | `batch_id, filename, size_mb, source, password` | `tdl_downloader`, `scraper` | `DownloadPanel.queued()` |
+| `EvDownloadStarted` | `batch_id, filename` | `tdl_downloader`, `scraper` | `DownloadPanel.status("downloading")` |
+| `EvDownloadDone` | `batch_id, filename, via` | `tdl_downloader`, `scraper` | `DownloadPanel.status("done_tdl"\|"done_tel")` |
+| `EvDownloadFailed` | `batch_id, filename, reason` | `tdl_downloader`, `scraper` | `DownloadPanel.status("failed")` |
+| `EvTdlOutput` | `line` | `tdl_downloader._relay()` | `DownloadPanel.tdl_line()` |
+| `EvHit` | `severity, raw, source, filename, reasons` | `notifier.notify()` | `HitsPanel.add_hit()` + `StatsPanel.refresh_stats()` |
+| `EvChannelAdded` | `channel` | `ChannelPanel.on_button_pressed()` | `_drain_bus` → `_signal_channel_changed()` |
+| `EvChannelRemoved` | `channel` | `ChannelPanel.on_button_pressed()` | `_drain_bus` → `_signal_channel_changed()` |
+| `EvStatus` | `text, level` | everywhere | `MonitorApp.notify()` toast |
+
+`level` on `EvStatus`: `"info"` (default) · `"warning"` · `"error"`
+
+---
+
+## Threading model
+
+```
+Bot thread (own asyncio loop)
+  └─ bus.post(event)          ← queue.Queue.put_nowait() [thread-safe]
+        ↓
+  queue.Queue
+        ↓
+Textual thread (Textual's loop)
+  └─ _drain_bus() [set_interval 100ms]
+       └─ q.get_nowait() loop
+            └─ dispatch to widgets [safe, same thread as Textual]
+```
+
+Channel changes flow the other way:
+```
+_drain_bus sees EvChannelAdded/Removed
+  → _signal_channel_changed()
+       → loop.call_soon_threadsafe(asyncio.Event.set)
+            → bot thread's _watch_channels() wakes
+```
diff --git a/tui/events.py b/tui/events.py
new file mode 100644
index 0000000..ff0cd27
--- /dev/null
+++ b/tui/events.py
@@ -0,0 +1,114 @@
+"""
+tui/events.py — Thread-safe event bus between the bot backend and the TUI.
+
+The bot backend runs in a dedicated thread with its own asyncio event loop
+(completely isolated from Textual's loop).  Events are posted via a standard
+queue.Queue (thread-safe), and the TUI consumer drains it from Textual's loop
+with non-blocking get_nowait() calls on a 100 ms timer (MonitorApp._drain_bus).
+
+post() is safe to call from any thread or any asyncio loop.
+"""
+
+import queue
+import threading
+from dataclasses import dataclass, field
+from typing import Any
+
+# Thread-safe queue — works across the bot thread and Textual's thread.
+_queue: queue.Queue | None = None
+_queue_lock = threading.Lock()
+
+# Set to True when the TUI is running so tdl pipes output instead of
+# writing directly to the terminal.
+tui_active: bool = False
+
+
+def init_bus() -> queue.Queue:
+    """
+    Create a fresh bus queue, mark the TUI as active and return the queue.
+
+    Intended to be called once, from MonitorApp.on_mount().  Each call
+    replaces the module-level queue, so events still sitting in a previous
+    queue are no longer reachable through get_bus().
+    """
+    global _queue, tui_active
+    fresh_queue: queue.Queue = queue.Queue()
+    _queue, tui_active = fresh_queue, True
+    return fresh_queue
+
+
+def get_bus() -> queue.Queue | None:
+    """Return the current bus queue, or ``None`` if ``init_bus()`` has not run yet."""
+    return _queue
+
+
+def post(event: Any) -> None:
+    """
+    Enqueue *event* for the TUI without blocking; callable from any thread.
+
+    Does nothing when the bus has not been initialised.  ``queue.Full`` is
+    ignored as well, so a bounded queue would drop the event instead of
+    raising.
+    """
+    if _queue is None:
+        return
+    try:
+        _queue.put_nowait(event)
+    except queue.Full:
+        pass
+
+
+# ─── Event types ──────────────────────────────────────────────────────────────
+
+@dataclass
+class EvDownloadQueued:
+    """
+    A file has been accepted and is waiting for tdl.
+
+    Per tui/events.md: posted by tdl_downloader and scraper, consumed by
+    DownloadPanel.queued().
+    """
+    batch_id:  str
+    filename:  str
+    size_mb:   float        # size in megabytes, going by the field name
+    source:    str
+    password:  str | None   # NOTE(review): presumably the archive password, None if unknown — confirm
+
+
+@dataclass
+class EvDownloadStarted:
+    """
+    tdl has begun transferring this file.
+
+    Per tui/events.md: consumed as DownloadPanel.status("downloading").
+    """
+    batch_id:  str
+    filename:  str
+
+
+@dataclass
+class EvDownloadDone:
+    """
+    File fully downloaded (tdl or Telethon fallback).
+
+    Per tui/events.md: shown as DownloadPanel status "done_tdl" or
+    "done_tel" (presumably selected from ``via`` — confirm in tui/app.py).
+    """
+    batch_id:  str
+    filename:  str
+    via:       str   # "tdl" | "telethon"
+
+
+@dataclass
+class EvDownloadFailed:
+    """
+    All download attempts failed.
+
+    Per tui/events.md: consumed as DownloadPanel.status("failed").
+    """
+    batch_id:  str
+    filename:  str
+    reason:    str
+
+
+@dataclass
+class EvTdlOutput:
+    """
+    A line of output from tdl's stdout/stderr (TUI mode only).
+
+    Per tui/events.md: posted by tdl_downloader._relay(), consumed by
+    DownloadPanel.tdl_line().
+    """
+    line: str
+
+
+@dataclass
+class EvHit:
+    """
+    A scored credential hit to display in the hits panel.
+
+    Per tui/events.md: posted by notifier.notify(); the TUI adds it to the
+    HitsPanel and refreshes the StatsPanel.
+    """
+    severity:  str   # NOTE(review): presumably CRITICAL/HIGH/MEDIUM/LOW as in utils/scorer — confirm
+    raw:       str
+    source:    str
+    filename:  str
+    reasons:   list[str] = field(default_factory=list)   # fresh list per instance
+
+
+@dataclass
+class EvChannelAdded:
+    """
+    A channel was added to the live watch list.
+
+    Per tui/events.md: posted by ChannelPanel.on_button_pressed(); handling
+    it ends in _signal_channel_changed(), which wakes the bot thread.
+    """
+    channel: str | int
+
+
+@dataclass
+class EvChannelRemoved:
+    """
+    A channel was removed from the live watch list.
+
+    Per tui/events.md: posted by ChannelPanel.on_button_pressed(); handling
+    it ends in _signal_channel_changed(), which wakes the bot thread.
+    """
+    channel: str | int
+
+
+@dataclass
+class EvStatus:
+    """
+    Generic one-line status message (startup, errors, etc.).
+
+    The TUI shows it as a toast: levels "error" and "warning" map to the
+    same-named severities, any other level is shown as "information".
+    """
+    text: str
+    level: str = "info"   # "info" | "warning" | "error"
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..8c6b899
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1 @@
+"""utils — pure logic modules with no Telegram dependencies."""
diff --git a/utils/cache.md b/utils/cache.md
new file mode 100644
index 0000000..91ebaec
--- /dev/null
+++ b/utils/cache.md
@@ -0,0 +1,32 @@
+# utils/cache.py
+
+Tracks already-processed Telegram document IDs to avoid redownloading.  
+Persists to `data/cache.json` as a JSON array of integers.
+
+## Public API
+
+```python
+from utils.cache import is_seen, mark_seen
+```
+
+### `is_seen(file_id: int) -> bool`
+Returns `True` if this document ID has been processed before.  
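+Loads the cache file from disk on every call, so it always reflects the latest saved state, including writes made by other processes (slightly slow for hot loops — not an issue given download cadence). A missing or unreadable cache file is treated as empty.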
+
+### `mark_seen(file_id: int) -> None`
+Adds `file_id` to the cache and persists to disk.
+
+---
+
+## Storage
+
+- **File:** `data/cache.json`
+- **Format:** JSON array of integers — `[123456789, 987654321, ...]`
+- **No expiry** — grows indefinitely. Safe to delete to re-process all files.
+
+---
+
+## Notes
+
+- `is_seen` + `mark_seen` are called in `core/scraper.py` after a successful download+process cycle, not before — so a file that fails mid-process will be retried on next run.
+- Not thread-safe (load/modify/save is not atomic). Acceptable because downloads are sequential within the bot loop.
diff --git a/utils/cache.py b/utils/cache.py
new file mode 100644
index 0000000..8182eeb
--- /dev/null
+++ b/utils/cache.py
@@ -0,0 +1,38 @@
+"""
+cache.py — Tracks already-processed file IDs to avoid redownloading.
+Persists to a simple JSON file on disk.
+"""
+
+import json
+import logging
+from pathlib import Path
+
+log = logging.getLogger(__name__)
+
+CACHE_FILE = Path("./data/cache.json")
+
+
+def _load() -> set:
+    """
+    Read the set of seen file IDs from ``CACHE_FILE``.
+
+    Returns:
+        The IDs stored on disk, or an empty set when the file is missing,
+        unreadable, or does not hold a JSON array of hashable values.
+        Failures other than a missing file are logged, because the next
+        ``mark_seen()`` overwrites the file with whatever was loaded here.
+    """
+    try:
+        with open(CACHE_FILE, "r", encoding="utf-8") as f:
+            return set(json.load(f))
+    except FileNotFoundError:
+        # First run (or cache deliberately deleted): nothing seen yet.
+        return set()
+    except (OSError, ValueError, TypeError) as e:
+        # ValueError covers json.JSONDecodeError / UnicodeDecodeError;
+        # TypeError covers a non-iterable or unhashable JSON payload.
+        log.warning(f"Cache file {CACHE_FILE} is unreadable, starting empty: {e}")
+        return set()
+
+
+def _save(seen: set) -> None:
+    """
+    Persist *seen* to ``CACHE_FILE`` as a JSON array.
+
+    The data is written to a sibling temp file and then renamed over the
+    real file, so an interrupted write cannot leave a truncated cache behind
+    (which ``_load()`` would read as "nothing seen").  The parent directory
+    is created if it does not exist.
+
+    Raises:
+        OSError: if the directory or file cannot be written.
+        TypeError: if *seen* contains values that are not JSON-serialisable.
+    """
+    cache_path = Path(CACHE_FILE)
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
+    with open(tmp_path, "w", encoding="utf-8") as f:
+        json.dump(list(seen), f)
+    tmp_path.replace(cache_path)  # atomic when both paths share a filesystem
+
+
+def is_seen(file_id: int) -> bool:
+    """Return True if *file_id* is in the on-disk cache (re-read on every call)."""
+    return file_id in _load()
+
+
+def mark_seen(file_id: int) -> None:
+    """
+    Record *file_id* as processed.
+
+    Performs a load → add → save round trip on the cache file, then emits a
+    debug log line.  The file is rewritten even if the ID was already present.
+    """
+    _save(_load() | {file_id})
+    log.debug(f"  Cached file ID {file_id}")
diff --git a/utils/database.md b/utils/database.md
new file mode 100644
index 0000000..92909f8
--- /dev/null
+++ b/utils/database.md
@@ -0,0 +1,89 @@
+# utils/database.py
+
+SQLite persistence layer for credential hits.  
+DB file: `data/hits.db`
+
+## Public API
+
+```python
+from utils.database import init_db, insert_hits, search, recent, by_severity, stats
+```
+
+### Setup
+
+#### `init_db() -> None`
+Creates `hits` table and indexes if they don't exist. Call once on startup.  
+Safe to call multiple times (idempotent).
+
+---
+
+### Writing
+
+#### `insert_hits(scored_hits, source, filename, seen_before=False) -> int`
+Inserts a list of `ScoredHit` objects. Returns row count inserted.
+
+```python
+insert_hits(new_hits, source="channelname", filename="combo.zip")
+insert_hits(dupe_hits, source="channelname", filename="combo.zip", seen_before=True)
+```
+
+---
+
+### Querying
+
+#### `search(keyword: str) -> list[sqlite3.Row]`
+Substring search (SQL `LIKE '%keyword%'`, not an FTS index) across `url`, `username`, `raw`. Returns rows sorted by score DESC, timestamp DESC.
+
+#### `recent(limit: int = 50) -> list[sqlite3.Row]`
+Most recent hits, newest first.
+
+#### `by_severity(severity: str) -> list[sqlite3.Row]`
+All unique (non-duplicate) hits at a given severity, newest first.  
+`severity` must be one of: `"CRITICAL"`, `"HIGH"`, `"MEDIUM"`, `"LOW"`
+
+#### `stats() -> dict`
+Returns summary counters:
+```python
+{
+    "total":      int,   # all rows
+    "unique":     int,   # seen_before=0
+    "duplicates": int,   # seen_before=1
+    "critical":   int,   # unique CRITICAL
+    "high":       int,
+    "medium":     int,
+    "low":        int,
+    "sources":    int,   # distinct source channels
+    "top_source": {"source": str, "cnt": int} | None,
+}
+```
+
+---
+
+## Schema
+
+```sql
+hits (
+    id          INTEGER PRIMARY KEY AUTOINCREMENT,
+    url         TEXT,
+    username    TEXT,
+    password    TEXT,
+    raw         TEXT NOT NULL,      -- full original credential line
+    source      TEXT,               -- channel username or ID
+    filename    TEXT,               -- downloaded file name
+    timestamp   TEXT NOT NULL,      -- "YYYY-MM-DD HH:MM:SS UTC"
+    severity    TEXT NOT NULL,      -- CRITICAL/HIGH/MEDIUM/LOW
+    score       INTEGER NOT NULL,   -- 40/30/20/10
+    reasons     TEXT,               -- pipe-separated reason strings
+    seen_before INTEGER NOT NULL    -- 0=new, 1=duplicate
+)
+```
+
+Indexes: `url`, `username`, `source`, `timestamp`, `severity`.
+
+---
+
+## Notes
+
+- Each query opens and closes its own connection via the `_connect()` context manager.
+- `conn.row_factory = sqlite3.Row` — rows support both index and column-name access.
+- Transactions: commit on success, rollback on exception.
diff --git a/utils/database.py b/utils/database.py
new file mode 100644
index 0000000..589acb7
--- /dev/null
+++ b/utils/database.py
@@ -0,0 +1,171 @@
+"""
+database.py — SQLite storage for credential hits.
+
+Schema:
+  hits table:
+    - id          auto-increment primary key
+    - url         the target URL from the credential line
+    - username    extracted username/email
+    - password    extracted password
+    - raw         the full original line
+    - source      channel/bot it came from
+    - filename    the file it was found in
+    - timestamp   UTC time of discovery
+    - severity    CRITICAL / HIGH / MEDIUM / LOW
+    - score       numeric score (higher = worse)
+    - reasons     pipe-separated list of scoring reasons
+    - seen_before whether this was a duplicate (for stats)
+"""
+
+import sqlite3
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+from contextlib import contextmanager
+
+log = logging.getLogger(__name__)
+
+DB_FILE = Path("./data/hits.db")
+
+
+# ─── Setup ────────────────────────────────────────────────────────────────────
+
+@contextmanager
+def _connect():
+    """
+    Context manager yielding a fresh SQLite connection to ``DB_FILE``.
+
+    Rows come back as ``sqlite3.Row``.  On a clean exit from the ``with``
+    body the transaction is committed; if the body raises, it is rolled
+    back and the exception is re-raised.  The connection is closed either way.
+    """
+    conn = sqlite3.connect(DB_FILE)
+    conn.row_factory = sqlite3.Row
+    try:
+        yield conn
+        # Only reached when the with-body finished without raising.
+        conn.commit()
+    except Exception:
+        conn.rollback()
+        raise
+    finally:
+        conn.close()
+
+
+def init_db() -> None:
+    """
+    Create the ``hits`` table and its indexes if they don't exist yet.
+
+    Idempotent: every statement uses ``IF NOT EXISTS``.  Also creates the
+    directory that holds ``DB_FILE`` (a path relative to the working
+    directory), so starting from a directory without ``data/`` does not fail
+    with "unable to open database file".
+
+    Raises:
+        sqlite3.Error: if the database cannot be opened or the DDL fails.
+        OSError: if the parent directory cannot be created.
+    """
+    Path(DB_FILE).parent.mkdir(parents=True, exist_ok=True)
+
+    index_statements = (
+        "CREATE INDEX IF NOT EXISTS idx_url       ON hits(url)",
+        "CREATE INDEX IF NOT EXISTS idx_username  ON hits(username)",
+        "CREATE INDEX IF NOT EXISTS idx_source    ON hits(source)",
+        "CREATE INDEX IF NOT EXISTS idx_timestamp ON hits(timestamp)",
+        "CREATE INDEX IF NOT EXISTS idx_severity  ON hits(severity)",
+    )
+    with _connect() as conn:
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS hits (
+                id          INTEGER PRIMARY KEY AUTOINCREMENT,
+                url         TEXT,
+                username    TEXT,
+                password    TEXT,
+                raw         TEXT NOT NULL,
+                source      TEXT,
+                filename    TEXT,
+                timestamp   TEXT NOT NULL,
+                severity    TEXT NOT NULL DEFAULT 'LOW',
+                score       INTEGER NOT NULL DEFAULT 10,
+                reasons     TEXT,
+                seen_before INTEGER NOT NULL DEFAULT 0
+            )
+        """)
+        for statement in index_statements:
+            conn.execute(statement)
+    log.info(f"Database ready: {DB_FILE}")
+
+
+# ─── Writing ─────────────────────────────────────────────────────────────────
+
+def insert_hits(
+    scored_hits: list,
+    source: str,
+    filename: str,
+    seen_before: bool = False,
+) -> int:
+    """
+    Store a batch of ScoredHit objects in the ``hits`` table.
+
+    All rows of one call share the same UTC timestamp, *source*, *filename*
+    and *seen_before* flag (stored as 1/0).  ``reasons`` is flattened to a
+    single " | "-joined string.
+
+    Returns the number of rows inserted.
+    """
+    now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+    dup_flag = 1 if seen_before else 0
+    rows = [
+        (
+            hit.url, hit.username, hit.password, hit.raw,
+            source, filename, now_utc,
+            hit.severity, hit.score, " | ".join(hit.reasons), dup_flag,
+        )
+        for hit in scored_hits
+    ]
+
+    with _connect() as conn:
+        conn.executemany("""
+            INSERT INTO hits
+              (url, username, password, raw, source, filename, timestamp,
+               severity, score, reasons, seen_before)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """, rows)
+
+    log.info(f"  DB: inserted {len(rows)} row(s) from {filename}")
+    return len(rows)
+
+
+# ─── Querying ─────────────────────────────────────────────────────────────────
+
+def search(keyword: str) -> list[sqlite3.Row]:
+    """
+    Find hits whose url, username or raw line contains *keyword*.
+
+    The keyword is matched literally: ``%``, ``_`` and the backslash are
+    escaped so they are not interpreted as LIKE wildcards (``john_doe`` no
+    longer matches ``johnXdoe``).  An empty keyword matches every row.
+
+    Returns rows ordered by score (highest first), then timestamp
+    (newest first).
+    """
+    escaped = (
+        keyword.replace("\\", "\\\\")
+        .replace("%", "\\%")
+        .replace("_", "\\_")
+    )
+    pattern = f"%{escaped}%"
+    with _connect() as conn:
+        return conn.execute("""
+            SELECT * FROM hits
+            WHERE url LIKE ? ESCAPE '\\'
+               OR username LIKE ? ESCAPE '\\'
+               OR raw LIKE ? ESCAPE '\\'
+            ORDER BY score DESC, timestamp DESC
+        """, (pattern,) * 3).fetchall()
+
+
+def recent(limit: int = 50) -> list[sqlite3.Row]:
+    """
+    Return up to *limit* hits, newest first.
+
+    Rows written by one insert_hits() call share a single second-resolution
+    timestamp, so ``id DESC`` is used as a tie-breaker to keep the order
+    deterministic (later inserts first).
+    """
+    with _connect() as conn:
+        return conn.execute("""
+            SELECT * FROM hits
+            ORDER BY timestamp DESC, id DESC
+            LIMIT ?
+        """, (limit,)).fetchall()
+
+
+def by_severity(severity: str) -> list[sqlite3.Row]:
+    """
+    Return the non-duplicate hits (``seen_before = 0``) of one severity level.
+
+    Rows are ordered newest first; ``id DESC`` breaks ties between rows that
+    share a timestamp so the order is deterministic.  An unknown *severity*
+    simply yields an empty list.
+    """
+    with _connect() as conn:
+        return conn.execute("""
+            SELECT * FROM hits
+            WHERE severity = ? AND seen_before = 0
+            ORDER BY timestamp DESC, id DESC
+        """, (severity,)).fetchall()
+
+
+def stats() -> dict:
+    """
+    Collect summary counters over the ``hits`` table.
+
+    Returns a dict with ``total``, ``unique`` (seen_before=0),
+    ``duplicates`` (total - unique), per-severity counts of unique rows
+    (``critical``/``high``/``medium``/``low``), ``sources`` (distinct source
+    values) and ``top_source`` — ``{"source", "cnt"}`` for the source with
+    the most rows, or ``None`` when the table is empty.
+    """
+    with _connect() as conn:
+        def count(sql: str) -> int:
+            # Every counter query yields exactly one row with one column.
+            return conn.execute(sql).fetchone()[0]
+
+        total = count("SELECT COUNT(*) FROM hits")
+        unique = count("SELECT COUNT(*) FROM hits WHERE seen_before=0")
+        summary = {
+            "total":      total,
+            "unique":     unique,
+            "duplicates": total - unique,
+            "critical":   count("SELECT COUNT(*) FROM hits WHERE severity='CRITICAL' AND seen_before=0"),
+            "high":       count("SELECT COUNT(*) FROM hits WHERE severity='HIGH' AND seen_before=0"),
+            "medium":     count("SELECT COUNT(*) FROM hits WHERE severity='MEDIUM' AND seen_before=0"),
+            "low":        count("SELECT COUNT(*) FROM hits WHERE severity='LOW' AND seen_before=0"),
+            "sources":    count("SELECT COUNT(DISTINCT source) FROM hits"),
+        }
+        top_row = conn.execute("""
+            SELECT source, COUNT(*) as cnt FROM hits
+            GROUP BY source ORDER BY cnt DESC LIMIT 1
+        """).fetchone()
+    summary["top_source"] = dict(top_row) if top_row else None
+    return summary
diff --git a/utils/scorer.md b/utils/scorer.md
new file mode 100644
index 0000000..50df937
--- /dev/null
+++ b/utils/scorer.md
@@ -0,0 +1,87 @@
+# utils/scorer.py
+
+Severity scoring for credential hits. No Telegram deps. Pure logic.
+
+## Public API
+
+```python
+from utils.scorer import score_hit, score_hits, summarize, ScoredHit
+from utils.scorer import CRITICAL, HIGH, MEDIUM, LOW, SEVERITY_EMOJI, SEVERITY_SCORES
+```
+
+### `score_hit(line: str) -> ScoredHit`
+Score a single raw credential line. Parses ULP format (`url:user:pass`), runs all checks, returns a `ScoredHit`.
+
+### `score_hits(lines: list[str]) -> list[ScoredHit]`
+Score a list of lines. Returns sorted descending by score.
+
+### `summarize(scored: list[ScoredHit]) -> dict`
+Returns `{CRITICAL: n, HIGH: n, MEDIUM: n, LOW: n}`.
+
+---
+
+## ScoredHit dataclass
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `raw` | str | Original credential line |
+| `severity` | str | CRITICAL / HIGH / MEDIUM / LOW |
+| `score` | int | 40 / 30 / 20 / 10 |
+| `reasons` | list[str] | Human-readable match reasons |
+| `url` | str\|None | Parsed URL field |
+| `username` | str\|None | Parsed username/email field |
+| `password` | str\|None | Parsed password field |
+| `.emoji` | property | 🔴🟠🟡🟢 |
+
+---
+
+## Scoring rules (highest match wins)
+
+| Severity | Triggers |
+|----------|----------|
+| CRITICAL | Employee email domain after `@` in username/line · Privileged service URL (admin, vpn, ssh, rdp, gitlab, jira…) |
+| HIGH | Internal service URL (intranet, erp, crm, sso, owa, sharepoint…) |
+| MEDIUM | Client-facing URL (app, patient, booking, helpdesk…) |
+| LOW | Org domain appears anywhere in line (baseline) |
+
+Check 6 (no severity change): flags weak passwords ≤6 chars or common strings.
+
+---
+
+## Employee domain matching
+
+Keywords in `config.TARGET_KEYWORDS` containing `@` become employee patterns.  
+Pattern: `@<domain>(?:[^a-zA-Z0-9.\-]|$)` — requires literal `@` before the domain.  
+**`user@gmail.com` on a URL containing `myorg.cl` does NOT trigger CRITICAL.**
+
+Keywords without `@` go only to `ORG_DOMAINS` (LOW baseline).
+
+---
+
+## ULP line parser (`ULP_PATTERN`)
+
+Separators: `:` `;` `,` `|` `\t` (any of these between the three fields).
+
+The URL field handles two common stealer-log complications:
+
+1. **`://` not treated as separator** — the optional scheme prefix `(?:https?|ftp)://` is consumed before the character-class match, so `https://` never gets split at the colon.
+
+2. **Port + path consumed into the URL** — the optional group `(?::\d+/[^\s:;,|\t]*)` absorbs `:port/path` when the port is pure digits immediately followed by `/`. This correctly handles `http://host:8085/path/:user:pass` but intentionally skips patterns like `:24145487-8` (RUT number — hyphen after digits, no `/`).
+
+**Known limitation:** A bare port with no path (e.g. `https://host:8080:user:pass`) will mis-parse `8080` as the username. This is not observed in practice — stealer logs always include at least a trailing `/`.
+
+---
+
+## Module-level globals (rebuilt on import + via KeywordsScreen)
+
+| Name | Type | Description |
+|------|------|-------------|
+| `EMPLOYEE_DOMAINS` | `list[tuple[str, Pattern]]` | `(domain_str, anchored_pattern)` for `@`-keywords |
+| `ORG_DOMAINS` | `list[Pattern]` | Plain domain patterns for all keywords |
+
+To rebuild after editing `config.TARGET_KEYWORDS` at runtime:
+```python
+import utils.scorer as scorer
+scorer.EMPLOYEE_DOMAINS = scorer._build_employee_domains()
+scorer.ORG_DOMAINS      = scorer._build_org_domains()
+```
diff --git a/utils/scorer.py b/utils/scorer.py
new file mode 100644
index 0000000..9f1a3a8
--- /dev/null
+++ b/utils/scorer.py
@@ -0,0 +1,273 @@
+"""
+scorer.py — Severity scoring for credential hits.
+
+Scoring logic (highest match wins):
+
+  CRITICAL  — Employee credentials (internal email domain)
+                e.g. jdoe@yourclinic.cl:password
+              — Admin/privileged service URLs
+                e.g. admin., vpn., ssh., rdp., gitlab., jira.
+
+  HIGH      — Internal-facing services
+                e.g. intranet., erp., crm., portal., citrix.
+              — Password manager or SSO hits
+              — Any credential where username looks like an employee email (note: score_hit() has no separate HIGH check for this; employee-domain emails score CRITICAL)
+
+  MEDIUM    — Client-facing portals
+                e.g. app., patient., client., booking.
+              — Domain match on a non-privileged service
+
+  LOW       — Generic domain keyword match
+              — No URL parsed, just a raw domain mention
+
+Each scored hit is returned as a ScoredHit dataclass with:
+  - severity:    CRITICAL / HIGH / MEDIUM / LOW
+  - score:       int (higher = worse)
+  - reasons:     list of human-readable reasons
+  - raw:         original line, stripped (plus parsed url / username / password, or None)
+"""
+
+import re
+import logging
+from dataclasses import dataclass, field
+from config import TARGET_KEYWORDS
+
+log = logging.getLogger(__name__)
+
+
+# ─── Severity levels ─────────────────────────────────────────────────────────
+
+CRITICAL = "CRITICAL"  # level labels; also the keys of the two tables below
+HIGH     = "HIGH"
+MEDIUM   = "MEDIUM"
+LOW      = "LOW"
+
+SEVERITY_SCORES = {  # numeric weight per level; score_hit() stores it in ScoredHit.score
+    CRITICAL: 40,
+    HIGH:     30,
+    MEDIUM:   20,
+    LOW:      10,
+}
+
+SEVERITY_EMOJI = {  # display marker per level, read by ScoredHit.emoji
+    CRITICAL: "🔴",
+    HIGH:     "🟠",
+    MEDIUM:   "🟡",
+    LOW:      "🟢",
+}
+
+
+# ─── Pattern banks ───────────────────────────────────────────────────────────
+
+# Subdomains/services that indicate privileged access (score_hit Check 2 -> CRITICAL).
+CRITICAL_SERVICES = re.compile(
+    r"(?:^|https?://|\.)"  # name must start the string or follow 'http(s)://' or a dot
+    r"(admin|vpn|ssh|rdp|ftp|sftp|gitlab|github|bitbucket|jenkins|"
+    r"jira|confluence|grafana|kibana|sentry|vault|bastion|jump|"
+    r"firewall|router|switch|proxy|ldap|ad\.|activedirectory|"
+    r"exchange|mail\.)",
+    re.IGNORECASE  # no right-hand boundary: 'admin' also matches 'administrator...'
+)
+
+HIGH_SERVICES = re.compile(  # internal-facing services (score_hit Check 3 -> HIGH)
+    r"(?:^|https?://|\.)"
+    r"(intranet|erp|crm|portal|citrix|workspace|webmail|owa|"
+    r"sharepoint|teams|slack|zoom|meet|sso|login|auth|oauth|"
+    r"accounts?|dashboard|internal|corp|staff|hr|payroll|"
+    r"finance|accounting)",
+    re.IGNORECASE
+)
+
+MEDIUM_SERVICES = re.compile(  # client-facing portals (score_hit Check 4 -> MEDIUM)
+    r"(?:^|https?://|\.)"
+    r"(app|patient|client|customer|booking|appointment|"
+    r"reserva|cita|paciente|user|member|registro|signup|"
+    r"support|helpdesk|ticket)",
+    re.IGNORECASE
+)
+
+# Matches any email address (user@domain); group 1 is the domain. Not used by score_hit().
+EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9._%+\-]+@([a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})")
+
+# ULP line parser: <url> SEP <username> SEP <password>; password is the rest of the line.
+# SEP is exactly one of: colon, semicolon, comma, pipe, tab.
+# URL field: optional scheme (http/https/ftp) consumed first so '://' is never
+# mistaken for a separator; then an optional ':\d+/' group absorbs port+path, so
+# 'http://host:88/path:user:pass' yields url='http://host:88/path', not url='http'.
+# A bare port without '/' ('host:8080:user:pass') parses 8080 as the username.
+ULP_PATTERN = re.compile(
+    r"^(?P<url>"
+        r"(?:(?:https?|ftp)://)?[^\s:;,|\t]+"  # optional scheme + host/path
+        r"(?::\d+/[^\s:;,|\t]*)?"              # optional :port/path (port = digits then /)
+    r")"
+    r"(?:[:;,|\t])"
+    r"(?P<username>[^\s:;,|\t]+)"
+    r"(?:[:;,|\t])"
+    r"(?P<password>.+)$"
+)
+
+
+# ─── Derived from config ──────────────────────────────────────────────────────
+
+def _kw_to_domain(kw: str) -> str:
+    """Return kw as a bare domain: '@' removed, '\\.' unescaped, edge ^/$ and leading dots cut."""
+    return "".join(kw.split("@")).replace("\\.", ".").strip("$^").lstrip(".")
+
+
+def _build_employee_domains() -> list[tuple[str, re.Pattern]]:
+    """
+    Compile one matcher per '@'-bearing entry of TARGET_KEYWORDS.
+
+    Each matcher demands a literal '@' right before the domain and a
+    non-domain character (or end of input) right after it, so an org domain
+    that only appears in a URL cannot flag an unrelated address (@gmail.com).
+
+    Returns (domain_str, compiled_pattern) pairs in keyword order; keywords
+    whose domain reduces to an empty string are skipped.
+    """
+
+    tail = r"(?:[^a-zA-Z0-9.\-]|$)"  # the domain must end here, not continue
+    email_keywords = (kw for kw in TARGET_KEYWORDS if "@" in kw)
+    return [
+        (domain, re.compile(r"@" + re.escape(domain) + tail, re.IGNORECASE))
+        for domain in map(_kw_to_domain, email_keywords)
+        if domain
+    ]
+
+# Built once at import; reassign from _build_employee_domains() to refresh.
+EMPLOYEE_DOMAINS = _build_employee_domains()
+
+
+def _build_org_domains() -> list[re.Pattern]:
+    """
+    Compile every entry of TARGET_KEYWORDS (with or without '@') into a
+    case-insensitive literal matcher for its bare domain, in keyword order.
+    Keywords that reduce to an empty domain are skipped.
+    """
+    domains = (_kw_to_domain(kw) for kw in TARGET_KEYWORDS)
+    return [
+        re.compile(re.escape(d), re.IGNORECASE) for d in domains if d
+    ]
+
+# Substring matchers used by score_hit() for the LOW baseline check.
+ORG_DOMAINS = _build_org_domains()
+
+
+
+# ─── Scoring logic ────────────────────────────────────────────────────────────
+
+@dataclass
+class ScoredHit:  # result record built by score_hit() for one credential line
+    raw:      str  # the input line after strip()
+    severity: str  # one of CRITICAL / HIGH / MEDIUM / LOW
+    score:    int  # SEVERITY_SCORES[severity] as filled in by score_hit()
+    reasons:  list[str] = field(default_factory=list)  # human-readable explanations
+    url:      str | None = None  # parsed ULP fields; None when ULP_PATTERN did not match
+    username: str | None = None
+    password: str | None = None
+
+    @property
+    def emoji(self) -> str:  # marker for this severity, "⚪" when the level is unknown
+        return SEVERITY_EMOJI.get(self.severity, "⚪")
+
+    def __str__(self) -> str:  # "<emoji> [<severity>] <raw line>"
+        return f"{self.emoji} [{self.severity}] {self.raw}"
+
+
+def score_hit(line: str) -> ScoredHit:
+    """
+    Score a single credential line.
+
+    The stripped line is parsed with ULP_PATTERN when it has the
+    url / username / password shape, then run through six checks:
+    employee email domain (CRITICAL), URL service tier (CRITICAL / HIGH /
+    MEDIUM), org-domain baseline (LOW, only when nothing else fired) and
+    password weakness (adds reasons only, never changes the severity).
+
+    Returns a ScoredHit carrying the highest severity raised (LOW when no
+    check fired), its numeric score, the collected reasons and the parsed
+    fields, which stay None when the line did not parse.
+    """
+    line    = line.strip()
+    reasons = []
+    scores  = []
+
+    # Parse ULP fields if possible
+    url = username = password = None
+    m = ULP_PATTERN.match(line)
+    if m:
+        url      = m.group("url")
+        username = m.group("username")
+        password = m.group("password")
+
+    # ── Check 1: Employee email domain in username or line ───────────────
+    # Each pattern requires '@' immediately before the domain, so a URL that
+    # merely contains the org domain never raises CRITICAL for an unrelated
+    # email (@gmail etc). The parsed username is tried first, then the line.
+    for domain_str, pat in EMPLOYEE_DOMAINS:
+        if pat.search(username or "") or pat.search(line):
+            scores.append(CRITICAL)
+            reasons.append(f"Employee email domain: {domain_str}")
+            break
+
+    # ── Check 2: Is the URL a privileged/critical service? ────────────────
+    if url and CRITICAL_SERVICES.search(url):
+        scores.append(CRITICAL)
+        reasons.append(f"Critical service URL: {url}")
+
+    # ── Check 3: Is the URL a high-value internal service? ────────────────
+    if url and HIGH_SERVICES.search(url):
+        scores.append(HIGH)
+        reasons.append(f"High-value internal service: {url}")
+
+    # ── Check 4: Is the URL a client-facing service? ──────────────────────
+    if url and MEDIUM_SERVICES.search(url):
+        scores.append(MEDIUM)
+        reasons.append(f"Client-facing service: {url}")
+
+    # ── Check 5: Generic org domain match (baseline) ─────────────────────
+    for pattern in ORG_DOMAINS:
+        if pattern.search(line):
+            if not scores:
+                scores.append(LOW)
+                reasons.append("Org domain match in line")
+            break
+
+    # ── Check 6: Weak/common password flag (reasons only) ────────────────
+    if password:
+        if len(password) <= 6:
+            reasons.append(f"⚠ Weak password ({len(password)} chars)")
+        if password.lower() in {"123456", "password", "qwerty", "111111", "admin", "letmein"}:
+            reasons.append(f"⚠ Common password: {password}")
+
+    # ── Resolve final severity: first level, worst to mildest, that fired ─
+    severity_order = [CRITICAL, HIGH, MEDIUM, LOW]
+    final_severity = next((s for s in severity_order if s in scores), LOW)
+
+    if not reasons:
+        reasons.append("Pattern match")
+
+    return ScoredHit(
+        raw      = line,
+        severity = final_severity,
+        score    = SEVERITY_SCORES[final_severity],
+        reasons  = reasons,
+        url      = url,
+        username = username,
+        password = password,
+    )
+
+
+def score_hits(lines: list[str]) -> list[ScoredHit]:
+    """Score every line with score_hit(); return the hits ordered worst-first."""
+    return sorted(
+        map(score_hit, lines), key=lambda hit: hit.score, reverse=True
+    )
+
+
+def summarize(scored: list[ScoredHit]) -> dict:
+    """Return {severity: number of hits}, always listing all four levels."""
+    summary = dict.fromkeys((CRITICAL, HIGH, MEDIUM, LOW), 0)
+    for hit in scored:
+        summary[hit.severity] += 1
+    return summary