Initial commit: ULPgrammer

- Core Telegram monitoring pipeline (scraper, processor, notifier, downloaders)
- Textual TUI frontend with thread-safe event bus
- SQLite persistence, severity scoring, dedup cache
- Fixed ULP parser: handles https:// truncation, port+path URLs, semicolon separator
- Test suite: 88 tests across scorer, cache, database, processor
This commit is contained in:
2026-04-02 01:58:49 -03:00
commit 48f486ac97
41 changed files with 5270 additions and 0 deletions

161
core/bot_downloader.py Normal file
View File

@@ -0,0 +1,161 @@
"""
bot_downloader.py — Handles "click to download" inline button flows.
Some Telegram channels post messages with a DOWNLOAD button that triggers
a bot to send you the actual file. This module simulates that click and
captures the bot's file response.
"""
import asyncio
import re
import logging
from telethon import TelegramClient
from telethon.tl.types import MessageMediaDocument, KeyboardButtonUrl
from telethon.errors import FloodWaitError
log = logging.getLogger(__name__)

# Substrings that mark an inline button as a "download" button. They are
# compared against the upper-cased button label, so every entry must be
# upper-case and non-empty: an empty string is a substring of every label
# and would make every button look like a download button.
DOWNLOAD_BUTTON_KEYWORDS = ["DOWNLOAD", "DESCARGAR", "GET FILE", "GET PACK", "📥"]

# Number of one-second polls to wait for the bot's file after sending /start.
BOT_REPLY_TIMEOUT = 10

# Matches a "Pass:" / "Password:" / "Contraseña:" / "Clave:" label
# (case-insensitive); group 1 captures the text after the colon up to the
# end of a line.
PASSWORD_PATTERN = re.compile(
    r"(?:Pass|Password|Contraseña|Contrasena|Clave)[\s]*:[\s]*(.+)$",
    re.IGNORECASE | re.MULTILINE,
)
# ─── Password extraction ──────────────────────────────────────────────────────
def extract_password(msg) -> str | None:
    """Return the password announced in a message's text, if any.

    Searches the text with ``PASSWORD_PATTERN`` and returns what follows the
    label, with surrounding whitespace and Markdown markers stripped.

    Args:
        msg: Message-like object exposing a ``text`` attribute.

    Returns:
        The extracted password, or ``None`` when the message has no text,
        no label is found, or nothing remains after stripping.
    """
    if not msg.text:
        return None

    match = PASSWORD_PATTERN.search(msg.text)
    if match is None:
        return None

    # Whitespace first, then Markdown emphasis/code markers (e.g. ``**pw**``).
    pwd = match.group(1).strip().strip("*`_~")
    if not pwd.strip():
        # A bare label such as "Pass: " or "Pass: **" carries no password;
        # report "not found" instead of handing callers an empty string.
        return None

    log.info(f" Found password in message: '{pwd}'")
    return pwd
# ─── Button detection ─────────────────────────────────────────────────────────
def find_download_button(msg):
    """Return the first inline button whose label looks like a download action.

    Rows are scanned in order and buttons in order within each row. A button
    matches when its upper-cased text contains any non-empty entry of
    ``DOWNLOAD_BUTTON_KEYWORDS``.

    Args:
        msg: Message-like object whose ``buttons`` attribute is a sequence of
            button rows, or falsy when the message has no buttons.

    Returns:
        The matching button object, or ``None`` when nothing matches.
    """
    if not msg.buttons:
        return None

    # Skip blank keywords: "" is a substring of every string, so a blank
    # entry would turn every button into a "download" button. Upper-casing
    # keeps the comparison case-insensitive even for lower-case entries.
    keywords = [kw.upper() for kw in DOWNLOAD_BUTTON_KEYWORDS if kw]

    for row in msg.buttons:
        for btn in row:
            label = (btn.text or "").upper()
            if any(kw in label for kw in keywords):
                return btn
    return None
def has_download_button(msg) -> bool:
    """Tell whether ``find_download_button`` finds a button on the message."""
    button = find_download_button(msg)
    return button is not None
# ─── Click + wait flow ────────────────────────────────────────────────────────
async def click_download_button(client: TelegramClient, msg) -> list:
    """Trigger a message's download button and collect the resulting files.

    Two button kinds are handled:

    * URL button of the form ``t.me/<bot>?start=<payload>``: sends
      ``/start <payload>`` to the bot, then polls the bot chat once per
      second, up to ``BOT_REPLY_TIMEOUT`` times, for document messages.
    * Any other button: clicks it, waits two seconds, then reads the latest
      messages from the original message's sender.

    Args:
        client: Telethon client used to message the bot and read replies.
        msg: Message carrying the inline keyboard.

    Returns:
        Messages whose media is a ``MessageMediaDocument``; an empty list
        when there is no download button, the URL is not recognised, a
        request fails, or no file arrives in time.
    """
    btn = find_download_button(msg)
    if not btn:
        return []
    log.info(f" Clicking button: '{btn.text}'")

    # ── URL button (most common) ───────────────────────────────────────────
    if isinstance(btn.button, KeyboardButtonUrl):
        url = btn.button.url  # e.g. https://t.me/SomeBot?start=ABC123
        match = re.search(r"t\.me/([A-Za-z0-9_]+)\?start=(.+)", url)
        if not match:
            log.warning(f" Unrecognised URL format: {url}")
            return []
        bot_username, payload = match.group(1), match.group(2)
        log.info(f" → Messaging @{bot_username} with /start {payload}")
        try:
            bot_entity = await client.get_entity(bot_username)
            sent = await client.send_message(bot_entity, f"/start {payload}")
        except Exception as e:
            log.error(f" Failed to message bot: {e}")
            return []

        # Only messages newer than our own /start can be the bot's answer.
        # Without this bound, a file left in the chat by an earlier exchange
        # with the same bot would be returned on the first poll.
        min_id = getattr(sent, "id", 0) or 0

        # Poll for reply
        log.info(f" Waiting up to {BOT_REPLY_TIMEOUT}s for bot reply...")
        for _ in range(BOT_REPLY_TIMEOUT):
            await asyncio.sleep(1)
            try:
                recent = await client.get_messages(bot_entity, limit=3, min_id=min_id)
                files = [m for m in recent if isinstance(m.media, MessageMediaDocument)]
                if files:
                    log.info(f" ✓ Got file from bot.")
                    return files
            except Exception as e:
                log.warning(f" Poll error: {e}")
                break
        log.warning(f" Bot did not reply within {BOT_REPLY_TIMEOUT}s.")
        return []

    # ── Callback button (less common) ─────────────────────────────────────
    try:
        await btn.click()
        await asyncio.sleep(2)
    except Exception as e:
        log.error(f" Callback click failed: {e}")
        return []
    try:
        # NOTE(review): this reads the sender's latest messages with no
        # "newer than the click" bound, so older documents may be returned.
        sender = await msg.get_sender()
        recent = await client.get_messages(sender, limit=5)
        return [m for m in recent if isinstance(m.media, MessageMediaDocument)]
    except Exception as e:
        log.warning(f" Fallback poll failed: {e}")
        return []
# ─── Main entry point ─────────────────────────────────────────────────────────
async def handle_bot_download_message(
    client: TelegramClient,
    bot: TelegramClient,
    msg,
    source_name: str,
    patterns,
    password: str | None = None,
) -> None:
    """Run the download-button flow for one message.

    Does nothing when the message has no download button. Otherwise the
    button is triggered through ``click_download_button`` and every returned
    message is passed to ``core.scraper.handle_message`` with the source name
    suffixed by ``[bot]``.

    Args:
        client: Client used to trigger the button; also forwarded to
            ``handle_message``.
        bot: Second client, forwarded unchanged to ``handle_message``.
        msg: Message to inspect for a download button.
        source_name: Label of the originating source, used in log lines.
        patterns: Forwarded unchanged to ``handle_message``.
        password: Optional password forwarded to ``handle_message``.
    """
    if not has_download_button(msg):
        return

    log.info(f"[BotDL] Download button detected in {source_name}")
    bot_files = await click_download_button(client, msg)
    if not bot_files:
        log.warning(f"[BotDL] No file received for message in {source_name}.")
        return

    # Imported at call time rather than at module top, as in the original
    # (presumably to avoid a circular import with core.scraper — confirm).
    from core.scraper import handle_message

    tagged_source = f"{source_name}[bot]"
    for reply in bot_files:
        media = reply.media
        if hasattr(media, 'document'):
            attrs = getattr(media.document, 'attributes', [])
        else:
            attrs = 'none'
        log.info(f" [BotDL] Response media type: {type(media).__name__}, attrs: {attrs}")
        await handle_message(client, bot, reply, tagged_source, patterns, password=password)