""" bot_downloader.py β€” Handles "click to download" inline button flows. Some Telegram channels post messages with a DOWNLOAD button that triggers a bot to send you the actual file. This module simulates that click and captures the bot's file response. """ import asyncio import re import logging from telethon import TelegramClient from telethon.tl.types import MessageMediaDocument, KeyboardButtonUrl from telethon.errors import FloodWaitError log = logging.getLogger(__name__) DOWNLOAD_BUTTON_KEYWORDS = ["DOWNLOAD", "DESCARGAR", "GET FILE", "GET PACK", "⬇", "πŸ“₯"] BOT_REPLY_TIMEOUT = 10 PASSWORD_PATTERN = re.compile( r"(?:Pass|Password|ContraseΓ±a|Contrasena|Clave)[\s]*:[\s]*(.+)$", re.IGNORECASE | re.MULTILINE ) # ─── Password extraction ────────────────────────────────────────────────────── def extract_password(msg) -> str | None: if not msg.text: return None match = PASSWORD_PATTERN.search(msg.text) if match: pwd = match.group(1).strip() # Strip markdown formatting characters pwd = pwd.strip("*`_~") log.info(f" Found password in message: '{pwd}'") return pwd return None # ─── Button detection ───────────────────────────────────────────────────────── def find_download_button(msg): """ Scans a message's inline keyboard for a download-like button. Returns the button object or None. """ if not msg.buttons: return None for row in msg.buttons: for btn in row: if any(kw in btn.text.upper() for kw in DOWNLOAD_BUTTON_KEYWORDS): return btn return None def has_download_button(msg) -> bool: return find_download_button(msg) is not None # ─── Click + wait flow ──────────────────────────────────────────────────────── async def click_download_button(client: TelegramClient, msg) -> list: """ Clicks the download button on a message, then waits for the bot to reply with a file. Returns a list of response messages containing documents. """ btn = find_download_button(msg) if not btn: return [] log.info(f" Clicking button: '{btn.text}'") # ── URL button (most common) ─────────────────────────────────────────── if isinstance(btn.button, KeyboardButtonUrl): url = btn.button.url # e.g. https://t.me/SomeBot?start=ABC123 match = re.search(r"t\.me/([A-Za-z0-9_]+)\?start=(.+)", url) if not match: log.warning(f" Unrecognised URL format: {url}") return [] bot_username, payload = match.group(1), match.group(2) log.info(f" β†’ Messaging @{bot_username} with /start {payload}") try: bot_entity = await client.get_entity(bot_username) await client.send_message(bot_entity, f"/start {payload}") except Exception as e: log.error(f" Failed to message bot: {e}") return [] # Poll for reply log.info(f" Waiting up to {BOT_REPLY_TIMEOUT}s for bot reply...") for _ in range(BOT_REPLY_TIMEOUT): await asyncio.sleep(1) try: recent = await client.get_messages(bot_entity, limit=3) files = [m for m in recent if m.media and isinstance(m.media, MessageMediaDocument)] if files: log.info(f" βœ“ Got file from bot.") return files except Exception as e: log.warning(f" Poll error: {e}") break log.warning(f" Bot did not reply within {BOT_REPLY_TIMEOUT}s.") return [] # ── Callback button (less common) ───────────────────────────────────── else: try: await btn.click() await asyncio.sleep(2) except Exception as e: log.error(f" Callback click failed: {e}") return [] try: sender = await msg.get_sender() recent = await client.get_messages(sender, limit=5) return [m for m in recent if m.media and isinstance(m.media, MessageMediaDocument)] except Exception as e: log.warning(f" Fallback poll failed: {e}") return [] # ─── Main entry point ───────────────────────────────────────────────────────── async def handle_bot_download_message( client: TelegramClient, bot: TelegramClient, msg, source_name: str, patterns, password: str | None = None, ) -> None: """ Full pipeline for a message with a download button: 1. Detect download button 2. Click it 3. Wait for bot to send back a file 4. Hand off to the normal handle_message() flow """ if not has_download_button(msg): return log.info(f"[BotDL] Download button detected in {source_name}") responses = await click_download_button(client, msg) if not responses: log.warning(f"[BotDL] No file received for message in {source_name}.") return from core.scraper import handle_message for resp in responses: log.info(f" [BotDL] Response media type: {type(resp.media).__name__}, attrs: {getattr(resp.media.document, 'attributes', []) if hasattr(resp.media, 'document') else 'none'}") await handle_message(client, bot, resp, f"{source_name}[bot]", patterns, password=password)