""" config.py — Loads and validates all settings from .env """ import json import logging import os from pathlib import Path from dotenv import load_dotenv load_dotenv() log = logging.getLogger(__name__) # -- Timeouts -- BOT_REPLY_TIMEOUT = 10 # ─── Telegram credentials ──────────────────────────────────────────────────── API_ID = int(os.environ["API_ID"]) API_HASH = os.environ["API_HASH"] BOT_TOKEN = os.environ["BOT_TOKEN"] NOTIFY_CHAT_ID = int(os.environ["NOTIFY_CHAT_ID"]) SESSION_NAME = os.getenv("SESSION_NAME", "monitor_session") # ─── Runtime config path ───────────────────────────────────────────────────── RUNTIME_CONFIG_PATH = Path("./data/runtime_config.json") # ─── Hardcoded defaults (used when runtime_config.json is absent) ───────────── # Add your org's domains, email patterns, IP ranges, known usernames, etc. # All patterns are case-insensitive regex. _DEFAULT_KEYWORDS: list[str] = [ r"sanatorioaleman\.cl", r"@sanatorioaleman\.cl", # r"192\.168\.10\.", # internal IP range example # r"specificuser", # known internal usernames ] # Use usernames (without @) or numeric channel IDs (-100xxxxxxxxxx) _DEFAULT_CHANNELS: list[str | int] = [ #-1002230225603, "cloudxlog", #-1001967030016, # daisycloud #"berserklogs", # berserklogs #"BorwitaFreeLogs", # borwita -1002748707556, # darkcloud -1001684073398, # BHF Cloud -1003163621939, # Wich Love from R -1003611713618, # Khazan Cloud -1003328682684, # LogsPlanet -1003204260194, # JDP -1002828367761, # HesoyamCloud -1003513974925, # Slurm Logs -1003599300787, # Arhont Corp -1002582513379, # OnlyLogs -1002788333372, # Ickis Cloud #-1001234567890, # private channel by ID ] # ─── Runtime config helpers ─────────────────────────────────────────────────── def _load_runtime_config() -> dict: """Load runtime_config.json; return empty dict if absent or malformed.""" if not RUNTIME_CONFIG_PATH.exists(): return {} try: with open(RUNTIME_CONFIG_PATH) as f: return json.load(f) except Exception as e: log.warning("Failed to load %s: %s", RUNTIME_CONFIG_PATH, e) return {} def _keywords_from_groups(groups: list[dict]) -> list[str]: """Flatten all group patterns into a single keyword list.""" return [p["regex"] for g in groups for p in g.get("patterns", [])] # ─── Live config ────────────────────────────────────────────────────────────── # Populated from runtime_config.json at import; falls back to hardcoded defaults. _cfg = _load_runtime_config() KEYWORD_GROUPS: list[dict] = _cfg.get("groups", []) TARGET_KEYWORDS: list[str] = ( _keywords_from_groups(KEYWORD_GROUPS) if KEYWORD_GROUPS else _DEFAULT_KEYWORDS ) WATCHED_CHANNELS: list[str | int] = _cfg.get("channels", _DEFAULT_CHANNELS) def save_runtime_config(groups: list[dict], channels: list[str | int]) -> None: """ Persist keyword groups + channel list to runtime_config.json. Updates module globals so the running process sees the new values immediately. Called by web config routes after validating input. """ global KEYWORD_GROUPS, TARGET_KEYWORDS, WATCHED_CHANNELS data = {"groups": groups, "channels": channels} RUNTIME_CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True) with open(RUNTIME_CONFIG_PATH, "w") as f: json.dump(data, f, indent=2) KEYWORD_GROUPS = groups TARGET_KEYWORDS = _keywords_from_groups(groups) if groups else _DEFAULT_KEYWORDS WATCHED_CHANNELS = channels # ─── File handling ─────────────────────────────────────────────────────────── TEMP_DIR = Path("./tmp") HITS_FILE = Path("./hits.txt") LOG_FILE = Path("./logs/monitor.log") # Extensions to download and process ALLOWED_EXTENSIONS = {".txt", ".zip", ".7z", ".rar"} # Max file size to download (bytes). Default: 200 MB. # Very large files are skipped to avoid abuse of your session. MAX_FILE_SIZE = 4 * 1024 * 1024 * 1024 # 4 GB (Telegram Premium max) # ─── Archive passwords to try ──────────────────────────────────────────────── ARCHIVE_PASSWORDS: list[bytes] = [ b"1234", b"0000", b"infected", b"telegram", b"password", b"12345", b"", b"Borwita", b"@WichLoveFromR", ] # ─── Backfill settings ─────────────────────────────────────────────────────── # How many historical messages to scan per channel on startup (0 = skip backfill) BACKFILL_LIMIT = 500 # ─── tdl downloader settings ───────────────────────────────────────────────── # Namespace tdl was logged into. Run `tdl login` with no -n flag → namespace # is "default". Run `tdl login -n foo` → namespace is "foo". # Set to None to omit -n entirely (tdl will use "default" anyway). TDL_NAMESPACE: str | None = "ulpmon" # Parallel chunk workers per file (-t / --threads global flag) TDL_THREADS = 8 # Max concurrent files per tdl invocation (-l / --limit global flag) TDL_PERFILE = 4 # Max messages to batch into a single tdl invocation during backfill. # tdl handles the parallelism internally via -l and -t. TDL_AMOUNT = 4 # Whether to use a Telegram takeout session for downloads (lower flood limits). # Takeout sessions are rate-limited differently — good for bulk backfill. TDL_TAKEOUT = True