# Commit note: config.py now imports config_local.py at the bottom if present
# (gitignored). Sensitive defaults (real archive passwords, personal tdl
# namespace) were removed from config.py and are documented in
# config_local.py.example instead.
# Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
# Listing metadata: 148 lines, 6.0 KiB, Python
"""
config.py - Loads and validates all settings from .env
"""

import json
import logging
import os
from pathlib import Path

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.environ / os.getenv lookups further down this module can see them.
load_dotenv()

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# -- Timeouts --
# NOTE(review): the unit is presumably seconds; confirm against the code that
# waits for the bot's reply.
BOT_REPLY_TIMEOUT = 10

# ─── Telegram credentials ────────────────────────────────────────────────────
def _require_env(name: str) -> str:
    """Return the value of environment variable *name*.

    Raises:
        KeyError: if the variable is not set. The message names the variable
            so a missing .env entry is easy to spot at startup.
    """
    try:
        return os.environ[name]
    except KeyError:
        raise KeyError(f"Missing required environment variable: {name}") from None


def _require_int_env(name: str) -> int:
    """Return environment variable *name* converted to ``int``.

    Raises:
        KeyError: if the variable is not set.
        ValueError: if the value is not a valid integer. Unlike the bare
            ``int()`` error, the message names the offending variable.
    """
    raw = _require_env(name)
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# Required settings: importing this module fails fast if any is missing.
API_ID = _require_int_env("API_ID")
API_HASH = _require_env("API_HASH")
BOT_TOKEN = _require_env("BOT_TOKEN")
NOTIFY_CHAT_ID = _require_int_env("NOTIFY_CHAT_ID")

# Optional: falls back to "monitor_session" when unset.
SESSION_NAME = os.getenv("SESSION_NAME", "monitor_session")

# ─── Runtime config path ─────────────────────────────────────────────────────
# Relative path, so it is resolved against the process's working directory.
RUNTIME_CONFIG_PATH = Path("./data/runtime_config.json")

# ─── Hardcoded defaults (used when runtime_config.json is absent) ─────────────
|
|
# Add your org's domains, email patterns, IP ranges, known usernames, etc.
|
|
# All patterns are case-insensitive regex.
|
|
_DEFAULT_KEYWORDS: list[str] = [
|
|
#r"sanatorioaleman\.cl",
|
|
#r"@sanatorioaleman\.cl",
|
|
#r"@hites\.cl",
|
|
#r"hites\.com",
|
|
# r"192\.168\.10\.", # internal IP range example
|
|
# r"specificuser", # known internal usernames
|
|
r"onion\.global",
|
|
r"@onion\.global",
|
|
]
|
|
|
|
# Use usernames (without @) or numeric channel IDs (-100xxxxxxxxxx)
|
|
_DEFAULT_CHANNELS: list[str | int] = [
|
|
#"channelName",
|
|
#-1001234567890, # private channel by ID
|
|
]
|
|
|
|
# ─── Runtime config helpers ───────────────────────────────────────────────────
|
|
|
|
def _load_runtime_config() -> dict:
    """Load runtime_config.json; return empty dict if absent or malformed.

    Returns:
        The parsed top-level JSON object, or ``{}`` when the file does not
        exist, cannot be read or parsed, or holds a JSON value that is not an
        object. Every failure except a missing file is logged as a warning.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        with open(RUNTIME_CONFIG_PATH, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # Nothing has been saved yet: expected, so no warning.
        return {}
    except Exception as e:
        # Deliberately broad: this runs at import time, and a broken config
        # file should fall back to the defaults rather than stop the process.
        log.warning("Failed to load %s: %s", RUNTIME_CONFIG_PATH, e)
        return {}

    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a list): callers use .get() on
        # the result, so hand back an empty dict instead.
        log.warning(
            "Failed to load %s: %s",
            RUNTIME_CONFIG_PATH,
            f"expected a JSON object, got {type(data).__name__}",
        )
        return {}
    return data


def _keywords_from_groups(groups: list[dict]) -> list[str]:
|
|
"""Flatten all group patterns into a single keyword list."""
|
|
return [p["regex"] for g in groups for p in g.get("patterns", [])]
|
|
|
|
|
|
# ─── Live config ──────────────────────────────────────────────────────────────
# Populated from runtime_config.json at import; falls back to hardcoded defaults.
_cfg = _load_runtime_config()

# `or []` also covers an explicit `"groups": null` in the file.
KEYWORD_GROUPS: list[dict] = _cfg.get("groups") or []

# With no groups configured, the hardcoded keyword list is used instead.
TARGET_KEYWORDS: list[str] = (
    _keywords_from_groups(KEYWORD_GROUPS) if KEYWORD_GROUPS else _DEFAULT_KEYWORDS
)

# A saved empty list is honoured as "watch nothing"; only a missing or null
# entry falls back to the defaults.
WATCHED_CHANNELS: list[str | int] = _cfg.get("channels", _DEFAULT_CHANNELS)
if WATCHED_CHANNELS is None:
    WATCHED_CHANNELS = _DEFAULT_CHANNELS


def save_runtime_config(groups: list[dict], channels: list[str | int]) -> None:
    """
    Persist keyword groups + channel list to runtime_config.json.

    Updates module globals so the running process sees the new values immediately.
    Called by web config routes after validating input.

    The file is replaced atomically: the JSON is serialized in memory, written
    to a sibling ``.tmp`` file, then renamed over the target. A serialization
    or write error therefore leaves the previous config file intact, and the
    globals are only updated after the file has been written.

    Args:
        groups: Keyword groups to store; also becomes ``KEYWORD_GROUPS``.
        channels: Channels to store; also becomes ``WATCHED_CHANNELS``.

    Raises:
        TypeError, ValueError: if the data cannot be serialized to JSON.
        OSError: if the directory or file cannot be written.
    """
    global KEYWORD_GROUPS, TARGET_KEYWORDS, WATCHED_CHANNELS

    # Serialize before touching the filesystem so bad data cannot truncate
    # the existing file.
    payload = json.dumps({"groups": groups, "channels": channels}, indent=2)

    RUNTIME_CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = RUNTIME_CONFIG_PATH.with_name(RUNTIME_CONFIG_PATH.name + ".tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        # Rename within the same directory replaces the target in one step.
        tmp_path.replace(RUNTIME_CONFIG_PATH)
    except OSError:
        # Do not leave a half-written temp file behind.
        tmp_path.unlink(missing_ok=True)
        raise

    KEYWORD_GROUPS = groups
    # Same fallback rule as at import: no groups means the hardcoded keywords.
    TARGET_KEYWORDS = _keywords_from_groups(groups) if groups else _DEFAULT_KEYWORDS
    WATCHED_CHANNELS = channels


# ─── File handling ───────────────────────────────────────────────────────────
# All three paths are relative, so they resolve against the working directory.
TEMP_DIR = Path("./tmp")
HITS_FILE = Path("./hits.txt")
LOG_FILE = Path("./logs/monitor.log")

# Extensions to download and process
ALLOWED_EXTENSIONS = {".txt", ".zip", ".7z", ".rar"}

# Max file size to download (bytes). Default: 4 GiB.
# Larger files are skipped to avoid abuse of your session.
MAX_FILE_SIZE = 4 * 1024 * 1024 * 1024  # 4 GB (Telegram Premium max)

# ─── Archive passwords to try ────────────────────────────────────────────────
# Only generic guesses live here. Real or private passwords belong in the
# gitignored config_local.py, which can replace this list.
ARCHIVE_PASSWORDS: list[bytes] = [
    b"",  # empty password
    b"infected",
    b"password",
    b"1234",
    b"12345",
    b"0000",
    b"telegram",
]

# ─── Backfill settings ───────────────────────────────────────────────────────
# How many historical messages to scan per channel on startup (0 = skip backfill)
BACKFILL_LIMIT = 500

# ─── tdl downloader settings ─────────────────────────────────────────────────
|
|
# Namespace tdl was logged into. Run `tdl login` with no -n flag → namespace
|
|
# is "default". Run `tdl login -n foo` → namespace is "foo".
|
|
# Set to None to omit -n entirely (tdl will use "default" anyway).
|
|
TDL_NAMESPACE: str | None = "monitor_session"
|
|
|
|
# Parallel chunk workers per file (-t / --threads global flag)
|
|
TDL_THREADS = 8
|
|
|
|
# Max concurrent files per tdl invocation (-l / --limit global flag)
|
|
TDL_PERFILE = 4
|
|
|
|
# Max messages to batch into a single tdl invocation during backfill.
|
|
# tdl handles the parallelism internally via -l and -t.
|
|
TDL_AMOUNT = 4
|
|
|
|
# Whether to use a Telegram takeout session for downloads (lower flood limits).
|
|
# Takeout sessions are rate-limited differently - good for bulk backfill.
|
|
TDL_TAKEOUT = True
|
|
|
|
# ─── Local overrides (gitignored) ────────────────────────────────────────────
# Create config_local.py to override any value above without touching this file.
# A star import only brings in names that do not start with an underscore
# (unless config_local defines __all__), so private names are not overridden.
try:
    from config_local import *  # noqa: F401, F403
except ModuleNotFoundError as exc:
    # The override file is optional, so only its own absence is ignored.
    # An import that fails *inside* an existing config_local.py is a real
    # misconfiguration and must surface rather than silently drop overrides.
    if exc.name != "config_local":
        raise