Rename to stealergram, add pyproject.toml, purge em-dashes

- Rename project to stealergram throughout
- Add pyproject.toml (replaces requirements.txt split, folds pytest.ini)
- Replace all em-dashes with hyphens across all source files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 10:06:30 -04:00
parent 4c104cddd2
commit 741e6bb0d3
46 changed files with 244 additions and 191 deletions
--- a/core/__init__.py
+++ b/core/__init__.py
@@ -1 +1 @@
-"""core — Telegram I/O pipeline (scraper, downloader, processor, notifier)."""
+"""core - Telegram I/O pipeline (scraper, downloader, processor, notifier)."""
--- a/core/bot_downloader.py
+++ b/core/bot_downloader.py
@@ -1,5 +1,5 @@
 """
-bot_downloader.py — Handles "click to download" inline button flows.
+bot_downloader.py - Handles "click to download" inline button flows.

 Some Telegram channels post messages with a DOWNLOAD button that triggers
 a bot to send you the actual file. This module simulates that click and
--- a/core/notifier.py
+++ b/core/notifier.py
@@ -1,5 +1,5 @@
 """
-notifier.py — Persists hits to disk and sends Telegram bot alerts.
+notifier.py - Persists hits to disk and sends Telegram bot alerts.

 Includes:
  - Severity scoring via scorer.py
@@ -31,7 +31,7 @@ log = logging.getLogger(__name__)
 MAX_PREVIEW = 10   # hits to show per severity group in alert
 DEDUP_FILE  = Path("./data/dedup.json")

-# Only alert immediately for these severities — LOW hits are silent
+# Only alert immediately for these severities - LOW hits are silent
 ALERT_SEVERITIES = {CRITICAL, HIGH, MEDIUM}


@@ -124,7 +124,7 @@ def write_hits(scored_hits: list, source: str) -> None:


 def write_hits_csv(scored_hits: list, source: str, filename: str) -> None:
-    """Append new hits to hits.csv — one row per hit, easy to import."""
+    """Append new hits to hits.csv - one row per hit, easy to import."""
    HITS_CSV.parent.mkdir(parents=True, exist_ok=True)
    write_header = not HITS_CSV.exists()
    timestamp = _timestamp()
@@ -152,13 +152,13 @@ async def send_alert(
 ) -> None:
    """
    Send a Telegram alert grouped by severity.
-    Only includes CRITICAL, HIGH, MEDIUM — LOW hits are omitted from alerts.
+    Only includes CRITICAL, HIGH, MEDIUM - LOW hits are omitted from alerts.
    """
    summary  = summarize(scored_hits)
    alertable = [h for h in scored_hits if h.severity in ALERT_SEVERITIES]

    if not alertable:
-        log.info("  No alertable hits (all LOW) — skipping Telegram notification.")
+        log.info("  No alertable hits (all LOW) - skipping Telegram notification.")
        return

    lines = [
@@ -210,7 +210,7 @@ async def notify(bot: TelegramClient, hits: list[str], source: str, filename: st

    # Score first
    scored = score_hits(hits)
-    log.info(f"  Scored {len(scored)} hit(s) — {summarize(scored)}")
+    log.info(f"  Scored {len(scored)} hit(s) - {summarize(scored)}")

    # Deduplicate
    new_hits, dupe_hits = deduplicate(scored)
@@ -222,7 +222,7 @@ async def notify(bot: TelegramClient, hits: list[str], source: str, filename: st
        insert_hits(dupe_hits, source, filename, seen_before=True)

    if not new_hits:
-        log.info("  All hits already seen before — no alert sent.")
+        log.info("  All hits already seen before - no alert sent.")
        return

    # Push hits to TUI
--- a/core/processor.md
+++ b/core/processor.md
@@ -54,8 +54,8 @@ Nested archives are recursed **one level** only.

 ## Password order

-1. `extra_password` (from message/channel carry-forward) — tried first
-2. `config.ARCHIVE_PASSWORDS` — tried in order
+1. `extra_password` (from message/channel carry-forward) - tried first
+2. `config.ARCHIVE_PASSWORDS` - tried in order

 ---

--- a/core/processor.py
+++ b/core/processor.py
@@ -1,8 +1,8 @@
 """
-processor.py — Archive extraction and hit searching logic.
+processor.py - Archive extraction and hit searching logic.

 Supports: .txt, .zip, .7z, .rar
-Stream-processes files line by line — safe for large combo lists.
+Stream-processes files line by line - safe for large combo lists.
 """

 import rarfile
@@ -40,7 +40,7 @@ def compile_patterns(keywords: list[str]) -> list[re.Pattern]:
 def search_file(filepath: Path, patterns: list[re.Pattern]) -> list[str]:
    """
    Stream-reads a text file line by line and returns lines matching any pattern.
-    Ignores encoding errors — combo files are often messy.
+    Ignores encoding errors - combo files are often messy.
    """
    hits: list[str] = []
    try:
@@ -82,7 +82,7 @@ def extract_zip(filepath: Path, dest: Path, extra_password: str | None = None) -
            except RuntimeError:
                log.info(f"  ZIP is password-protected, trying common passwords...")
                if not _try_passwords(try_extract, ARCHIVE_PASSWORDS):
-                    log.warning(f"  Could not unlock {filepath.name} — skipping.")
+                    log.warning(f"  Could not unlock {filepath.name} - skipping.")
                    return []

            extracted = [p for p in dest.rglob("*") if p.is_file()]
@@ -95,7 +95,7 @@ def extract_zip(filepath: Path, dest: Path, extra_password: str | None = None) -

 def extract_7z(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
    if not HAS_7Z:
-        log.warning("py7zr not installed — skipping .7z file.")
+        log.warning("py7zr not installed - skipping .7z file.")
        return []
    extracted: list[Path] = []
    passwords = ARCHIVE_PASSWORDS.copy()
@@ -119,7 +119,7 @@ def extract_7z(filepath: Path, dest: Path, extra_password: str | None = None) ->
                except Exception:
                    continue
            if not success:
-                log.warning(f"  Could not unlock {filepath.name} — skipping.")
+                log.warning(f"  Could not unlock {filepath.name} - skipping.")
                return []

        extracted = [p for p in dest.rglob("*") if p.is_file()]
@@ -130,7 +130,7 @@ def extract_7z(filepath: Path, dest: Path, extra_password: str | None = None) ->

 def extract_rar(filepath: Path, dest: Path, extra_password: str | None = None) -> list[Path]:
    if not HAS_RAR:
-        log.warning("rarfile not installed — skipping .rar file.")
+        log.warning("rarfile not installed - skipping .rar file.")
        return []

    passwords = ARCHIVE_PASSWORDS.copy()
@@ -150,7 +150,7 @@ def extract_rar(filepath: Path, dest: Path, extra_password: str | None = None) -
            except Exception:
                log.info(f"  RAR may be password-protected, trying common passwords...")
                if not _try_passwords(try_extract, ARCHIVE_PASSWORDS):
-                    log.warning(f"  Could not unlock {filepath.name} — skipping.")
+                    log.warning(f"  Could not unlock {filepath.name} - skipping.")
                    return []

        extracted = [p for p in dest.rglob("*") if p.is_file()]
@@ -184,7 +184,7 @@ def unpack(filepath: Path, extra_password: str | None = None) -> tuple[list[Path
        return files, extract_dir

    else:
-        # Plain file — return as-is, no extract dir to clean up
+        # Plain file - return as-is, no extract dir to clean up
        return [filepath], None


@@ -207,7 +207,7 @@ def process_file(filepath: Path, patterns, password: str | None = None) -> list[
                log.info(f"    ✓ {len(hits)} hit(s) in {f.name}")
            all_hits.extend(hits)

-        # Nested archives — recurse one level
+        # Nested archives - recurse one level
        elif f.suffix.lower() in {".zip", ".7z", ".rar"} and f != filepath:
            log.info(f"    → Nested archive: {f.name}")
            nested_hits = process_file(f, patterns)
--- a/core/scraper.md
+++ b/core/scraper.md
@@ -11,7 +11,7 @@ from core.scraper import handle_message, backfill_all, register_handlers, warm_e
 ### `handle_message(client, bot, msg, source_name, patterns, password=None)`
 **async.** Full pipeline for one document message:
 1. Extract filename + size, check allowlist + size guard
-2. Check `utils.cache` — skip if already seen
+2. Check `utils.cache` - skip if already seen
 3. Try `tdl` download → Telethon fallback
 4. `core.processor.process_file()` → hits
 5. `core.notifier.notify()` if hits found
--- a/core/scraper.py
+++ b/core/scraper.py
@@ -1,5 +1,5 @@
 """
-scraper.py — Telethon user client.
+scraper.py - Telethon user client.

 Handles:
  - Listening for new file messages in watched channels
@@ -99,7 +99,7 @@ async def _telethon_download(client: TelegramClient, msg, dest: Path, filename:
    """Download a single file via Telethon. Returns True on success."""
    _bid = batch_id or f"telethon_{int(time.monotonic_ns())}"
    if batch_id is None:
-        # Standalone call (not already queued by tdl path) — post queued event
+        # Standalone call (not already queued by tdl path) - post queued event
        bus.post(bus.EvDownloadQueued(
            batch_id=_bid, filename=filename,
            size_mb=round(size / (1024 * 1024), 2),
@@ -165,12 +165,12 @@ async def handle_message(
    size = get_filesize(msg)
    ok, reason = is_processable(filename, size)
    if not ok:
-        log.warning(f"  handle_message: skipping '{filename}' — {reason}")
+        log.warning(f"  handle_message: skipping '{filename}' - {reason}")
        return

    doc_id = msg.media.document.id
    if is_seen(doc_id):
-        log.info(f"  Skipping {filename} — already processed.")
+        log.info(f"  Skipping {filename} - already processed.")
        return

    dest = _make_dest(msg, filename)
@@ -180,7 +180,7 @@ async def handle_message(
    downloaded = await download_single_with_tdl(msg, dest) if is_tdl_available() else False
    if not downloaded:
        if is_tdl_available():
-            log.warning("  [tdl] failed — falling back to Telethon")
+            log.warning("  [tdl] failed - falling back to Telethon")
        downloaded = await _telethon_download(client, msg, dest, filename, size)

    if not downloaded:
@@ -307,7 +307,7 @@ async def backfill_channel(

                ok, reason = is_processable(filename, size)
                if not ok:
-                    log.warning(f"  [Backfill] Skipping '{filename}' — {reason}")
+                    log.warning(f"  [Backfill] Skipping '{filename}' - {reason}")
                    continue

                if is_seen(msg.media.document.id):
@@ -319,13 +319,13 @@ async def backfill_channel(
                    if len(batch) >= TDL_AMOUNT:
                        await flush_batch()
                else:
-                    # No tdl — fall straight through to single handle_message
+                    # No tdl - fall straight through to single handle_message
                    await handle_message(client, bot, msg, source_name, patterns, password=password)
                    total += 1
                    await asyncio.sleep(0.5)

            elif msg.buttons and has_download_button(msg):
-                # Bot-button messages can't be batched — handle individually
+                # Bot-button messages can't be batched - handle individually
                await flush_batch()  # flush any pending batch first
                await handle_bot_download_message(client, bot, msg, source_name, patterns, password=password)
                total += 1
@@ -339,7 +339,7 @@ async def backfill_channel(
    except Exception as e:
        log.error(f"[Backfill] Error scanning {channel}: {e}")

-    log.info(f"[Backfill] Done: {channel} — {total} file(s) processed")
+    log.info(f"[Backfill] Done: {channel} - {total} file(s) processed")


 async def backfill_all(
--- a/core/tdl_downloader.md
+++ b/core/tdl_downloader.md
@@ -22,7 +22,7 @@ Used by the live handler and `bot_downloader`.

 ### `download_batch_with_tdl(entries: list[BatchEntry]) -> dict[int, bool]`
 **async.** Downloads up to `TDL_AMOUNT` messages in a single `tdl dl` invocation.  
-Returns `{doc_id: True|False}` — `False` means Telethon fallback needed.
+Returns `{doc_id: True|False}` - `False` means Telethon fallback needed.

 ---

@@ -55,7 +55,7 @@ In CLI mode: subprocess inherits the terminal, progress bars render natively.
 Each batch/single download gets a unique `data/tmp/_tdl_{monotonic_ns}/` staging dir.  
 After `tdl` exits, files are matched by name (with fuzzy stem fallback for `filenamify()` mangling) and moved to final `dest`. Staging dir is removed regardless of outcome.

-`--template '{{ filenamify .FileName }}'` — tdl uses the original Telegram filename, not its default `DialogID_MessageID_filename` format.
+`--template '{{ filenamify .FileName }}'` - tdl uses the original Telegram filename, not its default `DialogID_MessageID_filename` format.

 ---

--- a/core/tdl_downloader.py
+++ b/core/tdl_downloader.py
@@ -1,10 +1,10 @@
 """
-tdl_downloader.py — Fast file downloads via tdl (Go MTProto implementation).
+tdl_downloader.py - Fast file downloads via tdl (Go MTProto implementation).

 Install: https://github.com/iyear/tdl
    curl -sSL https://raw.githubusercontent.com/iyear/tdl/main/scripts/install.sh | bash

-First-time setup — log in once:
+First-time setup - log in once:
    tdl login               # saves to namespace "default"
    tdl login -n myns       # saves to a named namespace

@@ -77,7 +77,7 @@ def _build_cmd(urls: list[str], staging_dir: Path) -> list[str]:
    (no DialogID_MessageID_ prefix).

    --continue is kept so interrupted downloads resume rather than restart.
-    --skip-same is intentionally omitted — deduplication is handled upstream
+    --skip-same is intentionally omitted - deduplication is handled upstream
    by is_seen(), and --skip-same can cause the .tmp rename to fail when a
    same-named file already exists in the directory.
    """
@@ -103,7 +103,7 @@ def _build_cmd(urls: list[str], staging_dir: Path) -> list[str]:

 # ─── Runner ───────────────────────────────────────────────────────────────────

-# ANSI escape stripper — tdl emits colour codes even when not a TTY
+# ANSI escape stripper - tdl emits colour codes even when not a TTY
 import re as _re
 _ANSI_RE = _re.compile(r"\x1b\[[0-9;]*[mGKHFJA-Z]|\x1b=|\x1b>|\x1b\[\?[0-9]+[hl]")

@@ -141,7 +141,7 @@ async def _run_tdl(cmd: list[str], label: str) -> bool:
                    buf += chunk.decode(errors="replace")
                    # Split on both \r and \n; process all complete segments
                    parts = _re.split(r"[\r\n]", buf)
-                    # Last element may be an incomplete segment — keep in buffer
+                    # Last element may be an incomplete segment - keep in buffer
                    buf = parts[-1]
                    for part in parts[:-1]:
                        clean = _strip_ansi(part).strip()
@@ -163,7 +163,7 @@ async def _run_tdl(cmd: list[str], label: str) -> bool:
            log.info(f"[tdl] ✓ {label}")
            return True
        else:
-            log.error(f"[tdl] ✗ exit {proc.returncode} — {label}")
+            log.error(f"[tdl] ✗ exit {proc.returncode} - {label}")
            return False
    except FileNotFoundError:
        log.error("[tdl] binary not found at runtime")
@@ -260,7 +260,7 @@ async def download_batch_with_tdl(entries: list[BatchEntry]) -> dict[int, bool]:
        return {}

    if not is_tdl_available():
-        log.warning("[tdl] not available — all entries need Telethon fallback")
+        log.warning("[tdl] not available - all entries need Telethon fallback")
        return {e.doc_id: False for e in entries}

    urls: list[str] = []
@@ -327,7 +327,7 @@ async def download_single_with_tdl(msg, dest: Path) -> bool:
    bot_downloader where batching doesn't apply.
    """
    if not is_tdl_available():
-        log.warning("[tdl] not available — falling back to Telethon")
+        log.warning("[tdl] not available - falling back to Telethon")
        return False

    try: