Initial commit: ULPgrammer

- Core Telegram monitoring pipeline (scraper, processor, notifier, downloaders)
- Textual TUI frontend with thread-safe event bus
- SQLite persistence, severity scoring, dedup cache
- Fixed ULP parser: handles https:// truncation, port+path URLs, semicolon separator
- Test suite: 88 tests across scorer, cache, database, processor
This commit is contained in:
2026-04-02 01:58:49 -03:00
commit 48f486ac97
41 changed files with 5270 additions and 0 deletions

223
tests/test_processor.py Normal file
View File

@@ -0,0 +1,223 @@
"""
Tests for core/processor.py — archive extraction and line-by-line search.
No Telegram deps, no async. Tests create real archive fixtures in tmp_path
so process_file's cleanup guarantee can be verified against actual disk state.
"""
import zipfile
import pytest
from pathlib import Path
from core.processor import compile_patterns, search_file, process_file
@pytest.fixture
def patterns():
    """Provide the compiled pattern list used across these tests.

    The single source expression matches the literal domain ``testcorp.com``.
    """
    raw_expressions = [r"testcorp\.com"]
    return compile_patterns(raw_expressions)
# ─── compile_patterns ─────────────────────────────────────────────────────────
class TestCompilePatterns:
    """Checks on the pattern objects returned by ``compile_patterns``."""

    def test_returns_case_insensitive_patterns(self):
        """A lowercase expression must also match upper- and mixed-case text."""
        compiled = compile_patterns([r"hello"])[0]
        for sample in ("HELLO", "Hello"):
            assert compiled.search(sample) is not None

    def test_multiple_patterns(self):
        """Each input expression yields its own pattern, in input order."""
        compiled = compile_patterns([r"alpha", r"beta"])
        assert len(compiled) == 2
        first, second = compiled[0], compiled[1]
        assert first.search("alpha_line")
        assert second.search("beta_line")

    def test_empty_list(self):
        """No expressions in, no patterns out."""
        result = compile_patterns([])
        assert result == []
# ─── search_file ──────────────────────────────────────────────────────────────
class TestSearchFile:
    """Line-by-line matching behaviour of ``search_file`` on plain files."""

    @staticmethod
    def _write_combo(directory, text):
        """Write *text* to ``combo.txt`` inside *directory* and return the path."""
        target = directory / "combo.txt"
        target.write_text(text)
        return target

    def test_returns_matching_lines(self, tmp_path, patterns):
        """Only the line that matches the pattern is returned."""
        combo = self._write_combo(
            tmp_path, "testcorp.com|user|pass\nothersite.com|user|pass\n"
        )
        expected = ["testcorp.com|user|pass"]
        assert search_file(combo, patterns) == expected

    def test_returns_empty_when_no_match(self, tmp_path, patterns):
        """A file without any matching line produces an empty list."""
        combo = self._write_combo(
            tmp_path, "nomatch.com|user|pass\nanother.net|x|y\n"
        )
        assert search_file(combo, patterns) == []

    def test_strips_whitespace_from_returned_lines(self, tmp_path, patterns):
        """Leading and trailing whitespace is removed from a returned hit."""
        combo = self._write_combo(tmp_path, " testcorp.com|user|pass \n")
        first_hit = search_file(combo, patterns)[0]
        assert first_hit == "testcorp.com|user|pass"

    def test_skips_blank_lines(self, tmp_path, patterns):
        """Empty lines around a match do not show up in the result."""
        combo = self._write_combo(tmp_path, "\n\ntestcorp.com|user|pass\n\n")
        expected = ["testcorp.com|user|pass"]
        assert search_file(combo, patterns) == expected

    def test_handles_encoding_errors_gracefully(self, tmp_path, patterns):
        """Undecodable bytes in the middle of a file must not abort the search."""
        combo = tmp_path / "combo.txt"
        # Written as raw bytes so the middle line carries invalid byte values.
        combo.write_bytes(
            b"testcorp.com|user1|pass\n"
            b"\xff\xfe invalid bytes here\n"
            b"testcorp.com|user2|pass\n"
        )
        found = search_file(combo, patterns)
        assert len(found) == 2

    def test_multiple_matching_lines_all_returned(self, tmp_path, patterns):
        """Every matching line is reported, not just the first one."""
        combo = self._write_combo(
            tmp_path,
            "testcorp.com|alice|pass1\n"
            "nomatch.com|bob|pass2\n"
            "testcorp.com|carol|pass3\n",
        )
        found = search_file(combo, patterns)
        assert len(found) == 2
# ─── process_file — plain .txt ────────────────────────────────────────────────
class TestProcessFilePlainText:
    """``process_file`` applied directly to an uncompressed ``.txt`` file."""

    def test_returns_hits(self, tmp_path, patterns):
        """The matching line is returned; the non-matching one is not."""
        source = tmp_path / "combo.txt"
        source.write_text("testcorp.com|user|pass\nnomatch.com|x|y\n")
        assert process_file(source, patterns) == ["testcorp.com|user|pass"]

    def test_deletes_file_after_processing(self, tmp_path, patterns):
        """The input file is gone from disk once processing returns."""
        source = tmp_path / "combo.txt"
        source.write_text("testcorp.com|user|pass\n")
        process_file(source, patterns)
        still_present = source.exists()
        assert not still_present

    def test_deletes_file_even_with_no_hits(self, tmp_path, patterns):
        """Removal of the input file does not depend on finding a match."""
        source = tmp_path / "combo.txt"
        source.write_text("nomatch.com|x|y\n")
        result = process_file(source, patterns)
        assert result == []
        assert not source.exists()
# ─── process_file — .zip extraction ──────────────────────────────────────────
class TestProcessFileZip:
    """``process_file`` on ``.zip`` archives: results and on-disk cleanup."""

    def _make_zip(self, tmp_path: Path, content: str, filename="content.txt") -> Path:
        """Create ``combo.zip`` in *tmp_path* with a single member.

        *content* is written to ``tmp_path / filename``, stored in the archive
        under *filename*, and the loose file is removed again so that only the
        archive is left behind. Returns the archive path.
        """
        archive_path = tmp_path / "combo.zip"
        member_source = tmp_path / filename
        member_source.write_text(content)
        with zipfile.ZipFile(archive_path, "w") as archive:
            archive.write(member_source, filename)
        member_source.unlink()
        return archive_path

    def test_extracts_and_returns_hits(self, tmp_path, patterns):
        """A matching line inside the archived file is returned."""
        archive_path = self._make_zip(
            tmp_path, "testcorp.com|user|pass\nnomatch.com|x|y\n"
        )
        assert process_file(archive_path, patterns) == ["testcorp.com|user|pass"]

    def test_deletes_zip_after_processing(self, tmp_path, patterns):
        """The archive itself no longer exists after processing."""
        archive_path = self._make_zip(tmp_path, "testcorp.com|user|pass\n")
        process_file(archive_path, patterns)
        assert not archive_path.exists()

    def test_deletes_extract_dir_after_processing(self, tmp_path, patterns):
        """No ``combo`` directory is left next to where the archive was."""
        archive_path = self._make_zip(tmp_path, "testcorp.com|user|pass\n")
        # sibling dir named after zip stem
        leftover_dir = tmp_path / "combo"
        process_file(archive_path, patterns)
        assert not leftover_dir.exists()

    def test_no_hits_still_cleans_up(self, tmp_path, patterns):
        """Archive and ``combo`` directory are both absent even without hits."""
        archive_path = self._make_zip(tmp_path, "nomatch.com|x|y\n")
        leftover_dir = tmp_path / "combo"
        process_file(archive_path, patterns)
        for path in (archive_path, leftover_dir):
            assert not path.exists()

    def test_zip_with_multiple_txt_files(self, tmp_path, patterns):
        """Hits from every text member of the archive are collected."""
        members = (
            ("a.txt", "testcorp.com|alice|pass\n"),
            ("b.txt", "testcorp.com|bob|pass\n"),
        )
        sources = []
        for member_name, body in members:
            source = tmp_path / member_name
            source.write_text(body)
            sources.append(source)

        archive_path = tmp_path / "combo.zip"
        with zipfile.ZipFile(archive_path, "w") as archive:
            for source in sources:
                archive.write(source, source.name)
        # Remove the loose copies so only the archive remains in tmp_path.
        for source in sources:
            source.unlink()

        found = process_file(archive_path, patterns)
        assert len(found) == 2
# ─── process_file — nested archives ──────────────────────────────────────────
class TestProcessFileNested:
    """``process_file`` on an archive that itself contains another archive."""

    def test_nested_zip_is_recursed(self, tmp_path, patterns):
        """A hit two archive levels deep is found and nothing is left on disk."""

        def wrap_in_zip(member, archive_path):
            # Store *member* under its own file name, then drop the loose copy.
            with zipfile.ZipFile(archive_path, "w") as archive:
                archive.write(member, member.name)
            member.unlink()
            return archive_path

        payload = tmp_path / "inner.txt"
        payload.write_text("testcorp.com|user|pass\n")
        inner_archive = wrap_in_zip(payload, tmp_path / "inner.zip")
        outer_archive = wrap_in_zip(inner_archive, tmp_path / "outer.zip")

        found = process_file(outer_archive, patterns)

        assert found == ["testcorp.com|user|pass"]
        assert not outer_archive.exists()
        assert not (tmp_path / "outer").exists()
# ─── process_file — password-protected .7z ───────────────────────────────────
class TestProcessFile7zPassword:
    """``process_file`` against password-protected ``.7z`` archives.

    Both tests are skipped when the optional ``py7zr`` package cannot be
    imported.
    """

    @staticmethod
    def _make_encrypted_7z(py7zr, tmp_path, password):
        """Create ``combo.7z`` in *tmp_path*, encrypted with *password*.

        The archive holds a single ``content.txt`` member containing the line
        ``testcorp.com|user|pass``. The loose text file is removed afterwards
        so only the archive remains on disk. Returns the archive path.

        *py7zr* is the already-imported module, passed in so the import (and
        the skip when it is missing) stays in the calling test.
        """
        txt = tmp_path / "content.txt"
        txt.write_text("testcorp.com|user|pass\n")
        szf = tmp_path / "combo.7z"
        with py7zr.SevenZipFile(szf, "w", password=password) as z:
            z.write(txt, "content.txt")
        txt.unlink()
        return szf

    def test_unlocks_with_correct_password(self, tmp_path, patterns, monkeypatch):
        """The archive is opened when its password is in ``ARCHIVE_PASSWORDS``."""
        py7zr = pytest.importorskip("py7zr", reason="py7zr not installed")
        import core.processor as proc_module

        # Isolate to a single known password so the test doesn't depend on config
        monkeypatch.setattr(proc_module, "ARCHIVE_PASSWORDS", [b"secretpwd"])
        szf = self._make_encrypted_7z(py7zr, tmp_path, "secretpwd")

        hits = process_file(szf, patterns)

        assert hits == ["testcorp.com|user|pass"]
        assert not szf.exists()

    def test_skips_when_no_password_matches(self, tmp_path, patterns, monkeypatch):
        """No hits are reported when none of the configured passwords fits."""
        py7zr = pytest.importorskip("py7zr", reason="py7zr not installed")
        import core.processor as proc_module

        monkeypatch.setattr(proc_module, "ARCHIVE_PASSWORDS", [b"wrongpwd"])
        szf = self._make_encrypted_7z(py7zr, tmp_path, "correctpwd")

        # No hits — archive could not be opened
        hits = process_file(szf, patterns)
        assert hits == []