feat(canary): worker decodes ?d=/?o=/?s=&i=&n=&d= fingerprint params

The fingerprint payload beacons its data as base64url-encoded JSON in
GET query params: ?o=1 for the bare-open beacon, ?d=<blob> for a
single-shot dump, or ?s=<sid>&i=<idx>&n=<total>&d=<chunk> for chunked
dumps. Until now those params were buried inside request_path;
consumers had to parse the URL themselves.

Worker now extracts them in _extract_fingerprint and merges into
raw_headers under reserved _fp* keys:

* _fp_open       — bare-open marker
* _fp            — decoded fingerprint dict (single-shot path)
* _fp_sid, _fp_idx, _fp_total, _fp_chunk — chunked metadata + raw
  base64url chunk (reassembly is a downstream concern, not the
  worker's job)
* _fp_decode_error / _fp_oversize — failure markers for trash dumps

Per-chunk size capped at 8KB so an attacker spamming /c/<known_slug>
can't inflate trigger rows indefinitely. Decode failures degrade
gracefully — the trigger row still records the hit, just with a
_fp_decode_error flag instead of structured fingerprint data.

Tests cover the single-shot decode, bare-open flag, chunked metadata,
malformed input, and oversize drop paths.
This commit is contained in:
2026-04-29 16:25:17 -04:00
parent f64e78f78c
commit dd807bc55e
2 changed files with 177 additions and 2 deletions

View File

@@ -26,9 +26,12 @@ crashes loudly rather than masking failures.
from __future__ import annotations
import asyncio
import base64
import binascii
import json
import os
from datetime import datetime, timezone
from typing import Optional
from typing import Any, Optional
from fastapi import FastAPI, Request, Response
@@ -104,6 +107,10 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
@app.get("/c/{slug}")
async def callback(slug: str, request: Request) -> Response:
merged_headers = dict(request.headers)
fp_meta = _extract_fingerprint(request.query_params)
if fp_meta:
merged_headers.update(fp_meta)
await _record_hit(
repo, bus,
slug=slug,
@@ -111,7 +118,7 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
user_agent=request.headers.get("user-agent"),
request_path=str(request.url.path),
dns_qname=None,
raw_headers=dict(request.headers),
raw_headers=merged_headers,
)
# Always 200 with a tiny image so the attacker's client sees
# a "success" — same return regardless of whether the slug is
@@ -129,6 +136,70 @@ def _build_app(repo: BaseRepository, bus: BaseBus) -> FastAPI:
return app
# Per-chunk size cap. Real fingerprints fit in one ~3KB GET; honest
# overflow is handled via chunking (s/i/n + d). Anything larger than
# this on a single request is junk, so we drop it instead of letting an
# attacker inflate a trigger row indefinitely.
_FP_CHUNK_MAX = 8 * 1024


def _extract_fingerprint(qp: Any) -> dict[str, Any]:
    """Decode the fingerprint-payload query params into reserved keys.

    The obfuscated browser payload may send three shapes on ``GET /c/<slug>``:

    * ``?o=1`` — bare-open beacon, fired before fingerprinting starts.
    * ``?d=<b64url-json>`` — single-shot fingerprint dump.
    * ``?s=<sid>&i=<idx>&n=<total>&d=<b64url-chunk>`` — chunked dump,
      one request per chunk; the reassembler joins by ``s`` and ``i``.

    Args:
        qp: Mapping-like object exposing ``.get(name)`` (e.g. the
            request's query params). Falsy values and objects without a
            callable ``get`` yield ``{}``.

    Returns:
        A flat dict whose keys are namespaced under a ``_fp`` prefix so
        they can't collide with real HTTP header names when merged into
        ``raw_headers``:

        * ``_fp_open`` — ``"1"`` when ``o`` is present and non-empty.
        * ``_fp`` — decoded fingerprint dict (single-shot path).
        * ``_fp_sid`` / ``_fp_idx`` / ``_fp_total`` / ``_fp_chunk`` —
          chunk metadata plus the raw, still-encoded chunk.
        * ``_fp_oversize`` — ``"1"`` when ``d`` exceeds ``_FP_CHUNK_MAX``.
        * ``_fp_decode_error`` — ``"1"`` when a single-shot ``d`` is not
          base64url-encoded JSON of an object.

        No recognised params returns ``{}``. Decode failures never
        raise; the trigger row records the hit either way.
    """
    out: dict[str, Any] = {}

    # One capability check up front so every lookup below (o/d/s/i/n)
    # is guarded the same way.
    getter = getattr(qp, "get", None)
    if not qp or not callable(getter):
        return out

    if getter("o"):
        out["_fp_open"] = "1"

    d = getter("d")
    if not d:
        return out

    if len(d) > _FP_CHUNK_MAX:
        out["_fp_oversize"] = "1"
        return out

    sid = getter("s")
    idx = getter("i")
    total = getter("n")
    if sid and idx and total:
        # Chunked payload: keep raw base64url + metadata; reassembly is
        # a downstream concern (a later worker pass will join chunks
        # by ``_fp_sid`` and decode the concatenation).
        out["_fp_sid"] = sid
        out["_fp_idx"] = idx
        out["_fp_total"] = total
        out["_fp_chunk"] = d
        return out

    # Single-shot: decode now so the API consumer sees a structured
    # dict rather than a long opaque base64 blob.
    try:
        # The payload strips "=" padding; restore it before decoding.
        padded = d + "=" * (-len(d) % 4)
        raw = base64.urlsafe_b64decode(padded.encode("ascii"))
        parsed = json.loads(raw.decode("utf-8"))
    except (binascii.Error, ValueError, RecursionError):
        # ValueError covers non-ASCII input, invalid UTF-8 and invalid
        # JSON. RecursionError is separate: a few KB of nested "[" is
        # enough to exhaust the JSON decoder's recursion budget, and an
        # uncaught error here would turn the beacon response into a 500.
        out["_fp_decode_error"] = "1"
        return out

    if isinstance(parsed, dict):
        out["_fp"] = parsed
    else:
        # Valid JSON, but not an object — not a fingerprint.
        out["_fp_decode_error"] = "1"
    return out
def _client_ip(request: Request) -> str:
# Honor X-Forwarded-For if the operator deployed behind a reverse
# proxy. Take the leftmost address in the chain; everything after

View File

@@ -108,6 +108,110 @@ async def test_xff_is_honored(repo: SQLiteRepository, bus: FakeBus) -> None:
assert triggers[0]["src_ip"] == "9.9.9.9"
@pytest.mark.asyncio
async def test_fingerprint_query_param_decoded_into_raw_headers(
    repo: SQLiteRepository, bus: FakeBus,
) -> None:
    """A single-shot ``?d=`` blob is stored decoded under raw_headers["_fp"]."""
    import base64
    import json

    token_row = {
        "uuid": "tok-fp1",
        "kind": "http",
        "decky_name": "web1",
        "generator": "fingerprint_html",
        "placement_path": "/x",
        "callback_token": "slug-FP1",
        "secret_seed": "s",
        "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    # Encode the way the browser payload does: base64url JSON with the
    # "=" padding stripped.
    fp = {"mint": "abc-123", "nav": {"ua": "Test/1.0"}, "id": "h" * 64}
    encoded = base64.urlsafe_b64encode(json.dumps(fp).encode())
    blob = encoded.rstrip(b"=").decode()

    with TestClient(_build_app(repo, bus)) as client:
        client.get(f"/c/slug-FP1?d={blob}")

    rows = await repo.list_canary_triggers("tok-fp1")
    recorded = json.loads(rows[0]["raw_headers"])
    assert recorded["_fp"] == fp
@pytest.mark.asyncio
async def test_bare_open_beacon_records_fp_open_flag(
    repo: SQLiteRepository, bus: FakeBus,
) -> None:
    """A bare ``?o=1`` beacon is recorded as raw_headers["_fp_open"] == "1"."""
    import json

    token_row = {
        "uuid": "tok-fp2",
        "kind": "http",
        "decky_name": "web1",
        "generator": "fingerprint_html",
        "placement_path": "/x",
        "callback_token": "slug-FP2",
        "secret_seed": "s",
        "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    with TestClient(_build_app(repo, bus)) as client:
        client.get("/c/slug-FP2?o=1")

    rows = await repo.list_canary_triggers("tok-fp2")
    recorded = json.loads(rows[0]["raw_headers"])
    assert recorded["_fp_open"] == "1"
@pytest.mark.asyncio
async def test_chunked_fingerprint_stores_metadata(
    repo: SQLiteRepository, bus: FakeBus,
) -> None:
    """A chunked request keeps s/i/n metadata and the raw chunk, undecoded."""
    import json

    token_row = {
        "uuid": "tok-fp3",
        "kind": "http",
        "decky_name": "web1",
        "generator": "fingerprint_html",
        "placement_path": "/x",
        "callback_token": "slug-FP3",
        "secret_seed": "s",
        "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    with TestClient(_build_app(repo, bus)) as client:
        client.get("/c/slug-FP3?s=abc&i=0&n=2&d=Zm9vYmFy")

    rows = await repo.list_canary_triggers("tok-fp3")
    recorded = json.loads(rows[0]["raw_headers"])

    expected_chunk_keys = {
        "_fp_sid": "abc",
        "_fp_idx": "0",
        "_fp_total": "2",
        "_fp_chunk": "Zm9vYmFy",
    }
    for key, value in expected_chunk_keys.items():
        assert recorded[key] == value
    # Single-shot decode should NOT have run for a chunked payload.
    assert "_fp" not in recorded
@pytest.mark.asyncio
async def test_malformed_fingerprint_records_decode_error(
    repo: SQLiteRepository, bus: FakeBus,
) -> None:
    """A ``?d=`` blob that is not JSON is flagged instead of decoded."""
    import json

    token_row = {
        "uuid": "tok-fp4",
        "kind": "http",
        "decky_name": "web1",
        "generator": "fingerprint_html",
        "placement_path": "/x",
        "callback_token": "slug-FP4",
        "secret_seed": "s",
        "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    with TestClient(_build_app(repo, bus)) as client:
        # "Zm9vYmFy" is valid base64 for "foobar", which is not JSON.
        client.get("/c/slug-FP4?d=Zm9vYmFy")

    rows = await repo.list_canary_triggers("tok-fp4")
    recorded = json.loads(rows[0]["raw_headers"])
    assert recorded["_fp_decode_error"] == "1"
    assert "_fp" not in recorded
@pytest.mark.asyncio
async def test_oversize_fingerprint_dropped(
    repo: SQLiteRepository, bus: FakeBus,
) -> None:
    """A ``?d=`` value above the per-chunk cap is dropped and flagged."""
    import json

    token_row = {
        "uuid": "tok-fp5",
        "kind": "http",
        "decky_name": "web1",
        "generator": "fingerprint_html",
        "placement_path": "/x",
        "callback_token": "slug-FP5",
        "secret_seed": "s",
        "created_by": "u1",
    }
    await repo.create_canary_token(token_row)

    # 9KB blob exceeds the 8KB per-chunk cap
    oversized = "A" * (9 * 1024)
    with TestClient(_build_app(repo, bus)) as client:
        client.get("/c/slug-FP5?d=" + oversized)

    rows = await repo.list_canary_triggers("tok-fp5")
    recorded = json.loads(rows[0]["raw_headers"])
    assert recorded["_fp_oversize"] == "1"
    assert "_fp" not in recorded
@pytest.mark.asyncio
async def test_no_decnet_strings_in_response(repo: SQLiteRepository, bus: FakeBus) -> None:
"""Stealth posture: nothing in the HTTP surface mentions DECNET."""