diff --git a/.jules/bolt.md b/.jules/bolt.md index a035da6f..4bc70515 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -25,3 +25,6 @@ ## 2024-05-19 - Pre-compile Regex Patterns in Loop-called Functions **Learning:** In `scripts/ci/pr_review_merge_scheduler.py`, the `scrub_sensitive_data` function was repeatedly compiling multiple regex patterns via `re.sub` for every log line or text scrubbed. This incurs measurable overhead due to cache lookups and object recreation in tightly looped string processing. **Action:** When using multiple regex replacements inside functions that are called frequently or process large amounts of text, define and pre-compile the regex objects at the module level (e.g., `SENSITIVE_DATA_SCRUB_PATTERNS`) and iterate over them using `pattern.sub()`. +## 2026-07-02 - Credential Masking Security Hole in Subprocess Environments +**Learning:** Found a critical missing credential masking pattern in `scripts/ci/noema_review_gate.py`'s `scrub_sensitive_data` which didn't mask `Authorization: Basic` or `Proxy-Authorization: Basic` tokens unlike its analogous helper in `scripts/ci/pr_review_merge_scheduler.py`. This leaves exception messages and logs vulnerable to exposing sensitive credentials when HTTP operations fail. +**Action:** When implementing credential masking functions that sanitize tracebacks and log messages, ensure the masking scope includes all relevant headers, particularly `Authorization` and `Proxy-Authorization`. Ensure parity across masking helpers across CI scripts to prevent blind spots. diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py index 1e4661b7..93abb370 100644 --- a/scripts/ci/noema_review_gate.py +++ b/scripts/ci/noema_review_gate.py @@ -33,18 +33,25 @@ RUNNING_STATES = {"QUEUED", "IN_PROGRESS", "PENDING", "REQUESTED", "WAITING", "EXPECTED"} MAX_DIFF_CHARS = 60000 +# ⚡ Bolt: Pre-compiled regex patterns to avoid recompilation on every scrub_sensitive_data call. +# Impact: Improves string processing performance in error reporting. +SENSITIVE_DATA_SCRUB_PATTERNS = ( + (re.compile(r'(?i)(bearer\s+)[^\s"\'\\]+'), r'\1***'), + (re.compile(r'(?i)(token\s+)[^\s"\'\\]+'), r'\1***'), + (re.compile(r'(?i)\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b'), '***'), + (re.compile(r'\b(sk-[A-Za-z0-9_-]+)'), '***'), + (re.compile(r'\b(xox[baprs]-[A-Za-z0-9-]+)'), '***'), + (re.compile(r'\b(AKIA[0-9A-Z]{16})'), '***'), + (re.compile(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?'), r'\1***'), + (re.compile(r'(?i)((?:authorization|proxy-authorization)\s*:\s*(?:bearer|basic)\s+)[A-Za-z0-9._~+\/=-]+'), r'\1***'), +) def scrub_sensitive_data(text: str | None) -> str | None: """Mask sensitive tokens in text to prevent secret leakage.""" if not text: return text - text = re.sub(r'(?i)(bearer\s+)[^\s"\'\\]+', r'\1***', text) - text = re.sub(r'(?i)(token\s+)[^\s"\'\\]+', r'\1***', text) - text = re.sub(r'(?i)\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b', '***', text) - text = re.sub(r'\b(sk-[A-Za-z0-9_-]+)', '***', text) - text = re.sub(r'\b(xox[baprs]-[A-Za-z0-9-]+)', '***', text) - text = re.sub(r'\b(AKIA[0-9A-Z]{16})', '***', text) - text = re.sub(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?', r'\1***', text) + for pattern, repl in SENSITIVE_DATA_SCRUB_PATTERNS: + text = pattern.sub(repl, text) return text diff --git a/tests/test_noema_review_gate.py b/tests/test_noema_review_gate.py index 0b333ab3..8f3aade8 100644 --- a/tests/test_noema_review_gate.py +++ b/tests/test_noema_review_gate.py @@ -60,6 +60,12 @@ def test_scrub_sensitive_data(): assert noema.scrub_sensitive_data("password: xyz") == "password: ***" +def test_scrub_sensitive_data_authorization_headers(): + assert noema.scrub_sensitive_data("Authorization: Basic dXNlcjpwYXNz") == "Authorization: Basic ***" + assert noema.scrub_sensitive_data("Proxy-Authorization: Basic dXNlcjpwYXNz") == "Proxy-Authorization: Basic ***" + assert noema.scrub_sensitive_data("authorization: bearer xyz") == "authorization: bearer ***" + + def test_split_repo_and_graphql(monkeypatch): with pytest.raises(ValueError): noema.split_repo("owner")