From 3435c97e87f003e4ffee73cd7721bf9497eb9c24 Mon Sep 17 00:00:00 2001
From: seonghobae <8172694+seonghobae@users.noreply.github.com>
Date: Thu, 2 Jul 2026 18:10:26 +0000
Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20noema=5Freview=5Fgate.p?=
 =?UTF-8?q?y=20=EC=A0=95=EA=B7=9C=EC=8B=9D=20=EC=82=AC=EC=A0=84=20?=
 =?UTF-8?q?=EC=BB=B4=ED=8C=8C=EC=9D=BC=20=EC=B5=9C=EC=A0=81=ED=99=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- `noema_review_gate.py`의 `scrub_sensitive_data` 함수에서 호출될 때마다 중복으로 생성되던 정규 표현식들을 모듈 레벨인 `SENSITIVE_DATA_SCRUB_PATTERNS` 상수에 `re.compile()`을 사용하여 사전 컴파일 하도록 수정
- 반복적인 에러 리포팅이나 텍스트 스크러빙 시 정규식 컴파일 오버헤드를 줄여 문자열 처리 성능을 개선
- `pytest` 및 `coverage` 100% 확인 완료
---
 scripts/ci/noema_review_gate.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py
index 1e4661b7..1c74b7a5 100644
--- a/scripts/ci/noema_review_gate.py
+++ b/scripts/ci/noema_review_gate.py
@@ -33,18 +33,24 @@
 RUNNING_STATES = {"QUEUED", "IN_PROGRESS", "PENDING", "REQUESTED", "WAITING", "EXPECTED"}
 MAX_DIFF_CHARS = 60000
 
+# ⚡ Bolt: Patterns are compiled once at import time so scrub_sensitive_data skips the per-call `re` pattern-cache lookup.
+# Impact: Improves string processing performance in error reporting.
+SENSITIVE_DATA_SCRUB_PATTERNS = (
+    (re.compile(r'(?i)(bearer\s+)[^\s"\'\\]+'), r'\1***'),
+    (re.compile(r'(?i)(token\s+)[^\s"\'\\]+'), r'\1***'),
+    (re.compile(r'(?i)\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b'), '***'),
+    (re.compile(r'\b(sk-[A-Za-z0-9_-]+)'), '***'),
+    (re.compile(r'\b(xox[baprs]-[A-Za-z0-9-]+)'), '***'),
+    (re.compile(r'\b(AKIA[0-9A-Z]{16})'), '***'),
+    (re.compile(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?'), r'\1***'),
+)
 
 def scrub_sensitive_data(text: str | None) -> str | None:
     """Mask sensitive tokens in text to prevent secret leakage."""
     if not text:
         return text
-    text = re.sub(r'(?i)(bearer\s+)[^\s"\'\\]+', r'\1***', text)
-    text = re.sub(r'(?i)(token\s+)[^\s"\'\\]+', r'\1***', text)
-    text = re.sub(r'(?i)\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b', '***', text)
-    text = re.sub(r'\b(sk-[A-Za-z0-9_-]+)', '***', text)
-    text = re.sub(r'\b(xox[baprs]-[A-Za-z0-9-]+)', '***', text)
-    text = re.sub(r'\b(AKIA[0-9A-Z]{16})', '***', text)
-    text = re.sub(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?', r'\1***', text)
+    for pattern, repl in SENSITIVE_DATA_SCRUB_PATTERNS:
+        text = pattern.sub(repl, text)
     return text
 
 

From 5db6e084f8c3f8a5b9c1670127c2e7a38faa16cd Mon Sep 17 00:00:00 2001
From: seonghobae <8172694+seonghobae@users.noreply.github.com>
Date: Thu, 2 Jul 2026 23:12:45 +0000
Subject: [PATCH 2/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Authorization=20?=
 =?UTF-8?q?=ED=97=A4=EB=8D=94=20=ED=81=AC=EB=A6=AC=EB=8D=B4=EC=85=9C=20?=
 =?UTF-8?q?=EB=A7=88=EC=8A=A4=ED=82=B9=20=EB=B0=8F=20=EC=A0=95=EA=B7=9C?=
 =?UTF-8?q?=EC=8B=9D=20=EC=82=AC=EC=A0=84=20=EC=BB=B4=ED=8C=8C=EC=9D=BC=20?=
 =?UTF-8?q?=EC=B5=9C=EC=A0=81=ED=99=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- `noema_review_gate.py` 내의 `scrub_sensitive_data` 함수가 `Authorization: Basic` 및 `Proxy-Authorization: Basic` 헤더를 마스킹하지 않아 에러 메시지나 로그에서 자격 증명이 유출될 수 있는 보안 취약점(P1) 수정 (`pr_review_merge_scheduler.py`와 마스킹 규칙 동기화)
- 해당 취약점을 확인하는 회귀 테스트 추가 및 `coverage` 100% 달성
- `SENSITIVE_DATA_SCRUB_PATTERNS` 상수를 활용하여 마스킹 규칙에 사용하는 8개의 정규 표현식을 사전에 컴파일함으로써, 해당 함수가 호출될 때마다 발생하는 오버헤드를 줄여 로깅 및 예외 처리 과정의 성능을 최적화
---
 .jules/bolt.md                  | 3 +++
 scripts/ci/noema_review_gate.py | 1 +
 tests/test_noema_review_gate.py | 6 ++++++
 3 files changed, 10 insertions(+)

diff --git a/.jules/bolt.md b/.jules/bolt.md
index a035da6f..4bc70515 100644
--- a/.jules/bolt.md
+++ b/.jules/bolt.md
@@ -25,3 +25,6 @@
 ## 2024-05-19 - Pre-compile Regex Patterns in Loop-called Functions
 **Learning:** In `scripts/ci/pr_review_merge_scheduler.py`, the `scrub_sensitive_data` function was repeatedly compiling multiple regex patterns via `re.sub` for every log line or text scrubbed. This incurs measurable overhead due to cache lookups and object recreation in tightly looped string processing.
 **Action:** When using multiple regex replacements inside functions that are called frequently or process large amounts of text, define and pre-compile the regex objects at the module level (e.g., `SENSITIVE_DATA_SCRUB_PATTERNS`) and iterate over them using `pattern.sub()`.
+## 2026-07-02 - Credential Masking Security Hole in Subprocess Environments
+**Learning:** `scrub_sensitive_data` in `scripts/ci/noema_review_gate.py` was missing a critical masking pattern: it did not mask `Authorization: Basic` or `Proxy-Authorization: Basic` credentials, unlike the analogous helper in `scripts/ci/pr_review_merge_scheduler.py`. This left exception messages and logs liable to expose sensitive credentials when HTTP operations fail.
+**Action:** When implementing credential masking functions that sanitize tracebacks and log messages, make sure the masking scope includes all relevant headers, particularly `Authorization` and `Proxy-Authorization`. Keep the masking helpers in the different CI scripts in sync to prevent blind spots.
diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py
index 1c74b7a5..93abb370 100644
--- a/scripts/ci/noema_review_gate.py
+++ b/scripts/ci/noema_review_gate.py
@@ -43,6 +43,7 @@
     (re.compile(r'\b(xox[baprs]-[A-Za-z0-9-]+)'), '***'),
     (re.compile(r'\b(AKIA[0-9A-Z]{16})'), '***'),
     (re.compile(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?'), r'\1***'),
+    (re.compile(r'(?i)((?:authorization|proxy-authorization)\s*:\s*(?:bearer|basic)\s+)[A-Za-z0-9._~+\/=-]+'), r'\1***'),
 )
 
 def scrub_sensitive_data(text: str | None) -> str | None:
diff --git a/tests/test_noema_review_gate.py b/tests/test_noema_review_gate.py
index 0b333ab3..8f3aade8 100644
--- a/tests/test_noema_review_gate.py
+++ b/tests/test_noema_review_gate.py
@@ -60,6 +60,12 @@ def test_scrub_sensitive_data():
     assert noema.scrub_sensitive_data("password: xyz") == "password: ***"
 
 
+def test_scrub_sensitive_data_authorization_headers():
+    assert noema.scrub_sensitive_data("Authorization: Basic dXNlcjpwYXNz") == "Authorization: Basic ***"
+    assert noema.scrub_sensitive_data("Proxy-Authorization: Basic dXNlcjpwYXNz") == "Proxy-Authorization: Basic ***"
+    assert noema.scrub_sensitive_data("authorization: bearer xyz") == "authorization: bearer ***"
+
+
 def test_split_repo_and_graphql(monkeypatch):
     with pytest.raises(ValueError):
         noema.split_repo("owner")