ContextualWisdomLab · seonghobae · Jul 4, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -25,3 +25,6 @@
 ## 2024-05-19 - Pre-compile Regex Patterns in Loop-called Functions
 **Learning:** In `scripts/ci/pr_review_merge_scheduler.py`, the `scrub_sensitive_data` function was repeatedly compiling multiple regex patterns via `re.sub` for every log line or text scrubbed. This incurs measurable overhead due to cache lookups and object recreation in tightly looped string processing.
 **Action:** When using multiple regex replacements inside functions that are called frequently or process large amounts of text, define and pre-compile the regex objects at the module level (e.g., `SENSITIVE_DATA_SCRUB_PATTERNS`) and iterate over them using `pattern.sub()`.
+## 2024-11-21 - Pre-compile Regex Patterns in `noema_review_gate.py`
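+**Learning:** Found that the regex pre-compilation optimization previously applied to `scrub_sensitive_data` in `pr_review_merge_scheduler.py` had not been applied to the duplicated function in `noema_review_gate.py`. The same per-call overhead existed there: every `re.sub` call with a string pattern goes through the `re` module's pattern-cache lookup, repeated for each of the seven patterns on every piece of text scrubbed.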
+**Action:** Always check sibling files or duplicated utility functions (like `scrub_sensitive_data`) across the repository when applying a pattern-based performance optimization like pre-compiling regexes.
diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py
@@ -34,17 +34,23 @@
 MAX_DIFF_CHARS = 60000
 
 
+SENSITIVE_DATA_SCRUB_PATTERNS = (  # (compiled regex, replacement) pairs, compiled once when the module is loaded
+    (re.compile(r'(?i)(bearer\s+)[^\s"\'\\]+'), r'\1***'),  # "bearer <value>" (any case): keep the prefix, mask the value
+    (re.compile(r'(?i)(token\s+)[^\s"\'\\]+'), r'\1***'),  # "token <value>" (any case): keep the prefix, mask the value
+    (re.compile(r'(?i)\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b'), '***'),  # github_pat_* and ghp_/gho_/ghu_/ghs_/ghr_* tokens
+    (re.compile(r'\b(sk-[A-Za-z0-9_-]+)'), '***'),  # sk-* prefixed keys
+    (re.compile(r'\b(xox[baprs]-[A-Za-z0-9-]+)'), '***'),  # xoxb-/xoxa-/xoxp-/xoxr-/xoxs-* prefixed tokens
+    (re.compile(r'\b(AKIA[0-9A-Z]{16})'), '***'),  # "AKIA" followed by exactly 16 uppercase letters/digits
+    (re.compile(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?'), r'\1***'),  # "<name> = value" / "<name>: value": keep name and separator, mask the value and its optional quotes
+)
+
+
 def scrub_sensitive_data(text: str | None) -> str | None:
     """Mask sensitive tokens in text to prevent secret leakage."""
     if not text:
         return text
-    text = re.sub(r'(?i)(bearer\s+)[^\s"\'\\]+', r'\1***', text)
-    text = re.sub(r'(?i)(token\s+)[^\s"\'\\]+', r'\1***', text)
-    text = re.sub(r'(?i)\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b', '***', text)
-    text = re.sub(r'\b(sk-[A-Za-z0-9_-]+)', '***', text)
-    text = re.sub(r'\b(xox[baprs]-[A-Za-z0-9-]+)', '***', text)
-    text = re.sub(r'\b(AKIA[0-9A-Z]{16})', '***', text)
-    text = re.sub(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?', r'\1***', text)
+    for pattern, repl in SENSITIVE_DATA_SCRUB_PATTERNS:  # applied in tuple order; each pattern sees the previous pattern's output
+        text = pattern.sub(repl, text)
     return text