From 0d11593d4669a2f6c6d34f0fe2e6c49ed6d6af8d Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Fri, 3 Jul 2026 17:15:37 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[HIGH]=20SS?= =?UTF-8?q?RF=20=EC=B7=A8=EC=95=BD=EC=A0=90=20=EC=88=98=EC=A0=95=EC=9D=84?= =?UTF-8?q?=20=EC=9C=84=ED=95=9C=20URL=20=EC=8A=A4=ED=82=A4=EB=A7=88=20?= =?UTF-8?q?=EA=B2=80=EC=A6=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scripts/ci/noema_review_gate.py의 call_llm 함수에서 사용되는 NOEMA_LLM_API_URL이 http:// 또는 https:// 로 시작하는지 검증하는 로직을 추가하여 SSRF 및 Local File Inclusion 취약점을 방지합니다. - URL 스키마 검증 로직 추가 - urllib.request.urlopen에 `# nosec B310` 추가 - 관련 테스트 코드 작성 및 100% 커버리지 확인 - .jules/sentinel.md에 관련 내용 기록 --- .jules/sentinel.md | 4 ++++ scripts/ci/noema_review_gate.py | 5 ++++- tests/test_noema_review_gate.py | 5 +++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 9133bba1..7039d665 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -22,3 +22,7 @@ **Vulnerability:** Server-Side Request Forgery (SSRF) / Local File Inclusion **Learning:** Functions that fetch URLs provided via user inputs (e.g., `wait_for_url` fetching `--backend-ready-url` in CI scripts) can inadvertently read local files if they do not validate the scheme. Python's `urllib.request.urlopen` supports `file://` schemes, allowing attackers to access arbitrary file contents from the host machine or sandbox if they can control the URL parameter. **Prevention:** Always validate URL inputs to restrict allowed schemes. Check that URLs explicitly start with `http://` or `https://` before fetching them with standard libraries like `urllib`. 
+## 2026-07-03 - Prevent SSRF via URL Scheme Validation +**Vulnerability:** Server-Side Request Forgery (SSRF) / Local File Inclusion +**Learning:** External URL fetching with `urllib.request.urlopen` (like API endpoints passed via environment variables) can accept schemes like `file://` implicitly, which could allow arbitrary file reading or internal network scanning if the environment is misconfigured or manipulated. +**Prevention:** Always validate that URLs explicitly start with `http://` or `https://` before using them in standard library requests. Append `# nosec B310` to suppress linter warnings only after verifying the input is validated. diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py index 1e4661b7..93fda3b2 100644 --- a/scripts/ci/noema_review_gate.py +++ b/scripts/ci/noema_review_gate.py @@ -268,6 +268,9 @@ def call_llm(repo: str, number: int, pr: dict[str, Any], diff: str, truncated: b print("Noema LLM review unavailable: NOEMA_LLM_API_URL or NOEMA_LLM_API_KEY is not configured.") return None + if not (api_url.startswith("http://") or api_url.startswith("https://")): + raise ValueError(f"NOEMA_LLM_API_URL must start with http:// or https:// to prevent SSRF vulnerabilities, got: {api_url}") + prompt = { "role": "user", "content": "\n".join( @@ -304,7 +307,7 @@ def call_llm(repo: str, number: int, pr: dict[str, Any], diff: str, truncated: b }, method="POST", ) - with urllib.request.urlopen(request, timeout=120) as response: + with urllib.request.urlopen(request, timeout=120) as response: # nosec B310 raw = response.read().decode("utf-8") data = json.loads(raw) content = (((data.get("choices") or [{}])[0].get("message") or {}).get("content") or "").strip() diff --git a/tests/test_noema_review_gate.py b/tests/test_noema_review_gate.py index 0b333ab3..835d1918 100644 --- a/tests/test_noema_review_gate.py +++ b/tests/test_noema_review_gate.py @@ -198,6 +198,11 @@ def test_call_llm_handles_configuration_and_verdicts(monkeypatch): 
monkeypatch.delenv("NOEMA_LLM_API_KEY", raising=False) assert noema.call_llm("owner/repo", 1, pr, "diff", False) is None + monkeypatch.setenv("NOEMA_LLM_API_URL", "file:///etc/passwd") + monkeypatch.setenv("NOEMA_LLM_API_KEY", "secret") + with pytest.raises(ValueError, match="must start with http:// or https://"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + monkeypatch.setenv("NOEMA_LLM_API_URL", "https://llm.example.test/chat") monkeypatch.setenv("NOEMA_LLM_API_KEY", "secret") monkeypatch.setenv("NOEMA_LLM_MODEL", "review-model")