From 37f41611ad5af5d7dac770524fcc3da47a74760e Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Thu, 2 Jul 2026 21:45:47 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITIC?= =?UTF-8?q?AL/HIGH]=20Fix=20Server-Side=20Request=20Forgery=20(SSRF)=20ris?= =?UTF-8?q?k=20in=20Noema=20LLM=20API=20URL?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/ci/noema_review_gate.py | 2 ++ tests/test_noema_review_gate.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py index 1e4661b7..47c95ca3 100644 --- a/scripts/ci/noema_review_gate.py +++ b/scripts/ci/noema_review_gate.py @@ -267,6 +267,8 @@ def call_llm(repo: str, number: int, pr: dict[str, Any], diff: str, truncated: b if not api_url or not api_key: print("Noema LLM review unavailable: NOEMA_LLM_API_URL or NOEMA_LLM_API_KEY is not configured.") return None + if not (api_url.startswith("http://") or api_url.startswith("https://")): + raise ValueError(f"URL must start with http:// or https://, got: {api_url}") prompt = { "role": "user", diff --git a/tests/test_noema_review_gate.py b/tests/test_noema_review_gate.py index 0b333ab3..53b5e784 100644 --- a/tests/test_noema_review_gate.py +++ b/tests/test_noema_review_gate.py @@ -222,6 +222,10 @@ def fake_urlopen(request, timeout): with pytest.raises(RuntimeError, match="unsupported decision"): noema.call_llm("owner/repo", 1, pr, "diff", False) + monkeypatch.setenv("NOEMA_LLM_API_URL", "file:///etc/passwd") + with pytest.raises(ValueError, match="URL must start with http:// or https://"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + def test_format_findings_and_submit_review(monkeypatch): findings = noema.format_findings( From 74d5b17caa2cc2e6acf1da59c7a63a9d61d87398 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Fri, 3 Jul 2026 07:47:25 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITIC?= =?UTF-8?q?AL/HIGH]=20Fix=20Server-Side=20Request=20Forgery=20(SSRF)=20ris?= =?UTF-8?q?k=20in=20Noema=20LLM=20API=20URL?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/ci/noema_review_gate.py | 26 ++++++++++++++-- tests/test_noema_review_gate.py | 53 ++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py index 47c95ca3..07290d6f 100644 --- a/scripts/ci/noema_review_gate.py +++ b/scripts/ci/noema_review_gate.py @@ -4,11 +4,14 @@ from __future__ import annotations import argparse +import ipaddress import json import os import re +import socket import subprocess import sys +import urllib.parse import urllib.request from collections.abc import Sequence from typing import Any @@ -267,8 +270,27 @@ def call_llm(repo: str, number: int, pr: dict[str, Any], diff: str, truncated: b if not api_url or not api_key: print("Noema LLM review unavailable: NOEMA_LLM_API_URL or NOEMA_LLM_API_KEY is not configured.") return None - if not (api_url.startswith("http://") or api_url.startswith("https://")): - raise ValueError(f"URL must start with http:// or https://, got: {api_url}") + parsed = urllib.parse.urlparse(api_url) + if parsed.scheme.lower() not in {"http", "https"}: + raise ValueError("URL scheme must be http or https") + hostname = (parsed.hostname or "").lower() + if not hostname: + raise ValueError("URL must have a valid hostname") + if hostname in {"localhost", "localhost.localdomain"} or hostname.endswith(".localhost"): + raise ValueError("URL cannot target localhost") + try: + addrinfo = socket.getaddrinfo(hostname, None) + except socket.gaierror: + pass + else: + for result in addrinfo: + ip_str = result[4][0] + try: + ip = ipaddress.ip_address(ip_str) + except ValueError: + continue + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_multicast or ip.is_unspecified: + raise ValueError("URL cannot target internal IP addresses") prompt = { "role": "user", diff --git a/tests/test_noema_review_gate.py b/tests/test_noema_review_gate.py index 53b5e784..e1324e7b 100644 --- a/tests/test_noema_review_gate.py +++ b/tests/test_noema_review_gate.py @@ -222,10 +222,61 @@ def fake_urlopen(request, timeout): with pytest.raises(RuntimeError, match="unsupported decision"): noema.call_llm("owner/repo", 1, pr, "diff", False) + # Test case-insensitive valid URL + monkeypatch.setenv("NOEMA_LLM_API_URL", "HTTPS://llm.example.test/chat") + monkeypatch.setattr(noema.urllib.request, "urlopen", fake_urlopen) + assert noema.call_llm("owner/repo", 1, pr, "diff", True)["decision"] == "approve" + + # Test invalid scheme (and no original URL in error) monkeypatch.setenv("NOEMA_LLM_API_URL", "file:///etc/passwd") - with pytest.raises(ValueError, match="URL must start with http:// or https://"): + with pytest.raises(ValueError, match="URL scheme must be http or https"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + # Test localhost rejection + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://localhost/chat") + with pytest.raises(ValueError, match="URL cannot target localhost"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + # Test missing hostname + monkeypatch.setenv("NOEMA_LLM_API_URL", "http:///chat") + with pytest.raises(ValueError, match="URL must have a valid hostname"): noema.call_llm("owner/repo", 1, pr, "diff", False) + # Test internal IP rejection + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://169.254.169.254/chat") + with pytest.raises(ValueError, match="URL cannot target internal IP addresses"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + import socket + original_getaddrinfo = socket.getaddrinfo + + # Test DNS resolution bypass + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://resolved-to-local.example.com/chat") + def fake_getaddrinfo(host, port, *args, **kwargs): + if host == "resolved-to-local.example.com": + return [(socket.AF_INET, socket.SOCK_STREAM, 6, "", ("127.0.0.1", 0))] + return original_getaddrinfo(host, port, *args, **kwargs) + monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo) + with pytest.raises(ValueError, match="URL cannot target internal IP addresses"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + # Test unresolved hostname does not break + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://unresolved.example.com/chat") + def fake_getaddrinfo_error(host, port, *args, **kwargs): + raise socket.gaierror("Name or service not known") + monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo_error) + monkeypatch.setattr(noema.urllib.request, "urlopen", fake_urlopen) + assert noema.call_llm("owner/repo", 1, pr, "diff", True)["decision"] == "approve" + + # Test invalid IP string from getaddrinfo (unlikely but theoretically possible) + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://weird-dns.example.com/chat") + def fake_getaddrinfo_invalid_ip(host, port, *args, **kwargs): + if host == "weird-dns.example.com": + return [(socket.AF_INET, socket.SOCK_STREAM, 6, "", ("not_an_ip", 0))] + return original_getaddrinfo(host, port, *args, **kwargs) + monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo_invalid_ip) + assert noema.call_llm("owner/repo", 1, pr, "diff", True)["decision"] == "approve" + def test_format_findings_and_submit_review(monkeypatch): findings = noema.format_findings(