diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py index 1e4661b7..07290d6f 100644 --- a/scripts/ci/noema_review_gate.py +++ b/scripts/ci/noema_review_gate.py @@ -4,11 +4,14 @@ from __future__ import annotations import argparse +import ipaddress import json import os import re +import socket import subprocess import sys +import urllib.parse import urllib.request from collections.abc import Sequence from typing import Any @@ -267,6 +270,27 @@ def call_llm(repo: str, number: int, pr: dict[str, Any], diff: str, truncated: b if not api_url or not api_key: print("Noema LLM review unavailable: NOEMA_LLM_API_URL or NOEMA_LLM_API_KEY is not configured.") return None + parsed = urllib.parse.urlparse(api_url) + if parsed.scheme.lower() not in {"http", "https"}: + raise ValueError("URL scheme must be http or https") + hostname = (parsed.hostname or "").lower() + if not hostname: + raise ValueError("URL must have a valid hostname") + if hostname in {"localhost", "localhost.localdomain"} or hostname.endswith(".localhost"): + raise ValueError("URL cannot target localhost") + try: + addrinfo = socket.getaddrinfo(hostname, None) + except socket.gaierror: + pass + else: + for result in addrinfo: + ip_str = result[4][0] + try: + ip = ipaddress.ip_address(ip_str) + except ValueError: + continue + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_multicast or ip.is_unspecified: + raise ValueError("URL cannot target internal IP addresses") prompt = { "role": "user", diff --git a/tests/test_noema_review_gate.py b/tests/test_noema_review_gate.py index 0b333ab3..e1324e7b 100644 --- a/tests/test_noema_review_gate.py +++ b/tests/test_noema_review_gate.py @@ -222,6 +222,61 @@ def fake_urlopen(request, timeout): with pytest.raises(RuntimeError, match="unsupported decision"): noema.call_llm("owner/repo", 1, pr, "diff", False) + # Test case-insensitive valid URL + monkeypatch.setenv("NOEMA_LLM_API_URL", "HTTPS://llm.example.test/chat") + monkeypatch.setattr(noema.urllib.request, "urlopen", fake_urlopen) + assert noema.call_llm("owner/repo", 1, pr, "diff", True)["decision"] == "approve" + + # Test invalid scheme (and no original URL in error) + monkeypatch.setenv("NOEMA_LLM_API_URL", "file:///etc/passwd") + with pytest.raises(ValueError, match="URL scheme must be http or https"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + # Test localhost rejection + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://localhost/chat") + with pytest.raises(ValueError, match="URL cannot target localhost"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + # Test missing hostname + monkeypatch.setenv("NOEMA_LLM_API_URL", "http:///chat") + with pytest.raises(ValueError, match="URL must have a valid hostname"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + # Test internal IP rejection + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://169.254.169.254/chat") + with pytest.raises(ValueError, match="URL cannot target internal IP addresses"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + import socket + original_getaddrinfo = socket.getaddrinfo + + # Test DNS resolution bypass + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://resolved-to-local.example.com/chat") + def fake_getaddrinfo(host, port, *args, **kwargs): + if host == "resolved-to-local.example.com": + return [(socket.AF_INET, socket.SOCK_STREAM, 6, "", ("127.0.0.1", 0))] + return original_getaddrinfo(host, port, *args, **kwargs) + monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo) + with pytest.raises(ValueError, match="URL cannot target internal IP addresses"): + noema.call_llm("owner/repo", 1, pr, "diff", False) + + # Test unresolved hostname does not break + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://unresolved.example.com/chat") + def fake_getaddrinfo_error(host, port, *args, **kwargs): + raise socket.gaierror("Name or service not known") + monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo_error) + monkeypatch.setattr(noema.urllib.request, "urlopen", fake_urlopen) + assert noema.call_llm("owner/repo", 1, pr, "diff", True)["decision"] == "approve" + + # Test invalid IP string from getaddrinfo (unlikely but theoretically possible) + monkeypatch.setenv("NOEMA_LLM_API_URL", "http://weird-dns.example.com/chat") + def fake_getaddrinfo_invalid_ip(host, port, *args, **kwargs): + if host == "weird-dns.example.com": + return [(socket.AF_INET, socket.SOCK_STREAM, 6, "", ("not_an_ip", 0))] + return original_getaddrinfo(host, port, *args, **kwargs) + monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo_invalid_ip) + assert noema.call_llm("owner/repo", 1, pr, "diff", True)["decision"] == "approve" + def test_format_findings_and_submit_review(monkeypatch): findings = noema.format_findings(