SecNode
diff --git a/‎src/secnodeapi/ai/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎src/secnodeapi/ai/__init__.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/secnodeapi/ai/validate.py‎
Lines changed: 162 additions & 39 deletions b/‎src/secnodeapi/ai/validate.py‎
Lines changed: 162 additions & 39 deletions
diff --git a/‎src/secnodeapi/cli.py‎
Lines changed: 82 additions & 1 deletion b/‎src/secnodeapi/cli.py‎
Lines changed: 82 additions & 1 deletion
@@ -4,10 +4,11 @@
 
 from .generate import generate_test_cases
 from .understand import understand_api_with_ai
-from .validate import validate_findings_with_ai
+from .validate import classify_findings, validate_findings_with_ai
 
 __all__ = [
     "understand_api_with_ai",
     "generate_test_cases",
+    "classify_findings",
     "validate_findings_with_ai",
 ]
@@ -1,78 +1,201 @@
 """
 AI validation stage for filtering true-positive findings.
 """
+
 import asyncio
 import json
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 import structlog
+from pydantic import BaseModel, ValidationError
 
 from ..vulnerability_models import Finding, TestResult
-
 from .llm_client import call_llm
 
 logger = structlog.get_logger(__name__)
 
 
-async def _evaluate_single_result(result: TestResult) -> Optional[Finding]:
-    """Evaluate one test result and return a confirmed finding when applicable."""
-    sys_prompt = (
-        "You are a CISO and Elite AppSec Triager reviewing penetration test results. Your job is to definitively determine "
-        "if an executed test case reveals a true positive vulnerability, or if it is a false positive / expected behavior. "
-        "Apply strict heuristics:\n"
-        "- 401 Unauthorized / 403 Forbidden is usually expected security behavior (NOT a vulnerability).\n"
-        "- 500 Internal Server Error reveals a lack of robustness, potentially a DoS or injection vuln, but requires context.\n"
-        "- 200/201 OK on an endpoint that shouldn't grant access (e.g. a BOLA or mass assignment test) is a highly probable vulnerability.\n"
-        "- If a Rate Limit test returns 200 OK after 100 requests, RATE LIMITING IS BROKEN.\n\n"
-        "You MUST perform a chain-of-thought analysis before concluding.\n"
-        "Output ONLY JSON with this precise schema:\n"
-        '{"analysis": "str (your thought process)", "is_vulnerable": bool, "cvss_score": float, "cvss_vector": "str", '
-        '"description": "str", "remediation": "str", "confidence": float}'
+class AIValidationPayload(BaseModel):
+    """Schema that the LLM's JSON verdict is validated against before use."""
+
+    # Model's reasoning text; not read by the code visible in this module.
+    analysis: str
+    # Verdict flag; a false value means no finding is produced.
+    is_vulnerable: bool
+    # Copied verbatim into the resulting Finding.
+    cvss_score: float
+    # Copied verbatim into the resulting Finding.
+    cvss_vector: str
+    # Copied verbatim into the resulting Finding.
+    description: str
+    # Copied verbatim into the resulting Finding.
+    remediation: str
+    # Compared against 0.75 to split confirmed from suspected findings.
+    confidence: float
+
+
+def _class_keywords(result: TestResult) -> str:
+    """Join the test case's category, name and description into one lowercase string."""
+    case = result.test_case
+    return " ".join(
+        text.lower()
+        for text in (case.owasp_category, case.name, case.description)
     )
 
-    user_prompt = f"Test Result Context:\n{result.model_dump_json(indent=2)}"
 
-    try:
-        llm_resp = await call_llm(sys_prompt, user_prompt, temperature=0.1)
-        data = json.loads(llm_resp)
+def _has_keyword(text: str, keyword: str) -> bool:
+    """Return True when *keyword* occurs in *text* and is not just a numeric prefix.
+
+    Plain substring matching finds the identifier "api1" inside "api10", which
+    would route an API10 test into the API1 validator. A keyword that ends in a
+    digit therefore only counts when the character following it is not another
+    digit. Keywords that do not end in a digit keep plain substring semantics.
+    """
+    ends_in_digit = keyword[-1:].isdigit()
+    start = text.find(keyword)
+    while start != -1:
+        end = start + len(keyword)
+        # An empty slice (keyword at end of text) is not a digit, so it matches.
+        if not (ends_in_digit and text[end : end + 1].isdigit()):
+            return True
+        start = text.find(keyword, start + 1)
+    return False
+
+
+def _deterministic_validate_result(result: TestResult) -> Optional[Finding]:
+    """Apply rule-based validators for high-value classes to reduce hallucinations.
+
+    The test case's category, name and description are scanned for keywords
+    that mark it as an authorization, mass-assignment or rate-limit test. The
+    checks run in that order and the first one that fires wins.
+
+    Args:
+        result: Executed test result to evaluate.
+
+    Returns:
+        A ``Finding`` with ``validation_source="deterministic"`` when a 2xx
+        response was observed for one of the recognised classes (and, for mass
+        assignment, the request body carried a protected field); otherwise
+        ``None``.
+    """
+    category_text = _class_keywords(result)
+    status = result.status_code
+    is_2xx = 200 <= status < 300
+    req_snippet = f"{result.test_case.method} {result.request_url}"
+    # Only the first 500 characters of the response are kept as evidence.
+    resp_snippet = result.response_body[:500]
 
-        if data.get("is_vulnerable") and data.get("confidence", 0.0) >= 0.75:
-            req_snippet = f"{result.test_case.method} {result.request_url}"
-            resp_snippet = result.response_body[:500]
+    bola_like = any(
+        _has_keyword(category_text, keyword)
+        for keyword in ("bola", "idor", "api1", "bfla", "api5", "broken object", "broken function")
+    )
+    mass_assignment_like = any(
+        _has_keyword(category_text, keyword)
+        for keyword in ("mass assignment", "bopla", "api3")
+    )
+    rate_limit_like = any(
+        _has_keyword(category_text, keyword)
+        for keyword in ("rate limit", "ratelimit", "api4")
+    )
+
+    if bola_like and is_2xx:
+        return Finding(
+            test_case_id=result.test_case.id,
+            endpoint=result.test_case.endpoint,
+            method=result.test_case.method,
+            vulnerability_class=result.test_case.owasp_category,
+            cvss_score=8.2,
+            cvss_vector="CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:N",
+            description=(
+                "Deterministic validator observed successful access on an authorization-focused test."
+            ),
+            remediation="Enforce object and function authorization checks server-side.",
+            confidence=0.9,
+            evidence_request=req_snippet,
+            evidence_response=resp_snippet,
+            validation_source="deterministic",
+            identity=result.test_case.identity,
+        )
 
+    if mass_assignment_like and is_2xx and isinstance(result.test_case.body, dict):
+        protected_fields = {"is_admin", "role", "credit_balance", "permissions"}
+        # Fires only when the request body names at least one protected field.
+        if not protected_fields.isdisjoint(result.test_case.body):
             return Finding(
                 test_case_id=result.test_case.id,
                 endpoint=result.test_case.endpoint,
                 method=result.test_case.method,
                 vulnerability_class=result.test_case.owasp_category,
-                cvss_score=data.get("cvss_score", 0.0),
-                cvss_vector=data.get("cvss_vector", "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N"),
-                description=data.get("description", "Vulnerability detected."),
-                remediation=data.get("remediation", "Review endpoint authorization."),
-                confidence=data.get("confidence", 0.8),
+                cvss_score=8.0,
+                cvss_vector="CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:N",
+                description=(
+                    "Deterministic validator observed accepted protected-field mutation payload."
+                ),
+                remediation="Allowlist writable fields and reject privileged attributes at API boundary.",
+                confidence=0.88,
                 evidence_request=req_snippet,
                 evidence_response=resp_snippet,
+                validation_source="deterministic",
+                identity=result.test_case.identity,
             )
+
+    if rate_limit_like and is_2xx:
+        return Finding(
+            test_case_id=result.test_case.id,
+            endpoint=result.test_case.endpoint,
+            method=result.test_case.method,
+            vulnerability_class=result.test_case.owasp_category,
+            cvss_score=6.8,
+            cvss_vector="CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H",
+            description=(
+                "Rate-limit-focused test continued receiving successful responses without clear throttling."
+            ),
+            remediation="Introduce per-principal and per-IP rate limits with enforced backoff.",
+            confidence=0.8,
+            evidence_request=req_snippet,
+            evidence_response=resp_snippet,
+            validation_source="deterministic",
+            identity=result.test_case.identity,
+        )
+
+    return None
+
+
+def _build_finding_from_ai(result: TestResult, payload: AIValidationPayload, source: str) -> Finding:
+    """Combine a test result and a validated AI verdict into a ``Finding``.
+
+    Scoring and text fields come from *payload*; endpoint, method, category and
+    identity come from the test case; *source* is stored as ``validation_source``.
+    """
+    case = result.test_case
+    return Finding(
+        test_case_id=case.id,
+        endpoint=case.endpoint,
+        method=case.method,
+        vulnerability_class=case.owasp_category,
+        cvss_score=payload.cvss_score,
+        cvss_vector=payload.cvss_vector,
+        description=payload.description,
+        remediation=payload.remediation,
+        confidence=payload.confidence,
+        evidence_request=f"{case.method} {result.request_url}",
+        # Only the first 500 characters of the response are kept as evidence.
+        evidence_response=result.response_body[:500],
+        validation_source=source,
+        identity=case.identity,
+    )
+
+
+async def _evaluate_single_result(result: TestResult) -> Tuple[Optional[Finding], Optional[Finding]]:
+    """Evaluate one test result and return ``(confirmed, suspected)``.
+
+    The deterministic validators run first; if one produces a finding it is
+    returned as confirmed and the LLM is not called. Otherwise the LLM verdict
+    is validated against ``AIValidationPayload``: a vulnerable verdict with
+    confidence >= 0.75 is confirmed, a vulnerable verdict below that is
+    suspected. At most one element of the tuple is set; both are ``None`` when
+    the result is not considered vulnerable or evaluation failed.
+    """
+    deterministic = _deterministic_validate_result(result)
+    if deterministic is not None:
+        return deterministic, None
+
+    sys_prompt = (
+        "You are a senior AppSec triager validating API pentest results. "
+        "Return ONLY valid JSON with exact schema:\n"
+        '{"analysis":"str","is_vulnerable":bool,"cvss_score":float,"cvss_vector":"str",'
+        '"description":"str","remediation":"str","confidence":float}\n'
+        "Never return markdown or extra text."
+    )
+    user_prompt = f"Test Result Context:\n{result.model_dump_json(indent=2)}"
+
+    try:
+        llm_resp = await call_llm(sys_prompt, user_prompt, temperature=0.1)
+        # Raises JSONDecodeError / ValidationError when the reply is off-schema.
+        payload = AIValidationPayload.model_validate(json.loads(llm_resp))
+        if not payload.is_vulnerable:
+            return None, None
+        if payload.confidence >= 0.75:
+            return _build_finding_from_ai(result, payload, "ai"), None
+        # Vulnerable but low confidence: report as suspected, not confirmed.
+        return None, _build_finding_from_ai(result, payload, "ai-suspected")
+    except (json.JSONDecodeError, ValidationError) as e:
+        logger.warning(
+            "AI output schema validation failed",
+            error=str(e),
+            test_id=result.test_case.id,
+        )
     except Exception as e:
         logger.warning("Failed validation evaluation", error=str(e), test_id=result.test_case.id)
-    return None
+    # Reached only after a logged failure above.
+    return None, None
 
 
-async def validate_findings_with_ai(results: List[TestResult]) -> List[Finding]:
-    """Replay AI analysis over execution results to confirm findings."""
+async def classify_findings(results: List[TestResult]) -> Tuple[List[Finding], List[Finding]]:
+    """Classify findings into deterministic/AI-confirmed and suspected buckets.
+
+    Results are evaluated concurrently in batches of five. A result whose
+    evaluation raised (or was cancelled) is logged and skipped so that one
+    failure does not discard the findings already collected.
+
+    Args:
+        results: Executed test results to evaluate.
+
+    Returns:
+        A ``(confirmed, suspected)`` pair of finding lists.
+    """
     logger.info("Validating execution results with AI", count=len(results))
+    confirmed: List[Finding] = []
+    suspected: List[Finding] = []
 
-    findings: List[Finding] = []
     batch_size = 5
     for i in range(0, len(results), batch_size):
         batch = results[i : i + batch_size]
         tasks = [_evaluate_single_result(result) for result in batch]
-        batch_findings = await asyncio.gather(*tasks, return_exceptions=True)
+        batch_outcomes = await asyncio.gather(*tasks, return_exceptions=True)
+
+        for outcome in batch_outcomes:
+            # gather() can hand back CancelledError, which derives from
+            # BaseException rather than Exception; checking only Exception
+            # would let it reach the tuple unpacking below and crash.
+            if isinstance(outcome, BaseException):
+                logger.error("Error evaluating result", exc_info=outcome)
+                continue
+            confirmed_finding, suspected_finding = outcome
+            if confirmed_finding is not None:
+                confirmed.append(confirmed_finding)
+            if suspected_finding is not None:
+                suspected.append(suspected_finding)
 
-        for finding in batch_findings:
-            if isinstance(finding, Exception):
-                logger.error("Error evaluating result", exc_info=finding)
-            elif finding is not None:
-                findings.append(finding)
+    return confirmed, suspected
 
-    return findings
+
+async def validate_findings_with_ai(results: List[TestResult]) -> List[Finding]:
+    """Return only the confirmed findings for *results*.
+
+    Backwards-compatible wrapper around ``classify_findings``; the suspected
+    bucket is discarded.
+    """
+    confirmed_findings, _suspected = await classify_findings(results)
+    return confirmed_findings
@@ -1,9 +1,11 @@
 """SecNode API command-line interface."""
+
 import argparse
 import asyncio
 import json
 import warnings
 from pathlib import Path
+from typing import List
 
 import structlog
 
@@ -26,9 +28,12 @@
 from .config import RuntimeConfig, has_supported_provider_key
 from .schema_fetcher import analyze_api_structure, fetch_schema
 from .services.pipeline import (
+    IdentityContext,
     PipelineInput,
     build_pipeline_artifacts,
     build_report,
+    build_report_with_context,
+    run_agent_pipeline,
     validate_and_retest,
     write_report,
 )
@@ -51,13 +56,63 @@ def parse_auth(auth_header: str, auth_file: str) -> dict:
     return headers
 
 
+def parse_identities(identities_file: str) -> List[IdentityContext]:
+    """Parse static identity contexts from a JSON file.
+
+    The file may hold either a top-level list of identity objects or an object
+    with an ``identities`` list. An entry is used when it is an object with a
+    string ``name`` and, if present, an object-valued ``headers``; entries of
+    any other shape are skipped.
+
+    Args:
+        identities_file: Path to the JSON file; a falsy value means none given.
+
+    Returns:
+        The parsed identities, or an empty list when no file was given, the
+        file cannot be read or decoded, or its top-level shape is unsupported.
+    """
+    if not identities_file:
+        return []
+    try:
+        payload = json.loads(Path(identities_file).read_text(encoding="utf-8"))
+    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
+        logger.error("Failed to read identities file", error=str(e))
+        return []
+
+    # Valid JSON can also be a scalar or null, and "identities" can hold any
+    # value; reject those here instead of crashing on .get() or on iteration.
+    items = payload.get("identities", []) if isinstance(payload, dict) else payload
+    if not isinstance(items, list):
+        logger.error(
+            "Identities file must contain a list of identities",
+            found=type(items).__name__,
+        )
+        return []
+
+    identities: List[IdentityContext] = []
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+        name = item.get("name")
+        headers = item.get("headers", {})
+        if isinstance(name, str) and isinstance(headers, dict):
+            identities.append(IdentityContext(name=name, headers=headers))
+    return identities
+
+
 def parse_args():
     parser = argparse.ArgumentParser(description="SecNode API pentesting CLI")
     parser.add_argument("--target", required=True, help="URL or path to OpenAPI schema")
     parser.add_argument("--concurrency", type=int, default=5, help="Concurrent requests")
     parser.add_argument("--auth-header", help="Inline header e.g. 'Authorization: Bearer <token>'")
     parser.add_argument("--auth-file", help="Path to JSON file with auth headers")
     parser.add_argument("--proxy", help="HTTP proxy to route traffic through")
+    parser.add_argument(
+        "--identities-file",
+        help="Path to JSON file containing identity header sets for differential testing",
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["agent", "legacy"],
+        default="agent",
+        help="Pipeline mode: agent loop (default) or legacy fixed pipeline",
+    )
+    parser.add_argument(
+        "--request-budget",
+        type=int,
+        default=400,
+        help="Maximum requests for an agent run",
+    )
+    parser.add_argument(
+        "--per-endpoint-budget",
+        type=int,
+        default=20,
+        help="Maximum attempts per endpoint per agent run",
+    )
+    parser.add_argument(
+        "--max-iterations",
+        type=int,
+        default=4,
+        help="Maximum plan/execute iterations in agent mode",
+    )
     parser.add_argument(
         "--insecure",
         action="store_true",
@@ -151,10 +206,28 @@ async def _run_full_pipeline(args, pipeline_input: PipelineInput) -> None:
     logger.info("SecNode pipeline completed successfully", output_dir=output_dir)
 
 
+async def _run_agent_mode(args, pipeline_input: PipelineInput) -> None:
+    """Run the agent pipeline, write its report, and log the finding counts."""
+    structure, confirmed_findings, suspected_findings, run_metrics = await run_agent_pipeline(
+        pipeline_input
+    )
+    output_dir = write_report(
+        build_report_with_context(
+            structure.title,
+            confirmed_findings,
+            suspected_findings=suspected_findings,
+            metrics=run_metrics,
+        ),
+        args.target,
+    )
+    logger.info(
+        "SecNode agent mode completed successfully",
+        output_dir=output_dir,
+        confirmed=len(confirmed_findings),
+        suspected=len(suspected_findings),
+    )
+
+
 async def main():
     args = parse_args()
     runtime = RuntimeConfig(insecure=args.insecure)
     auth_headers = parse_auth(args.auth_header, args.auth_file)
+    identities = parse_identities(getattr(args, "identities_file", None))
     _require_provider_key(args.schema_only)
     logger.info(
         "Starting SecNode API audit",
@@ -168,6 +241,10 @@ async def main():
         auth_headers=auth_headers,
         proxy=args.proxy,
         verify_ssl=runtime.verify_ssl,
+        identities=identities,
+        request_budget=max(1, getattr(args, "request_budget", 400)),
+        per_endpoint_budget=max(1, getattr(args, "per_endpoint_budget", 20)),
+        max_iterations=max(1, getattr(args, "max_iterations", 4)),
     )
 
     try:
@@ -178,7 +255,11 @@ async def main():
             await _run_dry_run(args, pipeline_input)
             return
 
-        await _run_full_pipeline(args, pipeline_input)
+        mode = getattr(args, "mode", "legacy")
+        if mode == "legacy":
+            await _run_full_pipeline(args, pipeline_input)
+        else:
+            await _run_agent_mode(args, pipeline_input)
     except Exception as e:
         logger.error("Pipeline failed", error=str(e), exc_info=e)
         raise SystemExit(1) from e