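improvements: pass recon tool context into AI prompts, merge tool-discovered parameters into the API structure, and print a per-iteration summary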

vishnurajkv · vishnurajkv · commit 579269246300 · 2026-03-09T15:10:33.000+01:00
diff --git a/src/secnodeapi/ai/generate.py b/src/secnodeapi/ai/generate.py
@@ -14,15 +14,26 @@
 
 
 async def generate_test_cases(
-    understanding: APIUnderstanding, structure: SchemaStructure, instructions: Optional[Dict[str, Any]] = None
+    understanding: APIUnderstanding,
+    structure: SchemaStructure,
+    instructions: Optional[Dict[str, Any]] = None,
+    tool_context: Optional[Dict[str, Any]] = None,
 ) -> List[TestCase]:
     """Generate adversarial test cases for OWASP and business logic flaws."""
-    logger.info("Generating adversarial test cases with AI")
+    logger.info(
+        "Generating adversarial test cases with AI",
+        has_instructions=bool(instructions),
+        has_tool_context=bool(tool_context),
+    )
 
     sys_prompt = (
         "You are an elite Offensive Security Researcher evaluating an API. Your goal is to generate "
         "aggressive, complex, and highly realistic test cases. You MUST meticulously analyze the API structure, "
         "infer data models, identify relationships between differing endpoints, and exploit implied business logic constraints. "
+        "In addition, you MUST fully leverage reconnaissance results from tools like Dirsearch and Arjun:\n"
+        "- Use tool-discovered endpoints and paths (including shadow/admin/debug routes) as high-priority targets.\n"
+        "- Use Arjun-discovered parameters for parameter tampering, auth bypass, and hidden debug flags.\n"
+        "- Combine schema-derived parameters with recon-discovered parameters to craft richer attacks.\n\n"
         "Focus heavily on:\n"
         "1. BOLA/IDOR: Swapping UUIDs/integers, wrapping IDs in arrays, using wildcard IDs.\n"
         "2. BOPLA (Mass Assignment): Injecting 'is_admin': true, 'role': 'admin', 'credit_balance': 99999 into body payloads.\n"
@@ -36,12 +47,22 @@ async def generate_test_cases(
 
     user_prompt = (
         f"API Understanding Context:\n{understanding.model_dump_json(indent=2)}\n\n"
-        f"Complete Target API Structure:\n{structure.model_dump_json(indent=2)}\n\n"
+        f"Complete Target API Structure (including enriched endpoints/parameters):\n{structure.model_dump_json(indent=2)}\n\n"
         "Generate 100-200 highly targeted, exhaustive test cases ensuring sweeping coverage "
         "of OWASP API Top 10 and tailored business logic exploitation vectors specific to this API's purpose. "
         "Ensure realistic payloads based on the defined schemas (e.g. if an endpoint expects JSON, provide a structured JSON body)."
     )
 
+    if tool_context:
+        user_prompt += (
+            "\n\n[RECON & TOOL SIGNALS]\n"
+            "Use the following signals from Dirsearch, Arjun, and other tools to bias test generation "
+            "towards likely-vulnerable or undocumented surfaces:\n"
+            f"{json.dumps(tool_context, indent=2)}\n"
+            "- Prioritize endpoints and parameters that only appear in recon/tool output.\n"
+            "- Generate additional tests that mix tool-discovered params with sensitive actions (admin, payments, account management, exports, reporting, etc.)."
+        )
+
     if instructions:
         greybox_context = (
             "\n\n[GREYBOX CONTEXT - AUTHENTICATED TESTING]\n"
@@ -66,4 +87,4 @@ async def generate_test_cases(
         return [TestCase(**t) for t in data.get("tests", [])]
     except Exception as e:
         logger.error("Failed to parse TestCases JSON", error=str(e), result=result)
-        return []
+        return []
diff --git a/src/secnodeapi/ai/understand.py b/src/secnodeapi/ai/understand.py
@@ -2,6 +2,7 @@
 AI understanding stage for API context analysis.
 """
 import json
+from typing import Any, Dict, Optional
 
 import structlog
 
@@ -12,17 +13,33 @@
 logger = structlog.get_logger(__name__)
 
 
-async def understand_api_with_ai(structure: SchemaStructure) -> APIUnderstanding:
+async def understand_api_with_ai(
+    structure: SchemaStructure,
+    tool_context: Optional[Dict[str, Any]] = None,
+) -> APIUnderstanding:
     """Evaluate API business purpose and trust boundaries with AI."""
-    logger.info("Analyzing API business context with AI")
+    logger.info(
+        "Analyzing API business context with AI",
+        has_tool_context=bool(tool_context),
+    )
 
     sys_prompt = (
         "You are an expert Application Security Architect. Analyze this API structure "
-        "and output ONLY valid JSON matching this schema:\n"
+        "together with reconnaissance signals from external tools (e.g. Dirsearch, Arjun, Nuclei). "
+        "Use tool-discovered endpoints, parameters, and weak signals to refine your understanding of the true attack surface. "
+        "Output ONLY valid JSON matching this schema:\n"
         '{"business_context": "string", "trust_boundaries": ["string"], "high_risk_flows": ["string"]}'
     )
+
     user_prompt = f"API Structure:\n{structure.model_dump_json(indent=2)}"
 
+    if tool_context:
+        user_prompt += (
+            "\n\nReconnaissance & tool signals (Dirsearch / Arjun / others):\n"
+            f"{json.dumps(tool_context, indent=2)}\n"
+            "Pay special attention to shadow/admin paths, undocumented methods, and discovered parameters when reasoning about trust boundaries and high-risk flows."
+        )
+
     result = await call_llm(sys_prompt, user_prompt, temperature=0.1)
     try:
         data = json.loads(result)
@@ -33,4 +50,4 @@ async def understand_api_with_ai(structure: SchemaStructure) -> APIUnderstanding
             business_context="Failed to parse",
             trust_boundaries=[],
             high_risk_flows=[],
-        )
+        )
diff --git a/src/secnodeapi/services/pipeline.py b/src/secnodeapi/services/pipeline.py
@@ -5,7 +5,7 @@
 from datetime import datetime
 import json
 import re
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import urlparse
 
 import structlog
@@ -339,6 +339,21 @@ async def run_agent_pipeline(
         )
         queue = _deduplicate_test_cases(queue + chain_tests)
 
+        # Human-friendly iteration summary in the terminal
+        console.print(
+            Panel(
+                f"[bold]Iteration {iteration} summary[/bold]\n"
+                f"- Executed tests this iteration: [cyan]{len(results)}[/cyan]\n"
+                f"- New confirmed findings: [green]{len(batch_confirmed)}[/green]\n"
+                f"- New suspected findings: [yellow]{len(batch_suspected)}[/yellow]\n"
+                f"- Chained follow-up tests queued: [magenta]{len(chain_tests)}[/magenta]\n"
+                f"- Remaining request budget: [white]{remaining_budget}[/white]\n"
+                f"- Dynamic concurrency: [white]{dynamic_concurrency}[/white]",
+                title=f"🤖 Agent Iteration {iteration}",
+                border_style="magenta",
+            )
+        )
+
     # Run final AI deduplication
     console.rule("[bold yellow]Phase 3: Final Intelligence Processing")
     confirmed = await deduplicate_findings_with_ai(confirmed)
@@ -369,7 +384,7 @@ async def build_pipeline_artifacts(
         verify_ssl=pipeline_input.verify_ssl,
     )
     api_structure = analyze_api_structure(raw_schema)
-    
+
     # Phase 1a — Active Reconnaissance (built-in fuzzer)
     discovered_endpoints = await perform_active_recon(api_structure, pipeline_input)
     if discovered_endpoints:
@@ -380,9 +395,10 @@ async def build_pipeline_artifacts(
         api_structure.endpoints = list(unique_endpoints.values())
 
     # Phase 1b — External Tool Orchestration (Dirsearch, Arjun, Nuclei, SQLMap)
+    tool_context: Optional[Dict[str, Any]] = None
     try:
         orch_result = await run_tool_orchestration_phase(api_structure, api_structure.base_url)
-        
+
         # Merge tool-discovered endpoints into schema
         if orch_result.discovered_endpoints:
             for ep in orch_result.discovered_endpoints:
@@ -396,22 +412,74 @@ async def build_pipeline_artifacts(
                 total=len(api_structure.endpoints),
                 added=len(orch_result.discovered_endpoints),
             )
+
+        # Merge Dirsearch/Arjun-discovered parameters back into API structure
+        if orch_result.discovered_params:
+            merged_count = 0
+            for ep_key, params in orch_result.discovered_params.items():
+                # Normalise the endpoint key (which may be a full URL) down to a path
+                try:
+                    parsed = urlparse(ep_key)
+                    path = parsed.path or ep_key
+                except Exception:
+                    path = ep_key
+                if not path.startswith("/"):
+                    path = f"/{path}"
+
+                for endpoint in api_structure.endpoints:
+                    if endpoint.path != path:
+                        continue
+                    existing_param_names = {p.name for p in endpoint.parameters}
+                    for p_name in params:
+                        if not p_name or p_name in existing_param_names:
+                            continue
+                        endpoint.parameters.append(
+                            type(endpoint).model_fields["parameters"].annotation.__args__[0](  # type: ignore[index]
+                                name=p_name,
+                                **{"in": "query"},
+                                required=False,
+                                schema_type="string",
+                            )
+                        )
+                        merged_count += 1
+            logger.info(
+                "Merged tool-discovered parameters",
+                total_parameters_merged=merged_count,
+            )
+
+        # Build compact tool context for downstream AI prompts
+        tool_context = {
+            "dirsearch_discovered_paths": sorted(
+                {ep.path for ep in orch_result.discovered_endpoints}
+            ),
+            "tool_discovered_params": orch_result.discovered_params,
+            "tool_findings_sample": [
+                {
+                    "endpoint": f.endpoint,
+                    "method": f.method,
+                    "vuln": f.vulnerability_class,
+                    "cvss": f.cvss_score,
+                }
+                for f in orch_result.tool_findings[:25]
+            ],
+        }
     except Exception as e:
         logger.warning("Tool orchestration phase failed, continuing without tools", error=str(e))
         orch_result = None
+        tool_context = None
 
     # Phase 1c — Deep Doc Analysis + AI-Directed Arjun Parameter Discovery
     try:
         api_structure = await perform_deep_recon(raw_schema, api_structure, pipeline_input)
     except Exception as e:
         logger.warning("Deep recon phase failed, continuing without parameter enrichment", error=str(e))
 
-
-    understanding = await understand_api_with_ai(api_structure)
+    understanding = await understand_api_with_ai(api_structure, tool_context=tool_context)
     tests = await generate_test_cases(
         understanding,
         api_structure,
-        instructions=pipeline_input.instructions[0].model_dump() if pipeline_input.instructions else None
+        instructions=pipeline_input.instructions[0].model_dump() if pipeline_input.instructions else None,
+        tool_context=tool_context,
     )
     return api_structure, tests, orch_result