Skip to content

Commit 0e37ac6

Browse files
committed
fix(tools): implement tool output truncation to prevent LLM context overflow
1 parent 2bd2f23 commit 0e37ac6

3 files changed

Lines changed: 64 additions & 7 deletions

File tree

chatbot-core/api/config/config.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,6 @@ tool_names:
3939
cors:
4040
allowed_origins:
4141
- "*"
42+
43+
tools:
44+
max_tool_output_length: 4000

chatbot-core/api/tools/tools.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Definition of the tools available to the Agent.
33
"""
44

5+
import logging
6+
from functools import wraps
57
from typing import Optional
68
from types import MappingProxyType
79
from api.models.embedding_model import EMBEDDING_MODEL
@@ -12,9 +14,39 @@
1214
extract_top_chunks
1315
)
1416
from api.config.loader import CONFIG
17+
# Named `decorator_logger` so it does not shadow the `logger` argument that the tool functions accept.
18+
decorator_logger = logging.getLogger(__name__)
19+
20+
tool_config = CONFIG.get("tools", {})
21+
MAX_TOOL_OUTPUT_LENGTH = tool_config.get("max_tool_output_length", 4000)
22+
23+
24+
def truncate_tool_output(func):
    """Decorator that caps the length of a tool's string output.

    Oversized tool results can overflow the LLM context window, so any
    ``str`` result longer than ``MAX_TOOL_OUTPUT_LENGTH`` is cut down and a
    system warning is appended to tell the model the text is incomplete.
    Room for the warning is reserved inside the limit, so the returned
    string is never longer than ``MAX_TOOL_OUTPUT_LENGTH``. The one
    exception is a limit smaller than the warning itself: in that case only
    the warning is returned, in full.

    Args:
        func: The tool callable to wrap.

    Returns:
        A wrapper that forwards all arguments to ``func``. Non-string
        results and strings within the limit are returned unchanged.
    """
    notice = (
        "\n\n...[SYSTEM WARNING: Tool output truncated "
        "to prevent context overflow.]"
    )

    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        if not isinstance(result, str) or len(result) <= MAX_TOOL_OUTPUT_LENGTH:
            return result

        # Reserve space for the notice so that the final string, not just
        # the kept prefix, respects the configured limit.
        keep = max(MAX_TOOL_OUTPUT_LENGTH - len(notice), 0)
        output = result[:keep] + notice
        # Lazy %-style arguments: the message is only formatted if emitted.
        decorator_logger.warning(
            "[SECURITY] Tool '%s' output truncated from %d to %d characters.",
            func.__name__,
            len(result),
            len(output),
        )
        return output

    return wrapper
45+
1546

1647
retrieval_config = CONFIG["retrieval"]
1748

49+
1850
def search_plugin_docs(query: str, keywords: str, logger, plugin_name: Optional[str] = None) -> str:
1951
"""
2052
Search tool for the plugin docs. Exploits both a sparse and dense search, resulting in a
@@ -24,7 +56,7 @@ def search_plugin_docs(query: str, keywords: str, logger, plugin_name: Optional[
2456
query (str): The user query.
2557
keywords (str): Keywords extracted from the user query.
2658
plugin_name (Optional[str]): The referred plugin name in the query (if available).
27-
59+
2860
Returns:
2961
str: The result of the research of the plugin search tool.
3062
"""
@@ -55,6 +87,7 @@ def search_plugin_docs(query: str, keywords: str, logger, plugin_name: Optional[
5587
logger=logger
5688
)
5789

90+
5891
def search_jenkins_docs(query: str, keywords: str, logger) -> str:
5992
"""
6093
Search tool for the Jenkins docs. Exploits both a sparse and dense search, resulting in a
@@ -63,7 +96,7 @@ def search_jenkins_docs(query: str, keywords: str, logger) -> str:
6396
Args:
6497
query (str): The user query.
6598
keywords (str): Keywords extracted from the user query.
66-
99+
67100
Returns:
68101
str: The result of the research of the docs search tool.
69102
"""
@@ -87,6 +120,7 @@ def search_jenkins_docs(query: str, keywords: str, logger) -> str:
87120
logger=logger
88121
)
89122

123+
90124
def search_stackoverflow_threads(query: str) -> str:
91125
"""
92126
Stackoverflow Search tool
@@ -95,6 +129,7 @@ def search_stackoverflow_threads(query: str) -> str:
95129
pass
96130
return "Nothing relevant"
97131

132+
98133
def search_community_threads(query: str, keywords: str, logger) -> str:
99134
"""
100135
Search tool for the community discourse threads. Exploits both a sparse and
@@ -104,7 +139,7 @@ def search_community_threads(query: str, keywords: str, logger) -> str:
104139
Args:
105140
query (str): The user query.
106141
keywords (str): Keywords extracted from the user query.
107-
142+
108143
Returns:
109144
str: The result of the research of the docs search tool.
110145
"""
@@ -129,9 +164,10 @@ def search_community_threads(query: str, keywords: str, logger) -> str:
129164
semantic_weight=0.7
130165
)
131166

167+
132168
TOOL_REGISTRY = MappingProxyType({
133-
"search_plugin_docs": search_plugin_docs,
134-
"search_jenkins_docs": search_jenkins_docs,
135-
"search_stackoverflow_threads": search_stackoverflow_threads,
136-
"search_community_threads": search_community_threads,
169+
"search_plugin_docs": truncate_tool_output(search_plugin_docs),
170+
"search_jenkins_docs": truncate_tool_output(search_jenkins_docs),
171+
"search_stackoverflow_threads": truncate_tool_output(search_stackoverflow_threads),
172+
"search_community_threads": truncate_tool_output(search_community_threads),
137173
})
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from api.tools.tools import truncate_tool_output, MAX_TOOL_OUTPUT_LENGTH
2+
3+
4+
def test_truncate_tool_output_prevents_overflow():
    """An oversized tool result is clipped and tagged with the system warning."""

    def massive_log_generator():
        # Far larger than any sensible output limit.
        return "ERROR: Stack trace line. " * 50000

    # Wrap the dummy tool with the truncation decorator and run it.
    guarded_tool = truncate_tool_output(massive_log_generator)
    output = guarded_tool()

    # Allow up to 100 extra characters for the appended warning message.
    upper_bound = MAX_TOOL_OUTPUT_LENGTH + 100
    assert len(output) <= upper_bound

    # The warning marker must be present in the clipped output.
    assert "[SYSTEM WARNING: Tool output truncated" in output

0 commit comments

Comments
 (0)