Skip to content

Commit 955ea15

Browse files
Flamkiber and viantoleo authored
security: redact raw chat payloads from service logs (fixes #197) (#198)
* security: redact raw chat payloads from service logs (fixes #197)
* chore: fix pylint findings for security log hardening
* fix: align chat_service logging levels with maintainer feedback
* chore: retrigger CI
* fix: simplify sub-query debug logging
* security: sanitize chat-service payload logs while preserving behavior

---------

Co-authored-by: Bervianto Leo Pratama <[email protected]>
1 parent 11005e5 commit 955ea15

2 files changed

Lines changed: 122 additions & 25 deletions

File tree

chatbot-core/api/services/chat_service.py

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from api.services.memory import get_session, get_session_async
2222
from api.services.file_service import format_file_context
23+
from api.tools.sanitizer import sanitize_logs
2324
from api.tools.tools import TOOL_REGISTRY
2425
from api.tools.utils import (
2526
get_default_tools_call,
@@ -40,6 +41,16 @@
4041
)
4142

4243

44+
def _sanitize_log_payload(payload: object) -> str:
    """Stringify *payload* and redact common secrets so it is safe to log.

    Args:
        payload: Any object destined for a log line; ``None`` is treated
            as an empty payload.

    Returns:
        str: The sanitized string form of *payload*, or ``""`` when
        *payload* is ``None``.
    """
    # None carries no loggable content — short-circuit to the empty string
    # rather than logging the literal text "None".
    return "" if payload is None else sanitize_logs(str(payload))
52+
53+
4354
def get_chatbot_reply(
4455
session_id: str,
4556
user_input: str,
@@ -58,21 +69,21 @@ def get_chatbot_reply(
5869
ChatResponse: The generated assistant response.
5970
"""
6071
logger.info("New message from session '%s'", session_id)
61-
logger.info("Handling the user query: %s", user_input)
72+
logger.debug("Handling the user query: %s", _sanitize_log_payload(user_input))
6273

6374
memory = get_session(session_id)
6475
if memory is None:
6576
raise RuntimeError(f"Session '{session_id}' not found in the memory store.")
6677

6778
context = retrieve_context(user_input)
68-
logger.info("Context retrieved: %s", context)
79+
logger.debug("Context retrieved: %s", _sanitize_log_payload(context))
6980

7081
# Process file context if files are provided
7182
context = _process_file_context(context, files)
7283

7384
prompt = build_prompt(user_input, context, memory)
7485

75-
logger.info("Generating answer with prompt: %s", prompt)
86+
logger.debug("Generating answer with prompt: %s", _sanitize_log_payload(prompt))
7687
reply = generate_answer(prompt)
7788

7889
# Format user message with file info for memory
@@ -129,7 +140,7 @@ def get_chatbot_reply_new_architecture(
129140
ChatResponse: The generated assistant response.
130141
"""
131142
logger.info("New message from session '%s'", session_id)
132-
logger.info("Handling the user query: %s", user_input)
143+
logger.debug("Handling the user query: %s", _sanitize_log_payload(user_input))
133144

134145
memory = get_session(session_id)
135146
if memory is None:
@@ -188,11 +199,11 @@ def _handle_query_type(query: str, query_type: QueryType, memory) -> str:
188199

189200
answers = []
190201
for sub_query in sub_queries:
191-
logger.info("Handling the sub-query: %s.", sub_query)
202+
logger.debug("Handling sub-query: %s.", _sanitize_log_payload(sub_query))
192203
answers.append(_get_reply_simple_query_pipeline(sub_query, memory))
193204

194205
reply = _assemble_response(answers)
195-
logger.info("Final response: %s", reply)
206+
logger.debug("Final response: %s", _sanitize_log_payload(reply))
196207
else:
197208
reply = _get_reply_simple_query_pipeline(query, memory)
198209

@@ -216,10 +227,8 @@ def _get_sub_queries(query: str) -> List[str]:
216227
try:
217228
queries = ast.literal_eval(queries_string)
218229
except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
219-
logger.warning(
220-
"Error in parsing the subqueries. The string may be not formed"
221-
" correctly: %s. Setting to default array with 1 element.",
222-
queries_string)
230+
logger.warning("Error in parsing sub-queries. Falling back to single query mode.")
231+
logger.debug("Failed sub-query payload: %s", _sanitize_log_payload(queries_string))
223232
queries = [query]
224233

225234
queries = [q.strip() for q in queries]
@@ -257,7 +266,7 @@ def _get_reply_simple_query_pipeline(query: str, memory) -> str:
257266

258267
retrieved_context = _execute_search_tools(tool_calls)
259268

260-
logger.info("Retrieved context: %s", retrieved_context)
269+
logger.debug("Retrieved context: %s", _sanitize_log_payload(retrieved_context))
261270

262271
relevance = _get_query_context_relevance(query, retrieved_context)
263272
logger.info("Query context relevance %s", relevance)
@@ -286,25 +295,24 @@ def _get_agent_tool_calls(query: str):
286295
tool_calls = generate_answer(
287296
retriever_agent_prompt, llm_config["max_tokens_retriever_agent"] + (len(query) * 3))
288297

289-
logger.warning("Tool calls: %s", tool_calls)
298+
logger.debug("Tool calls: %s", _sanitize_log_payload(tool_calls))
290299
try:
291300
tool_calls_parsed = json.loads(tool_calls)
292301
if not validate_tool_calls(tool_calls_parsed, logger):
293302
logger.warning("Tool calls are not respecting the signatures."
294303
"Going for the default config")
295304
tool_calls_parsed = get_default_tools_call(query)
296305
except json.JSONDecodeError:
297-
logger.warning(
298-
"Invalid JSON syntax in the tools output: %s.",
299-
tool_calls)
306+
logger.warning("Invalid JSON syntax in the tools output.")
307+
logger.debug("Raw tool calls payload: %s", _sanitize_log_payload(tool_calls))
300308
logger.warning("Calling all the search tools with default settings.")
301309
tool_calls_parsed = get_default_tools_call(query)
302310
except (KeyError, ValueError, TypeError, AttributeError) as e:
303311
logger.warning(
304-
"JSON structure or value error(%s %s) in the tools output: %s.",
312+
"JSON structure or value error(%s %s) in the tools output.",
305313
type(e).__name__,
306-
e,
307-
tool_calls)
314+
e)
315+
logger.debug("Raw tool calls payload: %s", _sanitize_log_payload(tool_calls))
308316
logger.warning("Calling all the search tools with default settings.")
309317
tool_calls_parsed = get_default_tools_call(query)
310318

@@ -434,10 +442,15 @@ def generate_answer(prompt: str, max_tokens: Optional[int] = None) -> str:
434442
logger.error("LLM provider unavailable: %s", e)
435443
return "LLM is not available. Please install llama-cpp-python and configure a model."
436444
except (ValueError, RuntimeError) as exc:
437-
logger.error("LLM generation failed for prompt: %r. Error: %r", prompt, exc)
445+
logger.error("LLM generation failed: %s", _sanitize_log_payload(repr(exc)))
446+
logger.debug("Failed prompt payload: %s", _sanitize_log_payload(prompt))
438447
return "Sorry, I'm having trouble generating a response right now."
439-
except Exception: # pylint: disable=broad-except
440-
logger.exception("Unexpected error during LLM generation for prompt: %r", prompt)
448+
except Exception as exc: # pylint: disable=broad-except
449+
logger.error(
450+
"Unexpected error during LLM generation: %s",
451+
_sanitize_log_payload(repr(exc))
452+
)
453+
logger.debug("Failed prompt payload: %s", _sanitize_log_payload(prompt))
441454
return "Sorry, an unexpected error occurred. Please contact support."
442455

443456

@@ -484,7 +497,7 @@ async def get_chatbot_reply_stream(
484497
str: Individual tokens from LLM response
485498
"""
486499
logger.info("Streaming message from session '%s'", session_id)
487-
logger.info("Handling user query: %s", user_input)
500+
logger.debug("Handling user query: %s", _sanitize_log_payload(user_input))
488501

489502
memory = await get_session_async(session_id)
490503

@@ -493,10 +506,13 @@ async def get_chatbot_reply_stream(
493506
f"Session '{session_id}' not found in memory store.")
494507

495508
context = retrieve_context(user_input)
496-
logger.info("Context retrieved: %s", context)
509+
logger.debug("Context retrieved: %s", _sanitize_log_payload(context))
497510

498511
prompt = build_prompt(user_input, context, memory)
499-
logger.info("Generating streaming answer with prompt: %s", prompt)
512+
logger.debug(
513+
"Generating streaming answer with prompt: %s",
514+
_sanitize_log_payload(prompt)
515+
)
500516

501517
full_reply = ""
502518
async for token in generate_answer_stream(prompt):

chatbot-core/tests/unit/services/test_chat_service.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
"""Unit tests for chat service logic."""
22

33
import logging
4+
from unittest.mock import MagicMock
45
import pytest
5-
from api.services.chat_service import get_chatbot_reply, retrieve_context
6+
from api.services.chat_service import generate_answer, get_chatbot_reply, retrieve_context
67
from api.config.loader import CONFIG
78
from api.models.schemas import ChatResponse
89

@@ -40,6 +41,86 @@ def test_get_chatbot_reply_session_not_found(mock_get_session):
4041
assert "Session 'missing-session-id' not found in the memory store." in str(exc_info.value)
4142

4243

44+
def test_get_chatbot_reply_does_not_log_raw_content(
45+
mock_get_session,
46+
mock_retrieve_context,
47+
mock_prompt_builder,
48+
mock_llm_provider,
49+
caplog
50+
):
51+
"""Ensure sensitive payloads are not logged at INFO level."""
52+
logging.getLogger("API").propagate = True
53+
54+
sensitive_query = "token=abc123"
55+
sensitive_context = "internal secret context"
56+
sensitive_prompt = "prompt contains password=top-secret"
57+
58+
mock_chat_memory = MagicMock()
59+
mock_session = mock_get_session.return_value
60+
mock_session.chat_memory = mock_chat_memory
61+
mock_retrieve_context.return_value = sensitive_context
62+
mock_prompt_builder.return_value = sensitive_prompt
63+
mock_llm_provider.generate.return_value = "safe response"
64+
65+
with caplog.at_level(logging.INFO):
66+
get_chatbot_reply("session-id", sensitive_query)
67+
68+
assert sensitive_query not in caplog.text
69+
assert sensitive_context not in caplog.text
70+
assert sensitive_prompt not in caplog.text
71+
assert "New message from session 'session-id'" in caplog.text
72+
73+
74+
def test_get_chatbot_reply_debug_logs_are_sanitized(
75+
mock_get_session,
76+
mock_retrieve_context,
77+
mock_prompt_builder,
78+
mock_llm_provider,
79+
caplog
80+
):
81+
"""Ensure payload-heavy debug logs keep structure but redact secrets."""
82+
logging.getLogger("API").propagate = True
83+
84+
sanitized_query = "api_key=[REDACTED]"
85+
sanitized_context = "password=[REDACTED]"
86+
sanitized_prompt = "Bearer [REDACTED_TOKEN]"
87+
88+
mock_chat_memory = MagicMock()
89+
mock_session = mock_get_session.return_value
90+
mock_session.chat_memory = mock_chat_memory
91+
mock_retrieve_context.return_value = "context password=top-secret"
92+
mock_prompt_builder.return_value = (
93+
"prompt Authorization: Bearer "
94+
"ghp_1234567890abcdef1234567890abcdef1234"
95+
)
96+
mock_llm_provider.generate.return_value = "safe response"
97+
98+
with caplog.at_level(logging.DEBUG, logger="API"):
99+
get_chatbot_reply("session-id", "api_key=abc123")
100+
101+
assert "api_key=abc123" not in caplog.text
102+
assert "password=top-secret" not in caplog.text
103+
assert "ghp_1234567890abcdef1234567890abcdef1234" not in caplog.text
104+
assert sanitized_query in caplog.text
105+
assert sanitized_context in caplog.text
106+
assert sanitized_prompt in caplog.text
107+
108+
109+
def test_generate_answer_error_logs_sanitized_prompt(mock_llm_provider, caplog):
110+
"""Ensure failed prompt logging is sanitized across ERROR and DEBUG paths."""
111+
logging.getLogger("API").propagate = True
112+
sensitive_prompt = "api_key=very-secret-key"
113+
mock_llm_provider.generate.side_effect = RuntimeError("provider failure")
114+
115+
with caplog.at_level(logging.DEBUG, logger="API"):
116+
response = generate_answer(sensitive_prompt)
117+
118+
assert response == "Sorry, I'm having trouble generating a response right now."
119+
assert sensitive_prompt not in caplog.text
120+
assert "LLM generation failed" in caplog.text
121+
assert "api_key=[REDACTED]" in caplog.text
122+
123+
43124
def test_retrieve_context_with_placeholders(mock_get_relevant_documents):
44125
"""Test retrieve_context replaces placeholders with code blocks correctly."""
45126
mock_documents = get_mock_documents("with_placeholders")

0 commit comments

Comments
 (0)