Skip to content

Commit 8b06e79

Browse files
committed
fix(frontend): auto-detect force_reasoning when chat template appends <think>
1 parent 2ac22df commit 8b06e79

2 files changed

Lines changed: 38 additions & 4 deletions

File tree

components/src/dynamo/frontend/sglang_prepost.py

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,12 +58,28 @@ def _materialize_messages(messages: list[Any]) -> list[dict[str, Any]]:
5858
return normalized
5959

6060

61+
def detect_force_reasoning(
    tokenizer,
    prompt_token_ids: list[int],
    *,
    think_token: str = "<think>",
    tail_tokens: int = 10,
) -> bool:
    """Check if the chat template's generation prompt ends with ``<think>``.

    When the template appends ``<think>`` to the prompt, the model output
    starts inside a reasoning block without an explicit opening tag.
    The reasoning parser must be told to begin in reasoning mode
    (``force_reasoning=True``) so that it correctly separates reasoning
    content from normal content.

    Args:
        tokenizer: Object exposing ``decode(ids, skip_special_tokens=...)``
            that returns the decoded text as a ``str``.
        prompt_token_ids: Token ids of the fully rendered prompt, including
            the generation prompt appended by the chat template.
        think_token: Opening reasoning marker to look for at the end of the
            prompt. Defaults to ``<think>``.
        tail_tokens: Number of trailing prompt tokens to decode. Only the
            tail is decoded so the check does not scale with prompt length.

    Returns:
        ``True`` if the decoded tail, ignoring trailing whitespace, ends
        with ``think_token``; ``False`` otherwise, including for an empty
        prompt.

    Raises:
        ValueError: If ``tail_tokens`` is smaller than 1.
    """
    if tail_tokens < 1:
        # ``ids[-0:]`` is the whole list, so a zero window would silently
        # decode the entire prompt instead of nothing.
        raise ValueError("tail_tokens must be >= 1")
    if not prompt_token_ids:
        return False
    # Keep special tokens in the decoded text: the marker is presumably
    # registered as a special token by some tokenizers and would otherwise
    # be dropped from the output.
    tail = tokenizer.decode(
        prompt_token_ids[-tail_tokens:], skip_special_tokens=False
    )
    # Templates may emit a newline after the marker; ignore trailing
    # whitespace before comparing.
    return tail.rstrip().endswith(think_token)
74+
75+
6176
def create_parsers(
6277
request: dict[str, Any],
6378
*,
6479
tool_call_parser_name: str | None,
6580
reasoning_parser_name: str | None,
6681
sglang_tools: list[SglangTool] | None = None,
82+
force_reasoning: bool = False,
6783
) -> tuple[FunctionCallParser | None, ReasoningParser | None]:
6884
"""Create tool call and reasoning parsers for a request.
6985
@@ -86,10 +102,13 @@ def create_parsers(
86102

87103
reasoning_parser = None
88104
if reasoning_parser_name:
89-
reasoning_parser = ReasoningParser(
90-
model_type=reasoning_parser_name,
91-
stream_reasoning=True,
92-
)
105+
kwargs: dict[str, Any] = {
106+
"model_type": reasoning_parser_name,
107+
"stream_reasoning": True,
108+
}
109+
if force_reasoning:
110+
kwargs["force_reasoning"] = True
111+
reasoning_parser = ReasoningParser(**kwargs)
93112

94113
return tool_call_parser, reasoning_parser
95114

@@ -131,6 +150,7 @@ def preprocess_chat_request(
131150
template_kwargs: dict[str, Any] = {
132151
"add_generation_prompt": True,
133152
"tokenize": True,
153+
"return_dict": False,
134154
}
135155
# Strip tools from template when tool_choice=none so the model doesn't
136156
# see them and generate raw XML tool calls in its response.
@@ -144,11 +164,18 @@ def preprocess_chat_request(
144164
tokenizer.apply_chat_template(messages, **template_kwargs)
145165
)
146166

167+
force_reasoning = (
168+
detect_force_reasoning(tokenizer, prompt_token_ids)
169+
if reasoning_parser_name
170+
else False
171+
)
172+
147173
tool_call_parser, reasoning_parser = create_parsers(
148174
request,
149175
tool_call_parser_name=tool_call_parser_name,
150176
reasoning_parser_name=reasoning_parser_name,
151177
sglang_tools=sglang_tools,
178+
force_reasoning=force_reasoning,
152179
)
153180

154181
return SglangPreprocessResult(

components/src/dynamo/frontend/sglang_processor.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
from .sglang_prepost import (
3434
SglangStreamingPostProcessor,
3535
create_parsers,
36+
detect_force_reasoning,
3637
preprocess_chat_request,
3738
)
3839
from .utils import PreprocessError, extract_mm_urls, random_uuid, worker_warmup
@@ -379,10 +380,16 @@ async def _generator_inner_pool(
379380
return
380381

381382
# --- Phase 2: Recreate parsers in main process (not picklable) ---
383+
force_reasoning = (
384+
detect_force_reasoning(self.tokenizer, preproc_result.prompt_token_ids)
385+
if self.reasoning_parser_name
386+
else False
387+
)
382388
tool_call_parser, reasoning_parser = create_parsers(
383389
request,
384390
tool_call_parser_name=self.tool_call_parser_name,
385391
reasoning_parser_name=self.reasoning_parser_name,
392+
force_reasoning=force_reasoning,
386393
)
387394

388395
post = SglangStreamingPostProcessor(

0 commit comments

Comments
 (0)