@@ -58,12 +58,28 @@ def _materialize_messages(messages: list[Any]) -> list[dict[str, Any]]:
5858 return normalized
5959
6060
def detect_force_reasoning(
    tokenizer,
    prompt_token_ids: list[int],
    *,
    think_start_token: str = "<think>",
    tail_tokens: int = 10,
) -> bool:
    """Check if the chat template's generation prompt ends with ``<think>``.

    When the template appends the opening reasoning tag to the prompt, the
    model output starts inside a reasoning block without an explicit opening
    tag. The reasoning parser must be told to begin in reasoning mode
    (``force_reasoning=True``) so that it correctly separates reasoning
    content from normal content.

    Args:
        tokenizer: Object exposing
            ``decode(token_ids, skip_special_tokens=...)`` that returns the
            decoded text for a list of token IDs.
        prompt_token_ids: Token IDs of the rendered prompt.
        think_start_token: Opening reasoning tag to look for at the end of
            the prompt. Defaults to ``"<think>"``.
        tail_tokens: Number of trailing prompt tokens to decode. Only the
            tail is decoded so the cost does not grow with prompt length.
            Defaults to ``10``.

    Returns:
        ``True`` when the decoded tail, ignoring trailing whitespace, ends
        with ``think_start_token``. ``False`` otherwise, including when the
        prompt or ``think_start_token`` is empty.

    Raises:
        ValueError: If ``tail_tokens`` is less than 1.
    """
    if tail_tokens < 1:
        # ``ids[-0:]`` is the whole list, which would silently decode the
        # entire prompt instead of a short tail.
        raise ValueError("tail_tokens must be a positive integer")
    if not prompt_token_ids or not think_start_token:
        # ``str.endswith("")`` is always True, so an empty tag can never be a
        # meaningful match.
        return False
    # Special tokens are kept because the reasoning tag may itself be
    # registered as a special token in the tokenizer.
    tail = tokenizer.decode(
        prompt_token_ids[-tail_tokens:], skip_special_tokens=False
    )
    # Templates commonly emit a newline after the tag; ignore it.
    return tail.rstrip().endswith(think_start_token)
74+
75+
6176def create_parsers (
6277 request : dict [str , Any ],
6378 * ,
6479 tool_call_parser_name : str | None ,
6580 reasoning_parser_name : str | None ,
6681 sglang_tools : list [SglangTool ] | None = None ,
82+ force_reasoning : bool = False ,
6783) -> tuple [FunctionCallParser | None , ReasoningParser | None ]:
6884 """Create tool call and reasoning parsers for a request.
6985
@@ -86,10 +102,13 @@ def create_parsers(
86102
87103 reasoning_parser = None
88104 if reasoning_parser_name :
89- reasoning_parser = ReasoningParser (
90- model_type = reasoning_parser_name ,
91- stream_reasoning = True ,
92- )
105+ kwargs : dict [str , Any ] = {
106+ "model_type" : reasoning_parser_name ,
107+ "stream_reasoning" : True ,
108+ }
109+ if force_reasoning :
110+ kwargs ["force_reasoning" ] = True
111+ reasoning_parser = ReasoningParser (** kwargs )
93112
94113 return tool_call_parser , reasoning_parser
95114
@@ -131,6 +150,7 @@ def preprocess_chat_request(
131150 template_kwargs : dict [str , Any ] = {
132151 "add_generation_prompt" : True ,
133152 "tokenize" : True ,
153+ "return_dict" : False ,
134154 }
135155 # Strip tools from template when tool_choice=none so the model doesn't
136156 # see them and generate raw XML tool calls in its response.
@@ -144,11 +164,18 @@ def preprocess_chat_request(
144164 tokenizer .apply_chat_template (messages , ** template_kwargs )
145165 )
146166
167+ force_reasoning = (
168+ detect_force_reasoning (tokenizer , prompt_token_ids )
169+ if reasoning_parser_name
170+ else False
171+ )
172+
147173 tool_call_parser , reasoning_parser = create_parsers (
148174 request ,
149175 tool_call_parser_name = tool_call_parser_name ,
150176 reasoning_parser_name = reasoning_parser_name ,
151177 sglang_tools = sglang_tools ,
178+ force_reasoning = force_reasoning ,
152179 )
153180
154181 return SglangPreprocessResult (
0 commit comments