Skip to content

Commit e041ccf

Browse files
authored
ci: remove unused SGLang NIXL ref (#8472)
1 parent 36b4208 commit e041ccf

7 files changed

Lines changed: 88 additions & 29 deletions

File tree

components/src/dynamo/frontend/sglang_prepost.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,23 @@ def process_output(self, engine_response: dict[str, Any]) -> dict[str, Any] | No
675675
if tc.parameters:
676676
self._tool_call_args[seq_idx] = [tc.parameters]
677677

678+
# Do not emit partial tool calls. A streaming parser can detect a
679+
# tool name before the model finishes its (possibly malformed) JSON; if the
680+
# finish-time re-parse cannot recover valid arguments, treat the
681+
# response as plain text instead of surfacing name + empty args.
682+
dropped_names = []
683+
for idx in list(self._tool_call_names):
684+
if not "".join(self._tool_call_args.get(idx, [])):
685+
dropped_names.append(self._tool_call_names[idx])
686+
del self._tool_call_names[idx]
687+
self._tool_call_ids.pop(idx, None)
688+
self._tool_call_args.pop(idx, None)
689+
if dropped_names:
690+
logger.warning(
691+
"Dropping incomplete SGLang tool calls with no valid arguments: %s",
692+
dropped_names,
693+
)
694+
678695
if finish_reason and self._tool_call_names:
679696
tool_calls_out: list[dict[str, Any]] = []
680697
for idx in sorted(self._tool_call_names):

components/src/dynamo/frontend/tests/test_sglang_tool_calls.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,50 @@ def test_finish_reason_rewritten_to_tool_calls(self, tokenizer):
467467
assert choice["finish_reason"] == "tool_calls"
468468

469469

470+
class TestMalformedToolCalls:
471+
def test_incomplete_arguments_are_not_emitted(self):
472+
class DummyTokenizer:
473+
def decode(self, token_ids, skip_special_tokens=True):
474+
return "".join(chr(x) for x in token_ids)
475+
476+
class DummyToolCall:
477+
def __init__(self, tool_index, name, parameters):
478+
self.tool_index = tool_index
479+
self.name = name
480+
self.parameters = parameters
481+
482+
class DummyParser:
483+
def parse_stream_chunk(self, text):
484+
return "", [DummyToolCall(0, "get_weather", '{"city": "Paris"')]
485+
486+
def has_tool_call(self, text):
487+
return "<tool_call>" in text
488+
489+
def parse_non_stream(self, text):
490+
return "", []
491+
492+
post = SglangStreamingPostProcessor(
493+
tokenizer=DummyTokenizer(),
494+
tool_call_parser=DummyParser(),
495+
reasoning_parser=None,
496+
)
497+
498+
malformed = (
499+
'<tool_call>\n{"name": "get_weather", '
500+
'"arguments": {"city": "Paris"}\n</tool_call>'
501+
)
502+
choice = post.process_output(
503+
{
504+
"token_ids": [ord(c) for c in malformed],
505+
"finish_reason": "stop",
506+
}
507+
)
508+
509+
assert choice is not None
510+
assert choice["finish_reason"] == "stop"
511+
assert choice.get("delta", {}).get("tool_calls", []) == []
512+
513+
470514
# ---------------------------------------------------------------------------
471515
# JsonArrayParser path (tool_choice="required" / named function)
472516
# ---------------------------------------------------------------------------

container/context.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,9 @@ sglang:
8585
runtime_image: lmsysorg/sglang
8686
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
8787
runtime_image_tag: v0.5.10.post1-cu130-runtime
88-
nixl_ref: v1.0.1
88+
# SGLang uses the NIXL stack from the upstream lmsysorg/sglang runtime image.
89+
# Do not add nixl_ref here: Dynamo does not build or install its NIXL wheel
90+
# for SGLang, and SGLang does not use Dynamo KVBM/block-manager at runtime.
8991
enable_media_ffmpeg: "true"
9092
enable_gpu_memory_service: "true"
9193
enable_kvbm: "false"

container/templates/args.Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ ARG SCCACHE_REGION=""
6666

6767
# NIXL configuration
6868
ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
69+
{% if "nixl_ref" in context[framework] -%}
6970
ARG NIXL_REF={{ context[framework].nixl_ref }}
71+
{% endif -%}
7072
{% if device == "cuda" %}
7173
ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
7274
ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}

container/templates/dev.Dockerfile

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -187,39 +187,19 @@ RUN if [ ! -e /usr/bin/python3 ]; then \
187187
fi; \
188188
fi
189189

190-
# Copy UCX and NIXL libraries for dev stage compilation.
191-
# The upstream SGLang runtime image doesn't include NIXL, but cargo build needs to link against
192-
# -lnixl, -lnixl_build, and -lnixl_common. Runtime stage doesn't need this since it uses pre-built
193-
# wheels, but dev stage needs it for maturin develop and cargo build from source.
194-
# - SGLang: Copy NIXL/UCX/libfabric/gdrcopy binaries from wheel_builder (not in upstream lmsysorg/sglang runtime).
195-
# - vllm/trtllm/none: NIXL/UCX are already present in runtime (no-op).
196-
ARG TARGETARCH
197-
RUN --mount=from=wheel_builder,target=/wheel_builder \
198-
if [ "${FRAMEWORK}" = "sglang" ]; then \
199-
if [ -d /wheel_builder/usr/local/ucx ] && [ -d /wheel_builder/opt/nvidia/nvda_nixl ]; then \
200-
mkdir -p /opt/nvidia /usr/include /usr/lib64 /etc/ld.so.conf.d; \
201-
cp -r /wheel_builder/opt/nvidia/nvda_nixl /opt/nvidia/; \
202-
cp -r /wheel_builder/usr/local/ucx /usr/local/; \
203-
cp -r /wheel_builder/usr/local/libfabric /usr/local/; \
204-
cp /wheel_builder/usr/include/gdrapi.h /usr/include/; \
205-
cp /wheel_builder/usr/lib64/libgdrapi.so* /usr/lib64/; \
206-
echo "/usr/lib64" >> /etc/ld.so.conf.d/gdrcopy.conf; \
207-
fi; \
208-
fi
209-
210190
{% if device == "xpu" %}
211191
ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
212192
NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
213193
NIXL_PREFIX=/opt/intel/intel_nixl
214-
{% else %}
215-
# NIXL is installed under lib64 (manylinux/AlmaLinux convention used by the wheel_builder).
216-
# All frameworks reference NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64.
217-
# For vllm/trtllm/none: This resets the same values already set in runtime (no harm).
218-
# For sglang: This sets them for the first time (required).
194+
{% elif framework != "sglang" %}
195+
# Non-SGLang runtimes use the Dynamo-built NIXL install from wheel_builder.
196+
# This re-sets the same values already set in the runtime stage (harmless).
219197
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
220198
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
221199
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
200+
{% endif %}
222201

202+
{% if device != "xpu" %}
223203
# Set universal CUDA development environment variables (all frameworks)
224204
# vLLM: Dockerfile.vllm line 533, 597
225205
# TRT-LLM: Dockerfile.trtllm lines 600-606
@@ -235,15 +215,18 @@ ENV CUDA_HOME=/usr/local/cuda \
235215
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
236216
{% endif %}
237217

218+
{% if framework != "sglang" %}
238219
# Base LD_LIBRARY_PATH with universal paths (all non-SGLang frameworks have these)
239220
# Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
240-
ARG PYTHON_VERSION
241221
ENV LD_LIBRARY_PATH=\
242222
${NIXL_LIB_DIR}:\
243223
${NIXL_PLUGIN_DIR}:\
244224
/usr/local/ucx/lib:\
245225
/usr/local/ucx/lib/ucx:\
246226
${LD_LIBRARY_PATH}
227+
{% else %}
228+
# SGLang dev/local-dev inherit the upstream SGLang/NIXL runtime stack.
229+
{% endif %}
247230

248231
# Copy shell profile script for framework-specific environment variables
249232
# This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists

container/templates/sglang_runtime.Dockerfile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
4040
{% endif %}
4141

4242
{% if target not in ("dev", "local-dev") %}
43-
# Runtime target installs the prebuilt Dynamo wheels. Dev/local-dev build from
44-
# source later in the shared dev stage after the workspace is bind-mounted.
43+
# Runtime target installs only the prebuilt Dynamo wheels. SGLang and its NIXL
44+
# packages come from the upstream lmsysorg/sglang runtime image; --no-deps keeps
45+
# pip from replacing that stack. Dev/local-dev build from source later in the
46+
# shared dev stage after the workspace is bind-mounted.
4547
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
4648

4749
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \

container/templates/wheel_builder.Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
495495
##################################
496496
##### wheel_builder ##############
497497
##################################
498+
{% if "nixl_ref" in context[framework] %}
498499
# Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
499500
# Runtime templates COPY from this stage.
500501

@@ -626,3 +627,11 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
626627

627628
# Consolidate all wheels from the runtime wheel builder stage
628629
COPY --from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/
630+
{% else %}
631+
# SGLang uses NIXL from the upstream lmsysorg/sglang runtime image and does not
632+
# build Dynamo KVBM. Keep this alias so downstream stages can still COPY Dynamo
633+
# wheels and build tools from a common wheel_builder stage name.
634+
# SGLang dev/source builds may link nixl-sys against stubs when native NIXL is
635+
# absent; block-manager/KVBM runtime work should use vllm/trtllm/none images.
636+
FROM runtime_wheel_builder AS wheel_builder
637+
{% endif %}

0 commit comments

Comments
 (0)