From b81050807f6f686dc363ec0e431706981c6bd26c Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Wed, 10 Jun 2026 17:25:12 +0000 Subject: [PATCH 01/15] checkpoint first version --- .github/workflows/build-wheel.yml | 10 ++++++ .github/workflows/test-wheel-linux.yml | 13 +++++++ .gitignore | 3 ++ ci/tools/env-vars | 1 + cuda_core/tests/test_module.py | 49 ++++++++++++++++++++++++++ 5 files changed, 76 insertions(+) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 418fd808304..9c6c286bad3 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -453,6 +453,16 @@ jobs: path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }}/test_*${{ env.PY_EXT_SUFFIX }} if-no-files-found: error + - name: Build cuda.core test binaries + run: bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh + + - name: Upload cuda.core test binaries + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o + if-no-files-found: error + # Note: This overwrites CUDA_PATH etc - name: Set up mini CTK uses: ./.github/actions/fetch_ctk diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index cb5dbc4c866..72ee298f77a 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -279,6 +279,19 @@ jobs: pwd ls -lahR $CUDA_CORE_CYTHON_TESTS_DIR + - name: Download cuda.core test binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Display structure of downloaded cuda.core test binaries + run: | + pwd + ls -lahR $CUDA_CORE_TEST_BINARIES_DIR + - name: Set up Python ${{ 
matrix.PY_VER }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/.gitignore b/.gitignore index c55480b7551..d2a5bf6e52e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ cache_driver cache_runtime cache_nvrtc +# cuda.core test object fixtures built locally / downloaded as CI artifacts +cuda_core/tests/test_binaries/*.o + # CUDA Python specific (auto-generated) cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx diff --git a/ci/tools/env-vars b/ci/tools/env-vars index 30fac1cdce8..8ffbfa13472 100755 --- a/ci/tools/env-vars +++ b/ci/tools/env-vars @@ -34,6 +34,7 @@ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${HOST_ echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${SHA}" echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/cuda_core/dist")" echo "CUDA_CORE_CYTHON_TESTS_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/cython")" + echo "CUDA_CORE_TEST_BINARIES_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/test_binaries")" echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" } >> $GITHUB_ENV diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 3a438f825a0..ebeb0775808 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -172,6 +172,35 @@ def get_saxpy_fatbin(init_cuda): return bytes(fatbin), sym_map +@pytest.fixture(scope="module") +def get_saxpy_object(): + """Read the pre-built saxpy.o. + + In CI: produced by build stage into a test wheel file. + In local dev: auto-built on demand if nvcc is available; if you edit + saxpy.cu, remove the stale saxpy.o to force a rebuild. 
+ """ + import shutil + import subprocess + from pathlib import Path + + binaries_dir = Path(__file__).parent / "test_binaries" + obj_path = binaries_dir / "saxpy.o" + + if not obj_path.is_file(): + if shutil.which("nvcc") is None: + pytest.skip( + f"saxpy.o not found at {obj_path} and nvcc is unavailable. " + "In CI this is downloaded from the build stage." + ) + subprocess.run( # noqa: S603 + ["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607 + check=True, + ) + + return obj_path.read_bytes() + + def test_get_kernel(init_cuda): kernel = """extern "C" __global__ void ABC() { }""" @@ -330,6 +359,26 @@ def test_object_code_load_fatbin_from_file(get_saxpy_fatbin, tmp_path, convert_p mod_obj.get_kernel("saxpy") # force loading +def test_object_code_load_object(get_saxpy_object): + obj = get_saxpy_object + assert isinstance(obj, bytes) + mod_obj = ObjectCode.from_object(obj) + assert mod_obj.code == obj + assert mod_obj.code_type == "object" + # object code is only valid as linker input; get_kernel is unsupported + with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): + mod_obj.get_kernel("saxpy") + + +def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): + obj_file = tmp_path / "test.o" + obj_file.write_bytes(get_saxpy_object) + arg = str(obj_file) + mod_obj = ObjectCode.from_object(arg) + assert mod_obj.code == arg + assert mod_obj.code_type == "object" + + def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel_cubin From 19c8057150cc32bb05af911b5950f8fa5ff7d59d Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Wed, 10 Jun 2026 18:34:58 +0000 Subject: [PATCH 02/15] add test_binaries folder --- .../tests/test_binaries/build_test_binaries.sh | 14 ++++++++++++++ cuda_core/tests/test_binaries/saxpy.cu | 12 ++++++++++++ 2 files changed, 26 insertions(+) create mode 100755 cuda_core/tests/test_binaries/build_test_binaries.sh create mode 100644 
cuda_core/tests/test_binaries/saxpy.cu diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh new file mode 100755 index 00000000000..6e4de10b261 --- /dev/null +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# Build .o test fixtures. Invoked at CI build stage + +SCRIPTPATH=$(dirname "$(realpath "$0")") + +nvcc -dc -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" + +ls -lah "${SCRIPTPATH}/saxpy.o" diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu new file mode 100644 index 00000000000..6ac3fa714d1 --- /dev/null +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include <cstddef> + +template <typename T> +__global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (size_t i = tid; i < N; i += gridDim.x * blockDim.x) { + out[tid] = a * x[tid] + y[tid]; + } +} From 8768a13c525b2e143bedc996c9e4d677146afa8a Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Thu, 11 Jun 2026 16:30:04 +0000 Subject: [PATCH 03/15] add download test binaries in test-wheel-windows.yaml --- .github/workflows/test-wheel-windows.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 6ccacfff8f1..6db50b89da7 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -259,6 +259,19 @@ jobs: Get-Location Get-ChildItem -Recurse -Force $env:CUDA_CORE_CYTHON_TESTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName + - name: 
Download cuda.core test binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries + path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }} + run-id: ${{ inputs.run-id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Display structure of downloaded cuda.core test binaries + run: | + Get-Location + Get-ChildItem -Recurse -Force $env:CUDA_CORE_TEST_BINARIES_DIR | Select-Object Mode, LastWriteTime, Length, FullName + - name: Set up Python ${{ matrix.PY_VER }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: From b76a173c2c707df9b2403e9c312af325cf433720 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 18:06:48 +0000 Subject: [PATCH 04/15] use device cuda/std/cstddef --- cuda_core/tests/test_binaries/saxpy.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu index 6ac3fa714d1..9e380186635 100644 --- a/cuda_core/tests/test_binaries/saxpy.cu +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -#include <cstddef> +#include <cuda/std/cstddef> template <typename T> __global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { From 0aa2aeb2387ffc85a884cbf9e5f40809944c3f0f Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 18:16:27 +0000 Subject: [PATCH 05/15] replace shutil with find_nvidia_binary_utility(nvcc) --- cuda_core/tests/test_module.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index ebeb0775808..f2a663238ff 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -180,15 +180,16 @@ def get_saxpy_object(): In local dev: auto-built on demand if nvcc is available; if you edit saxpy.cu, remove the stale saxpy.o to force a rebuild. """ - import shutil import subprocess from pathlib import Path + from cuda.pathfinder import find_nvidia_binary_utility + binaries_dir = Path(__file__).parent / "test_binaries" obj_path = binaries_dir / "saxpy.o" if not obj_path.is_file(): - if shutil.which("nvcc") is None: + if find_nvidia_binary_utility("nvcc") is None: pytest.skip( f"saxpy.o not found at {obj_path} and nvcc is unavailable. " "In CI this is downloaded from the build stage." 
From 9c510fac75005707e11d32cbe4a76b7200abb6d6 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 12 Jun 2026 22:17:02 +0000 Subject: [PATCH 06/15] add testing with linker --- cuda_core/tests/test_binaries/saxpy.cu | 4 +++ cuda_core/tests/test_module.py | 45 ++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_binaries/saxpy.cu b/cuda_core/tests/test_binaries/saxpy.cu index 9e380186635..f0afc58637c 100644 --- a/cuda_core/tests/test_binaries/saxpy.cu +++ b/cuda_core/tests/test_binaries/saxpy.cu @@ -3,6 +3,10 @@ #include <cuda/std/cstddef> +__device__ float saxpy_step(float a, float x, float y) { + return a * x + y; +} + template <typename T> __global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) { const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index f2a663238ff..f82777a95f4 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -8,7 +8,7 @@ import pytest import cuda.core -from cuda.core import Device, Kernel, ObjectCode, Program, ProgramOptions +from cuda.core import Device, Kernel, Linker, LinkerOptions, ObjectCode, Program, ProgramOptions from cuda.core._program import _can_load_generated_ptx from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return from cuda.core._utils.version import binding_version, driver_version @@ -366,7 +366,6 @@ def test_object_code_load_object(get_saxpy_object): mod_obj = ObjectCode.from_object(obj) assert mod_obj.code == obj assert mod_obj.code_type == "object" - # object code is only valid as linker input; get_kernel is unsupported with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): mod_obj.get_kernel("saxpy") @@ -380,6 +379,48 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): assert mod_obj.code_type == "object" +def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): + arch = "sm_" + 
"".join(f"{i}" for i in init_cuda.compute_capability) + kernel_ptx = Program( + r""" + extern __device__ float saxpy_step(float a, float x, float y); + extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) { + if (threadIdx.x == 0 && blockIdx.x == 0) *out = saxpy_step(a, x, y); + } + """, + "c++", + ProgramOptions(relocatable_device_code=True, arch=arch), + ).compile("ptx") + linked = Linker( + kernel_ptx, + ObjectCode.from_object(get_saxpy_object), + options=LinkerOptions(arch=arch), + ).link("cubin") + kernel = linked.get_kernel("linked_kernel") + + import numpy as np + + stream = init_cuda.create_stream() + host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4) + result = np.from_dlpack(host_buf).view(np.float32) + result[:] = 0.0 + dev_buf = init_cuda.memory_resource.allocate(4, stream=init_cuda.default_stream) + + cuda.core.launch( + stream, + cuda.core.LaunchConfig(grid=1, block=1), + kernel, + np.float32(2.0), + np.float32(3.0), + np.float32(4.0), + dev_buf, + ) + dev_buf.copy_to(host_buf, stream=stream) + stream.sync() + + assert result[0] == 10.0 + + def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel_cubin From 59eaf0c1bfe980defffad7ef149d3b1ea43b3d1b Mon Sep 17 00:00:00 2001 From: Jinfeng Li Date: Mon, 15 Jun 2026 13:28:47 -0400 Subject: [PATCH 07/15] Update cuda_core/tests/test_module.py Co-authored-by: Leo Fang --- cuda_core/tests/test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index f82777a95f4..bdf90061e89 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -380,7 +380,7 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): - arch = "sm_" + "".join(f"{i}" for i in init_cuda.compute_capability) + arch = init_cuda.arch kernel_ptx = Program( r""" 
extern __device__ float saxpy_step(float a, float x, float y); From cd0fb0969f19b50179e6e979db34daa1e4ad6353 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:25:26 +0000 Subject: [PATCH 08/15] add windows nvcc flag required by CCCL cuda/std/cstddef --- cuda_core/tests/test_binaries/build_test_binaries.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 6e4de10b261..33014ead170 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -9,6 +9,12 @@ set -euo pipefail SCRIPTPATH=$(dirname "$(realpath "$0")") -nvcc -dc -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" +NVCC_EXTRA_FLAGS=() +if [[ "${OS:-}" == "Windows_NT" ]]; then + # CCCL headers (e.g. cuda/std/cstddef) require MSVC's conforming preprocessor. + NVCC_EXTRA_FLAGS+=(-Xcompiler /Zc:preprocessor) +fi + +nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" ls -lah "${SCRIPTPATH}/saxpy.o" From e8560f0c4d3c1203f89502b615c371b24a767dcd Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:29:05 +0000 Subject: [PATCH 09/15] addressing comment --- cuda_core/tests/test_module.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index bdf90061e89..3bb73057288 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -180,6 +180,7 @@ def get_saxpy_object(): In local dev: auto-built on demand if nvcc is available; if you edit saxpy.cu, remove the stale saxpy.o to force a rebuild. 
""" + import os import subprocess from pathlib import Path @@ -197,6 +198,7 @@ def get_saxpy_object(): subprocess.run( # noqa: S603 ["bash", str(binaries_dir / "build_test_binaries.sh")], # noqa: S607 check=True, + env=os.environ, ) return obj_path.read_bytes() From fa3aadc9cbcf9871ea87c92a811b07a7899c595e Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:40:12 +0000 Subject: [PATCH 10/15] move imports to top --- cuda_core/tests/test_module.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 3bb73057288..d5b22afc88a 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -2,9 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 import ctypes +import os import pickle +import subprocess import warnings +from pathlib import Path +import numpy as np import pytest import cuda.core @@ -12,6 +16,7 @@ from cuda.core._program import _can_load_generated_ptx from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return from cuda.core._utils.version import binding_version, driver_version +from cuda.pathfinder import find_nvidia_binary_utility try: import numba @@ -180,12 +185,6 @@ def get_saxpy_object(): In local dev: auto-built on demand if nvcc is available; if you edit saxpy.cu, remove the stale saxpy.o to force a rebuild. 
""" - import os - import subprocess - from pathlib import Path - - from cuda.pathfinder import find_nvidia_binary_utility - binaries_dir = Path(__file__).parent / "test_binaries" obj_path = binaries_dir / "saxpy.o" @@ -400,8 +399,6 @@ def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): ).link("cubin") kernel = linked.get_kernel("linked_kernel") - import numpy as np - stream = init_cuda.create_stream() host_buf = cuda.core.LegacyPinnedMemoryResource().allocate(4) result = np.from_dlpack(host_buf).view(np.float32) From cba2acfae47746274a14a1fdcac28630af630f2c Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 17:58:36 +0000 Subject: [PATCH 11/15] add c++17 as required by cccl windows --- cuda_core/tests/test_binaries/build_test_binaries.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 33014ead170..00299f76724 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -9,9 +9,8 @@ set -euo pipefail SCRIPTPATH=$(dirname "$(realpath "$0")") -NVCC_EXTRA_FLAGS=() +NVCC_EXTRA_FLAGS=(-std=c++17) if [[ "${OS:-}" == "Windows_NT" ]]; then - # CCCL headers (e.g. cuda/std/cstddef) require MSVC's conforming preprocessor. 
NVCC_EXTRA_FLAGS+=(-Xcompiler /Zc:preprocessor) fi From 0f4c9ad200e2290cf4671683f02767a45326d1e8 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 18:45:31 +0000 Subject: [PATCH 12/15] fix test failure due to using init_cuda.arch instead of sm_{init_cuda.arch} --- cuda_core/tests/test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index d5b22afc88a..e5b3d785899 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -381,7 +381,7 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): - arch = init_cuda.arch + arch = f"sm_{init_cuda.arch}" kernel_ptx = Program( r""" extern __device__ float saxpy_step(float a, float x, float y); From 5d0f64b3624850d2969cd7ff01005d0424d37b7e Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Mon, 15 Jun 2026 21:09:04 +0000 Subject: [PATCH 13/15] avoid linking kernel_ptx with cubin that triggers nvrtc nvjitlink version mismatch in ci blackwell gpu --- cuda_core/tests/test_module.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index e5b3d785899..524dd471345 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -382,7 +382,7 @@ def test_object_code_load_object_from_file(get_saxpy_object, tmp_path): def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): arch = f"sm_{init_cuda.arch}" - kernel_ptx = Program( + kernel_code = Program( r""" extern __device__ float saxpy_step(float a, float x, float y); extern "C" __global__ void linked_kernel(float a, float x, float y, float* out) { @@ -391,9 +391,9 @@ def test_object_code_load_object_with_linker(get_saxpy_object, init_cuda): """, "c++", ProgramOptions(relocatable_device_code=True, arch=arch), - ).compile("ptx") + ).compile("cubin") 
linked = Linker( - kernel_ptx, + kernel_code, ObjectCode.from_object(get_saxpy_object), options=LinkerOptions(arch=arch), ).link("cubin") From f417be7e596e245004774858edfd277c63330131 Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Tue, 23 Jun 2026 16:57:30 +0000 Subject: [PATCH 14/15] try pin lower ctk version in build and specify test gpu sm codes --- .github/workflows/build-wheel.yml | 14 +++++++++++++- .../tests/test_binaries/build_test_binaries.sh | 13 +++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 9c6c286bad3..e86b51c87c9 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -453,8 +453,20 @@ jobs: path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }}/test_*${{ env.PY_EXT_SUFFIX }} if-no-files-found: error + - name: Set up mini CTK for test binaries + uses: ./.github/actions/fetch_ctk + continue-on-error: false + with: + host-platform: ${{ inputs.host-platform }} + cuda-version: ${{ inputs.prev-cuda-version }} + cuda-path: "./cuda_toolkit_test_binaries" + - name: Build cuda.core test binaries - run: bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh + run: | + TEST_CTK="$(realpath ./cuda_toolkit_test_binaries)" + export PATH="${TEST_CTK}/bin:${PATH}" + export CUDA_HOME="${TEST_CTK}" + bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh - name: Upload cuda.core test binaries uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 00299f76724..8fb69c102f5 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -5,7 +5,9 @@ set -euo pipefail -# Build .o test fixtures. Invoked at CI build stage +# Build .o test fixtures. 
Invoked at CI build stage with the oldest test-matrix +# CTK (prev-cuda-version, currently 12.9.x) so nvJitLink on 12.9/13.0/13.3 +# test jobs can consume the embedded device code. SCRIPTPATH=$(dirname "$(realpath "$0")") @@ -14,6 +16,13 @@ if [[ "${OS:-}" == "Windows_NT" ]]; then NVCC_EXTRA_FLAGS+=(-Xcompiler /Zc:preprocessor) fi -nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" +GENCODE=() +for cc in 70 75 80 89 90 120; do + if nvcc --list-gpu-code | grep -qx "sm_${cc}"; then + GENCODE+=(-gencode "arch=compute_${cc},code=sm_${cc}") + fi +done + +nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" "${GENCODE[@]}" -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" ls -lah "${SCRIPTPATH}/saxpy.o" From 463aa4bedef323ebf3547217fa3998d2124015de Mon Sep 17 00:00:00 2001 From: Jinfeng Date: Fri, 26 Jun 2026 17:16:16 +0000 Subject: [PATCH 15/15] use ctk_prev build and add arch=all to nvcc --- .github/workflows/build-wheel.yml | 20 ++++--------------- .../test_binaries/build_test_binaries.sh | 14 +++---------- 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index e86b51c87c9..27ee752b9d8 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -453,20 +453,17 @@ jobs: path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }}/test_*${{ env.PY_EXT_SUFFIX }} if-no-files-found: error - - name: Set up mini CTK for test binaries + # Note: This overwrites CUDA_PATH etc + - name: Set up mini CTK uses: ./.github/actions/fetch_ctk continue-on-error: false with: host-platform: ${{ inputs.host-platform }} cuda-version: ${{ inputs.prev-cuda-version }} - cuda-path: "./cuda_toolkit_test_binaries" + cuda-path: "./cuda_toolkit_prev" - name: Build cuda.core test binaries - run: | - TEST_CTK="$(realpath ./cuda_toolkit_test_binaries)" - export PATH="${TEST_CTK}/bin:${PATH}" - export CUDA_HOME="${TEST_CTK}" - bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR 
}}/build_test_binaries.sh + run: bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh - name: Upload cuda.core test binaries uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 @@ -475,15 +472,6 @@ jobs: path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o if-no-files-found: error - # Note: This overwrites CUDA_PATH etc - - name: Set up mini CTK - uses: ./.github/actions/fetch_ctk - continue-on-error: false - with: - host-platform: ${{ inputs.host-platform }} - cuda-version: ${{ inputs.prev-cuda-version }} - cuda-path: "./cuda_toolkit_prev" - - name: Download cuda.bindings build artifacts from the prior branch env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/cuda_core/tests/test_binaries/build_test_binaries.sh b/cuda_core/tests/test_binaries/build_test_binaries.sh index 8fb69c102f5..8d2231bd90a 100755 --- a/cuda_core/tests/test_binaries/build_test_binaries.sh +++ b/cuda_core/tests/test_binaries/build_test_binaries.sh @@ -5,9 +5,7 @@ set -euo pipefail -# Build .o test fixtures. Invoked at CI build stage with the oldest test-matrix -# CTK (prev-cuda-version, currently 12.9.x) so nvJitLink on 12.9/13.0/13.3 -# test jobs can consume the embedded device code. +# Build .o test fixtures. Invoked at CI build stage SCRIPTPATH=$(dirname "$(realpath "$0")") @@ -16,13 +14,7 @@ if [[ "${OS:-}" == "Windows_NT" ]]; then NVCC_EXTRA_FLAGS+=(-Xcompiler /Zc:preprocessor) fi -GENCODE=() -for cc in 70 75 80 89 90 120; do - if nvcc --list-gpu-code | grep -qx "sm_${cc}"; then - GENCODE+=(-gencode "arch=compute_${cc},code=sm_${cc}") - fi -done - -nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" "${GENCODE[@]}" -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" +nvcc -dc "${NVCC_EXTRA_FLAGS[@]}" -arch=all-major \ + -o "${SCRIPTPATH}/saxpy.o" "${SCRIPTPATH}/saxpy.cu" ls -lah "${SCRIPTPATH}/saxpy.o"