Devsh-Graphics-Programming · kevyuu · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
diff --git a/3rdparty/jitify b/3rdparty/jitify
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -74,10 +74,10 @@ option(NBL_COMPILE_WITH_CUDA "Compile with CUDA interop?" OFF)
 
 if(NBL_COMPILE_WITH_CUDA)
 	find_package(CUDAToolkit REQUIRED)
-	if(${CUDAToolkit_VERSION} VERSION_GREATER "9.0")
-		message(STATUS "CUDA version 9.0+ found!")
+	# Enforce the 13.0 minimum toolkit version and report the version that was actually found.
+	if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "13.0")
+		message(STATUS "CUDA version ${CUDAToolkit_VERSION} found!")
 	else()
-		message(FATAL_ERROR "CUDA version 9.0+ needed for C++14 support!")
+		message(FATAL_ERROR "CUDA version 13.0+ needed, but found ${CUDAToolkit_VERSION}!")
 	endif()
 endif()
 

diff --git a/examples_tests b/examples_tests
diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h
@@ -42,6 +42,8 @@ class IBuffer : public IDescriptor, public core::IBuffer
 			//! synthetic Nabla inventions
 			// whether `IGPUCommandBuffer::updateBuffer` can be used on this buffer
 			EUF_INLINE_UPDATE_VIA_CMDBUF = 0x80000000u,
+
+            EUF_SYNTHETIC_FLAGS_MASK = EUF_INLINE_UPDATE_VIA_CMDBUF | 0 /* OR of every synthetic flag; extend this if any more synthetic flags are added */
         };
 
 		//!

diff --git a/include/nbl/video/CCUDADevice.h b/include/nbl/video/CCUDADevice.h
@@ -6,6 +6,9 @@
 
 
 #include "nbl/video/IPhysicalDevice.h"
+#include "nbl/video/CCUDAExportableMemory.h"
+#include "nbl/video/CCUDAImportedMemory.h"
+#include "nbl/video/CCUDAImportedSemaphore.h"
 
 
 #ifdef _NBL_COMPILE_WITH_CUDA_
@@ -24,9 +27,17 @@ namespace nbl::video
 {
 class CCUDAHandler;
 
-class CCUDADevice : public core::IReferenceCounted
+class NBL_API2 CCUDADevice : public core::IReferenceCounted
 {
-    public:
+  public:
+#ifdef _WIN32
+		static constexpr IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE EXTERNAL_MEMORY_HANDLE_TYPE = IDeviceMemoryAllocation::EHT_OPAQUE_WIN32;
+		static constexpr CUmemAllocationHandleType ALLOCATION_HANDLE_TYPE = CU_MEM_HANDLE_TYPE_WIN32;
+#else
+		static constexpr IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE EXTERNAL_MEMORY_HANDLE_TYPE = IDeviceMemoryAllocation::EHT_OPAQUE_FD;
+		static constexpr CUmemAllocationHandleType ALLOCATION_HANDLE_TYPE = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
+#endif
+
 		enum E_VIRTUAL_ARCHITECTURE
 		{
 			EVA_30,
@@ -63,132 +74,44 @@ class CCUDADevice : public core::IReferenceCounted
 		};
 		inline E_VIRTUAL_ARCHITECTURE getVirtualArchitecture() {return m_virtualArchitecture;}
 
+		CCUDADevice(core::smart_refctd_ptr<CVulkanConnection>&& vulkanConnection, IPhysicalDevice* const vulkanDevice, const E_VIRTUAL_ARCHITECTURE virtualArchitecture, CUdevice device, core::smart_refctd_ptr<CCUDAHandler>&& handler);
+
+		~CCUDADevice();
+
 		inline core::SRange<const char* const> geDefaultCompileOptions() const
 		{
 			return {m_defaultCompileOptions.data(),m_defaultCompileOptions.data()+m_defaultCompileOptions.size()};
 		}
 
-		// TODO/REDO Vulkan: https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXTRES__INTEROP.html
-		// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#vulkan-interoperability
-		// Watch out, use Driver API (`cu` functions) NOT the Runtime API (`cuda` functions)
-		// Also maybe separate this out into its own `CCUDA` class instead of nesting it here?
-#if 0
-		template<typename ObjType>
-		struct GraphicsAPIObjLink
-		{
-				GraphicsAPIObjLink() : obj(nullptr), cudaHandle(nullptr), acquired(false)
-				{
-					asImage = {nullptr};
-				}
-				GraphicsAPIObjLink(core::smart_refctd_ptr<ObjType>&& _obj) : GraphicsAPIObjLink()
-				{
-					obj = std::move(_obj);
-				}
-				GraphicsAPIObjLink(GraphicsAPIObjLink&& other) : GraphicsAPIObjLink()
-				{
-					operator=(std::move(other));
-				}
-
-				GraphicsAPIObjLink(const GraphicsAPIObjLink& other) = delete;
-				GraphicsAPIObjLink& operator=(const GraphicsAPIObjLink& other) = delete;
-				GraphicsAPIObjLink& operator=(GraphicsAPIObjLink&& other)
-				{
-					std::swap(obj,other.obj);
-					std::swap(cudaHandle,other.cudaHandle);
-					std::swap(acquired,other.acquired);
-					std::swap(asImage,other.asImage);
-					return *this;
-				}
-
-				~GraphicsAPIObjLink()
-				{
-					assert(!acquired); // you've fucked up, there's no way for us to fix it, you need to release the objects on a proper stream
-					if (obj)
-						CCUDAHandler::cuda.pcuGraphicsUnregisterResource(cudaHandle);
-				}
-
-				//
-				auto* getObject() const {return obj.get();}
-
-			private:
-				core::smart_refctd_ptr<ObjType> obj;
-				CUgraphicsResource cudaHandle;
-				bool acquired;
-
-				friend class CCUDAHandler;
-			public:
-				union
-				{
-					struct
-					{
-						CUdeviceptr pointer;
-					} asBuffer;
-					struct
-					{
-						CUmipmappedArray mipmappedArray;
-						CUarray array;
-					} asImage;
-				};
-		};
+		CUdevice getInternalObject() const { return m_handle; } // returns the CUdevice handle stored in m_handle
 
-		//
-		static CUresult registerBuffer(GraphicsAPIObjLink<video::IGPUBuffer>* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE);
-		static CUresult registerImage(GraphicsAPIObjLink<video::IGPUImage>* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE);
-
+		const CCUDAHandler* getHandler() const { return m_handler.get();  } // non-owning pointer to the handler held by m_handler
 
-		template<typename ObjType>
-		static CUresult acquireResourcesFromGraphics(void* tmpStorage, GraphicsAPIObjLink<ObjType>* linksBegin, GraphicsAPIObjLink<ObjType>* linksEnd, CUstream stream)
-		{
-			auto count = std::distance(linksBegin,linksEnd);
-
-			auto resources = reinterpret_cast<CUgraphicsResource*>(tmpStorage);
-			auto rit = resources;
-			for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++)
-			{
-				if (iit->acquired)
-					return CUDA_ERROR_UNKNOWN;
-				*rit = iit->cudaHandle;
-			}
-
-			auto retval = cuda.pcuGraphicsMapResources(count,resources,stream);
-			for (auto iit=linksBegin; iit!=linksEnd; iit++)
-				iit->acquired = true;
-			return retval;
-		}
-		template<typename ObjType>
-		static CUresult releaseResourcesToGraphics(void* tmpStorage, GraphicsAPIObjLink<ObjType>* linksBegin, GraphicsAPIObjLink<ObjType>* linksEnd, CUstream stream)
-		{
-			auto count = std::distance(linksBegin,linksEnd);
-
-			auto resources = reinterpret_cast<CUgraphicsResource*>(tmpStorage);
-			auto rit = resources;
-			for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++)
-			{
-				if (!iit->acquired)
-					return CUDA_ERROR_UNKNOWN;
-				*rit = iit->cudaHandle;
-			}
-
-			auto retval = cuda.pcuGraphicsUnmapResources(count,resources,stream);
-			for (auto iit=linksBegin; iit!=linksEnd; iit++)
-				iit->acquired = false;
-			return retval;
-		}
+		bool isMatchingDevice(const IPhysicalDevice* device) const { return device && !memcmp(device->getProperties().deviceUUID, m_physicalDevice->getProperties().deviceUUID, 16); } // true iff `device` is non-null and the first 16 bytes of its deviceUUID equal m_physicalDevice's; const-qualified because it only reads state
 
-		static CUresult acquireAndGetPointers(GraphicsAPIObjLink<video::IGPUBuffer>* linksBegin, GraphicsAPIObjLink<video::IGPUBuffer>* linksEnd, CUstream stream, size_t* outbufferSizes = nullptr);
-		static CUresult acquireAndGetMipmappedArray(GraphicsAPIObjLink<video::IGPUImage>* linksBegin, GraphicsAPIObjLink<video::IGPUImage>* linksEnd, CUstream stream);
-		static CUresult acquireAndGetArray(GraphicsAPIObjLink<video::IGPUImage>* linksBegin, GraphicsAPIObjLink<video::IGPUImage>* linksEnd, uint32_t* arrayIndices, uint32_t* mipLevels, CUstream stream);
-#endif
+		size_t roundToGranularity(CUmemLocationType location, size_t size) const;
+
+		core::smart_refctd_ptr<CCUDAExportableMemory> createExportableMemory(CCUDAExportableMemory::SCreationParams&& inParams);
+
+		core::smart_refctd_ptr<CCUDAImportedMemory> importExternalMemory(core::smart_refctd_ptr<IDeviceMemoryAllocation>&& mem);
 
-	protected:
-		friend class CCUDAHandler;
-		CCUDADevice(core::smart_refctd_ptr<CVulkanConnection>&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture);
-		~CCUDADevice() = default;
-
+		core::smart_refctd_ptr<CCUDAImportedSemaphore> importExternalSemaphore(core::smart_refctd_ptr<ISemaphore>&& sem);
+
+	private:
+		CUresult reserveAddressAndMapMemory(CUdeviceptr* outPtr, size_t size, size_t alignment, CUmemLocationType location, CUmemGenericAllocationHandle memory) const;
+
+		static constexpr auto CudaMemoryLocationCount = 5;
+
+    const system::logger_opt_ptr m_logger;
 		std::vector<const char*> m_defaultCompileOptions;
 		core::smart_refctd_ptr<CVulkanConnection> m_vulkanConnection;
-		IPhysicalDevice* const m_vulkanDevice;
+		IPhysicalDevice* const m_physicalDevice;
 		E_VIRTUAL_ARCHITECTURE m_virtualArchitecture;
+
+		core::smart_refctd_ptr<CCUDAHandler> m_handler;
+		CUdevice m_handle;
+		CUcontext m_context;
+		std::array<size_t, CudaMemoryLocationCount> m_allocationGranularity;
 };
 
 }

diff --git a/include/nbl/video/CCUDAExportableMemory.h b/include/nbl/video/CCUDAExportableMemory.h
@@ -0,0 +1,65 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_VIDEO_C_CUDA_EXPORTABLE_MEMORY_H_
+#define _NBL_VIDEO_C_CUDA_EXPORTABLE_MEMORY_H_
+
+
+#ifdef _NBL_COMPILE_WITH_CUDA_
+
+#include "cuda.h"
+#include "nvrtc.h"
+#if CUDA_VERSION < 13000 // matches the CUDAToolkit 13.0 minimum enforced in CMakeLists.txt
+  #error "Need CUDA 13.0 SDK or higher."
+#endif
+
+// useful includes in the future
+//#include "cudaEGL.h"
+//#include "cudaVDPAU.h"
+
+namespace nbl::video
+{
+
+class CCUDADevice;
+
+class NBL_API2 CCUDAExportableMemory : public core::IReferenceCounted // reference-counted object holding a CCUDADevice reference plus cached CUDA allocation parameters
+{
+    public:
+
+        struct SCreationParams // allocation request as supplied by the caller
+        {
+            size_t            size; // requested size — presumably in bytes; TODO confirm
+            uint32_t          alignment; // requested alignment — presumably in bytes; TODO confirm
+            CUmemLocationType location; // CUDA memory location type for the allocation
+        };
+
+        struct SCachedCreationParams : SCreationParams // the request plus the values stored with this object
+        {
+            size_t granularSize; // NOTE(review): presumably `size` rounded up to the allocation granularity — confirm against the creator
+            CUdeviceptr ptr; // device pointer handed out by getDeviceptr()
+            external_handle_t externalHandle; // NOTE(review): presumably the OS handle used for exporting — confirm ownership/closing in the destructor
+        };
+
+        CCUDAExportableMemory(core::smart_refctd_ptr<CCUDADevice> device, SCachedCreationParams&& params) // keeps a reference to `device` and stores `params`
+            : m_device(std::move(device))
+            , m_params(std::move(params))
+        {}
+        ~CCUDAExportableMemory() override; // defined out of line; implementation not visible in this header
+
+        CUdeviceptr getDeviceptr() const { return m_params.ptr;  } // returns the cached device pointer
+
+        const SCreationParams& getCreationParams() const { return m_params; } // exposes only the SCreationParams base of the cached parameters
+
+        core::smart_refctd_ptr<IDeviceMemoryAllocation> exportAsMemory(ILogicalDevice* device, IDeviceMemoryBacked* dedication = nullptr) const; // NOTE(review): presumably imports this memory into `device` as an allocation, optionally dedicated to `dedication` — confirm in the .cpp
+
+    private:
+
+        core::smart_refctd_ptr<CCUDADevice> m_device; // reference to the CCUDADevice passed to the constructor
+        SCachedCreationParams m_params; // parameters passed to the constructor
+};
+
+}
+
+#endif // _NBL_COMPILE_WITH_CUDA_
+
+#endif
+10 −5		Makefile
+137 −65		jitify.hpp
+72 −0		jitify_test.cu
+586 −0		nvrtc_cli.cpp
+58 −0		nvrtc_cli_test.sh