GPUOpen-LibrariesAndSDKs · KaoCC · Apr 25, 2025 · Apr 20, 2025 · Apr 22, 2025 · Apr 25, 2025
diff --git a/Orochi/OrochiUtils.cpp b/Orochi/OrochiUtils.cpp
@@ -36,6 +36,10 @@
 #include <sys/stat.h>
 #endif
 
+#ifdef ORO_LINK_ZSTD
+#include <contrib/zstd/lib/zstd.h>
+#endif
+
 inline std::wstring utf8_to_wstring( const std::string& str )
 {
 	std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> myconv;
@@ -790,3 +794,35 @@ void OrochiUtils::launch2D( oroFunction func, int nx, int ny, const void** args,
 	OROASSERT( e == oroSuccess, 0 );
 }
 
+// Expands a baked kernel blob into 'out'. A zero 'uncompressed_sizeByte' marks the input as raw, in which case it is copied verbatim.
+// Throws std::runtime_error if decompression fails, if the decompressed size differs from 'uncompressed_sizeByte', or if ZSTD is not part of this build.
+void OrochiUtils::DecompressPrecompiled(std::vector<unsigned char>& out, const unsigned char* compressedInput, size_t compressedInput_sizeByte, size_t uncompressed_sizeByte)
+{
+	if ( uncompressed_sizeByte == 0 ) // the input data is NOT compressed: bypass the decompression step.
+	{
+		out.assign( compressedInput, compressedInput + compressedInput_sizeByte );
+		return;
+	}
+
+#ifdef ORO_LINK_ZSTD
+	out.assign( uncompressed_sizeByte, 0 );
+
+	// ZSTD_decompress returns either the number of bytes written or an error code; ZSTD_isError tells the two apart.
+	const size_t decompressedSize = ZSTD_decompress(
+		out.data(), // final uncompressed buffer
+		out.size(), // final size
+		compressedInput, // compressed buffer
+		compressedInput_sizeByte // compressed buffer - size
+		);
+
+	if ( ZSTD_isError( decompressedSize ) )
+		throw std::runtime_error( std::string( "ERROR: ZSTD_decompress FAILED: " ) + ZSTD_getErrorName( decompressedSize ) );
+	if ( decompressedSize != uncompressed_sizeByte )
+		throw std::runtime_error( "ERROR: ZSTD_decompress FAILED." );
+#else
+	throw std::runtime_error( "ERROR: ZSTD is not part of this build." );
+#endif
+}
+
+
+
diff --git a/Orochi/OrochiUtils.h b/Orochi/OrochiUtils.h
@@ -83,6 +83,9 @@ class OrochiUtils
 	static void getModule( oroDevice device, const char* code, const char* path, std::vector<const char*>* optsIn, const char* funcName, oroModule* moduleOut );
 	static void launch1D( oroFunction func, int nx, const void** args, int wgSize = 64, unsigned int sharedMemBytes = 0, oroStream stream = 0 );
 	static void launch2D( oroFunction func, int nx, int ny, const void** args, int wgSizeX = 8, int wgSizeY = 8, unsigned int sharedMemBytes = 0, oroStream stream = 0 );
+
+	// If 'uncompressed_sizeByte' is 0, the input is treated as uncompressed and is copied into 'out' as-is; otherwise it is ZSTD-decompressed into 'out'. Throws std::runtime_error on failure.
+	static void DecompressPrecompiled(std::vector<unsigned char>& out, const unsigned char* compressedInput, size_t compressedInput_sizeByte, size_t uncompressed_sizeByte);
 
 	template<typename T>
 	static void malloc( T*& ptr, size_t n )

diff --git a/ParallelPrimitives/RadixSort.cpp b/ParallelPrimitives/RadixSort.cpp
@@ -54,6 +54,7 @@ static const char** RadixSortKernelsIncludes = nullptr;
 #else
 const unsigned char oro_compiled_kernels_h[] = "";
 const size_t oro_compiled_kernels_h_size = 0;
+const size_t oro_compiled_kernels_h_size_uncompressed = 0;
 #endif
 
 constexpr uint64_t div_round_up64( uint64_t val, uint64_t divisor ) noexcept { return ( val + divisor - 1 ) / divisor; }
@@ -189,8 +190,8 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
 	{
 		if constexpr( usePrecompiledAndBakedKernel )
 		{
-			// Move the raw buffer into a std::vector, which avoids potential issues explained here:  github.com/GPUOpen-LibrariesAndSDKs/HIPRT/pull/38#issuecomment-2761698032
-			std::vector<unsigned char> binary(oro_compiled_kernels_h, oro_compiled_kernels_h + oro_compiled_kernels_h_size);
+			std::vector<unsigned char> binary;
+			OrochiUtils::DecompressPrecompiled(binary, oro_compiled_kernels_h, oro_compiled_kernels_h_size, oro_compiled_kernels_h_size_uncompressed);
 			oroFunctions[record.kernelType] = m_oroutils.getFunctionFromPrecompiledBinary_asData(binary.data(), binary.size(), record.kernelName.c_str() );
 		}
 		else if constexpr( useBakeKernel )

diff --git a/scripts/convert_binary_to_array.py b/scripts/convert_binary_to_array.py
@@ -1,27 +1,39 @@
-# convert_binary_to_header.py
+# convert_binary_to_array.py
 import sys
 from pathlib import Path
 
-def binary_to_c_array(bin_file, array_name):
+def binary_to_c_array(bin_file, array_name, size_BeforeCompression, compression_activated):
     with open(bin_file, 'rb') as f:
         binary_data = f.read()
 
     hex_array = ', '.join(f'0x{b:02x}' for b in binary_data)
     c_array = f'const unsigned char {array_name}[] = {{\n    {hex_array}\n}};\n'
-    c_array += f'const size_t {array_name}_size = sizeof({array_name});\n'
+    c_array += f'const size_t {array_name}_size = sizeof({array_name}); // {len(binary_data)}\n'
+
+    if not compression_activated:
+        size_BeforeCompression = 0 # set value to 0 if we are not using compression.
+    c_array += f'const size_t {array_name}_size_uncompressed = {size_BeforeCompression}; // set to 0 if NOT using the ZSTD compression.\n'
     return c_array
 
 if __name__ == "__main__":
-    if len(sys.argv) != 3:
-        print(f"Usage: {sys.argv[0]} <input_binary_file> <output_header_file>")
+    if len(sys.argv) != 5:
+        print(f"Usage: {sys.argv[0]} <input_binary_file_before_compression> <input_binary_file_after_compression> <output_header_file> <compression_activated>")
         sys.exit(1)
 
-    bin_file = sys.argv[1]
-    header_file_path = sys.argv[2]
+    bin_file_beforeCompression = sys.argv[1]
+    bin_file_afterCompression = sys.argv[2]    # not used if 'compression_activated' is OFF
+    header_file_path = sys.argv[3]
+    compression_activated = sys.argv[4].lower() == "on"     # sys.argv[4]  should be "ON" or "OFF" 
+
     header_file = Path(header_file_path).name
     array_name = header_file.replace('.', '_')
 
-    c_array = binary_to_c_array(bin_file, array_name)
+    if not compression_activated:
+        bin_file_afterCompression = bin_file_beforeCompression
+
+    c_array = binary_to_c_array(bin_file_afterCompression, array_name,  Path(bin_file_beforeCompression).stat().st_size,  compression_activated  )
     with open(header_file_path, 'w') as f:
-        f.write("// generated by convert_binary_to_header.py\n")
+        f.write("// generated by convert_binary_to_array.py\n")
+        if compression_activated:
+            f.write(f"// Data is compressed.\n")
         f.write(c_array)
diff --git a/scripts/create_archive.cmake b/scripts/create_archive.cmake
@@ -0,0 +1,23 @@
+
+# create_archive.cmake
+# Create a raw Zstd-compressed "archive" from a single file.
+
+# Variables expected:
+#   INPUT_FILE    – path to the file to compress
+#   OUTPUT_FILE   – path to the compressed file to generate
+#   DO_COMPRESS   – ON/OFF; when OFF this script does nothing and OUTPUT_FILE is not generated
+
+
+if(DO_COMPRESS)
+	message("Compress ${INPUT_FILE} ...")
+	file(ARCHIVE_CREATE
+		OUTPUT            "${OUTPUT_FILE}"
+		PATHS             "${INPUT_FILE}"
+		FORMAT            raw
+		COMPRESSION       Zstd
+		COMPRESSION_LEVEL 9  #  0-9 for cmake >= 3.19   or  0-19 for cmake >= 3.26
+	)
+endif()
+
+
+