diff --git a/Orochi/OrochiUtils.cpp b/Orochi/OrochiUtils.cpp index 7c2fcd9..4604c71 100644 --- a/Orochi/OrochiUtils.cpp +++ b/Orochi/OrochiUtils.cpp @@ -36,6 +36,10 @@ #include #endif +#ifdef ORO_LINK_ZSTD +#include +#endif + inline std::wstring utf8_to_wstring( const std::string& str ) { std::wstring_convert> myconv; @@ -790,3 +794,45 @@ void OrochiUtils::launch2D( oroFunction func, int nx, int ny, const void** args, OROASSERT( e == oroSuccess, 0 ); } +void OrochiUtils::HandlePrecompiled(std::vector& out, const CompressedBuffer& buffer) +{ + #ifdef ORO_LINK_ZSTD + out.assign(buffer.uncompressedSize,0); + + size_t decompressedSize = ZSTD_decompress( + out.data(), // dst: buffer that receives the uncompressed data + out.size(), // dst capacity in bytes (out was sized to buffer.uncompressedSize above) + buffer.data, // src: compressed data + buffer.size // src size in bytes + ); + + if ( decompressedSize != buffer.uncompressedSize ) + throw std::runtime_error( "ERROR: ZSTD_decompress FAILED." ); + #else + throw std::runtime_error( "ERROR: ZSTD is not part of this build." 
); + #endif + return; +} + + +void OrochiUtils::HandlePrecompiled(std::vector& out, const RawBuffer& buffer) +{ + out = std::vector(buffer.data, buffer.data + buffer.size ); + return; +} + + +void OrochiUtils::HandlePrecompiled(std::vector& out, const unsigned char* rawData, size_t rawData_sizeByte, std::optional uncompressed_sizeByte) +{ + if (uncompressed_sizeByte.has_value()) { + // an uncompressed size was supplied: treat the input as compressed and decompress it into 'out' + CompressedBuffer buffer{ rawData, rawData_sizeByte, uncompressed_sizeByte.value() }; + HandlePrecompiled(out, buffer ); + } else { + // no uncompressed size was supplied: treat the input as raw and copy it into 'out' as-is + RawBuffer buffer{ rawData, rawData_sizeByte }; + HandlePrecompiled(out, buffer ); + } +} + + diff --git a/Orochi/OrochiUtils.h b/Orochi/OrochiUtils.h index e8ca8cf..e5f5776 100644 --- a/Orochi/OrochiUtils.h +++ b/Orochi/OrochiUtils.h @@ -27,6 +27,7 @@ #include #include #include +#include #if defined( GNUC ) #include @@ -83,6 +84,20 @@ class OrochiUtils static void getModule( oroDevice device, const char* code, const char* path, std::vector* optsIn, const char* funcName, oroModule* moduleOut ); static void launch1D( oroFunction func, int nx, const void** args, int wgSize = 64, unsigned int sharedMemBytes = 0, oroStream stream = 0 ); static void launch2D( oroFunction func, int nx, int ny, const void** args, int wgSizeX = 8, int wgSizeY = 8, unsigned int sharedMemBytes = 0, oroStream stream = 0 ); + + + struct CompressedBuffer { + const unsigned char* data = nullptr; // pointer to the compressed bytes + size_t size = 0; // size in bytes of 'data' + size_t uncompressedSize = 0; // size in bytes of the data once uncompressed. 
+ }; + struct RawBuffer { + const unsigned char* data = nullptr; + size_t size = 0; + }; + static void HandlePrecompiled(std::vector& out, const CompressedBuffer& buffer); + static void HandlePrecompiled(std::vector& out, const RawBuffer& buffer); + static void HandlePrecompiled(std::vector& out, const unsigned char* rawData, size_t rawData_sizeByte, std::optional uncompressed_sizeByte=std::nullopt); template static void malloc( T*& ptr, size_t n ) diff --git a/ParallelPrimitives/RadixSort.cpp b/ParallelPrimitives/RadixSort.cpp index b99ad8d..f9f7011 100644 --- a/ParallelPrimitives/RadixSort.cpp +++ b/ParallelPrimitives/RadixSort.cpp @@ -54,6 +54,8 @@ static const char** RadixSortKernelsIncludes = nullptr; #else const unsigned char oro_compiled_kernels_h[] = ""; const size_t oro_compiled_kernels_h_size = 0; +const size_t oro_compiled_kernels_h_size_uncompressed = 0; +const bool oro_compiled_kernels_h_isCompressed = false; #endif constexpr uint64_t div_round_up64( uint64_t val, uint64_t divisor ) noexcept { return ( val + divisor - 1 ) / divisor; } @@ -189,8 +191,8 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string { if constexpr( usePrecompiledAndBakedKernel ) { - // Move the raw buffer into a std::vector, which avoids potential issues explained here: github.com/GPUOpen-LibrariesAndSDKs/HIPRT/pull/38#issuecomment-2761698032 - std::vector binary(oro_compiled_kernels_h, oro_compiled_kernels_h + oro_compiled_kernels_h_size); + std::vector binary; + OrochiUtils::HandlePrecompiled(binary, oro_compiled_kernels_h, oro_compiled_kernels_h_size, oro_compiled_kernels_h_isCompressed ? 
std::optional{oro_compiled_kernels_h_size_uncompressed} : std::nullopt); oroFunctions[record.kernelType] = m_oroutils.getFunctionFromPrecompiledBinary_asData(binary.data(), binary.size(), record.kernelName.c_str() ); } else if constexpr( useBakeKernel ) diff --git a/scripts/convert_binary_to_array.py b/scripts/convert_binary_to_array.py index baab3b8..190fcba 100644 --- a/scripts/convert_binary_to_array.py +++ b/scripts/convert_binary_to_array.py @@ -1,27 +1,47 @@ -# convert_binary_to_header.py +# convert_binary_to_array.py import sys from pathlib import Path -def binary_to_c_array(bin_file, array_name): +def binary_to_c_array(bin_file, array_name, size_BeforeCompression, compression_activated): with open(bin_file, 'rb') as f: binary_data = f.read() hex_array = ', '.join(f'0x{b:02x}' for b in binary_data) c_array = f'const unsigned char {array_name}[] = {{\n {hex_array}\n}};\n' - c_array += f'const size_t {array_name}_size = sizeof({array_name});\n' + c_array += f'const size_t {array_name}_size = sizeof({array_name}); // {len(binary_data)}\n' + + c_array += f'const size_t {array_name}_size_uncompressed = ' + if compression_activated: + c_array += f'{size_BeforeCompression}; // size of the data in bytes, once it has been uncompressed.\n' + else: + c_array += f'{array_name}_size; // same than raw buffer, because data is not compressed.\n' + + c_array += f'const bool {array_name}_isCompressed = ' + if compression_activated: + c_array += f'true;\n' + else: + c_array += f'false;\n' return c_array if __name__ == "__main__": - if len(sys.argv) != 3: - print(f"Usage: {sys.argv[0]} ") + if len(sys.argv) != 5: + print(f"Usage: {sys.argv[0]} ") sys.exit(1) - bin_file = sys.argv[1] - header_file_path = sys.argv[2] + bin_file_beforeCompression = sys.argv[1] + bin_file_afterCompression = sys.argv[2] # not used if 'compression_activated' is OFF + header_file_path = sys.argv[3] + compression_activated = sys.argv[4].lower() == "on" # sys.argv[4] should be "ON" or "OFF" + 
header_file = Path(header_file_path).name array_name = header_file.replace('.', '_') - c_array = binary_to_c_array(bin_file, array_name) + if not compression_activated: + bin_file_afterCompression = bin_file_beforeCompression + + c_array = binary_to_c_array(bin_file_afterCompression, array_name, Path(bin_file_beforeCompression).stat().st_size, compression_activated ) with open(header_file_path, 'w') as f: - f.write("// generated by convert_binary_to_header.py\n") + f.write("// generated by convert_binary_to_array.py\n") + if compression_activated: + f.write(f"// Data is compressed.\n") f.write(c_array) diff --git a/scripts/create_archive.cmake b/scripts/create_archive.cmake new file mode 100644 index 0000000..35d6a52 --- /dev/null +++ b/scripts/create_archive.cmake @@ -0,0 +1,23 @@ + +# create_archive.cmake +# Create a raw Zstd-compressed "archive" from a single file. + +# Variables expected: +# INPUT_FILE – path to the file to compress +# OUTPUT_FILE – path to the compressed file to generate +# DO_COMPRESS: ON/OFF + + +if(DO_COMPRESS) + message("Compress ${INPUT_FILE} ...") + file(ARCHIVE_CREATE + OUTPUT "${OUTPUT_FILE}" + PATHS "${INPUT_FILE}" + FORMAT raw + COMPRESSION Zstd + COMPRESSION_LEVEL 9 # 0-9 for cmake >= 3.19 or 0-19 for cmake >= 3.26 + ) +endif() + + +