Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions Orochi/OrochiUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
#include <sys/stat.h>
#endif

#ifdef ORO_LINK_ZSTD
#include <contrib/zstd/lib/zstd.h>
#endif

inline std::wstring utf8_to_wstring( const std::string& str )
{
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> myconv;
Expand Down Expand Up @@ -790,3 +794,45 @@ void OrochiUtils::launch2D( oroFunction func, int nx, int ny, const void** args,
OROASSERT( e == oroSuccess, 0 );
}

// Decompresses a ZSTD-compressed precompiled binary into 'out'.
//
// out    : on success, holds exactly buffer.uncompressedSize bytes of decompressed data.
//          If an exception is thrown, 'out' keeps the content it had before the call.
// buffer : pointer to the compressed bytes, their size, and the expected size once uncompressed.
//
// Throws std::runtime_error when ZSTD reports an error, when the number of decompressed bytes
// differs from buffer.uncompressedSize, or when the build was made without ORO_LINK_ZSTD.
void OrochiUtils::HandlePrecompiled(std::vector<unsigned char>& out, const CompressedBuffer& buffer)
{
#ifdef ORO_LINK_ZSTD
	// Decompress into a temporary so that 'out' is only modified once the result is known to be valid.
	std::vector<unsigned char> decompressed( buffer.uncompressedSize, 0 );

	const size_t result = ZSTD_decompress(
		decompressed.data(), // final uncompressed buffer
		decompressed.size(), // final size
		buffer.data,         // compressed buffer
		buffer.size          // compressed buffer - size
	);

	// ZSTD_decompress returns either the number of bytes written or an error code;
	// ZSTD_isError tells them apart and ZSTD_getErrorName gives the reason.
	if ( ZSTD_isError( result ) )
		throw std::runtime_error( std::string( "ERROR: ZSTD_decompress FAILED: " ) + ZSTD_getErrorName( result ) );

	// A successful decompression that yields an unexpected size means the size recorded
	// alongside the baked data does not match the data itself.
	if ( result != buffer.uncompressedSize )
		throw std::runtime_error( "ERROR: ZSTD_decompress FAILED." );

	out.swap( decompressed );
#else
	throw std::runtime_error( "ERROR: ZSTD is not part of this build." );
#endif
}


// Copies an uncompressed precompiled binary into 'out', replacing whatever 'out' held before.
//
// out    : receives a copy of the buffer.size bytes starting at buffer.data.
// buffer : pointer to the raw bytes and their size.
void OrochiUtils::HandlePrecompiled(std::vector<unsigned char>& out, const RawBuffer& buffer)
{
	const unsigned char* const first = buffer.data;
	const unsigned char* const last = first + buffer.size;

	// Build the copy first, then hand it over to 'out'.
	std::vector<unsigned char> copy( first, last );
	out.swap( copy );
}


// Convenience entry point: fills 'out' from a baked binary that may or may not be compressed.
//
// out                   : receives the usable (uncompressed) binary.
// rawData               : pointer to the baked bytes.
// rawData_sizeByte      : size in bytes of 'rawData'.
// uncompressed_sizeByte : when it holds a value, 'rawData' is treated as compressed and this is the
//                         size of the data once uncompressed; when empty, 'rawData' is copied as-is.
void OrochiUtils::HandlePrecompiled(std::vector<unsigned char>& out, const unsigned char* rawData, size_t rawData_sizeByte, std::optional<size_t> uncompressed_sizeByte)
{
	// No uncompressed size given: the input buffer is not compressed.
	if ( !uncompressed_sizeByte )
	{
		HandlePrecompiled( out, RawBuffer{ rawData, rawData_sizeByte } );
		return;
	}

	// Otherwise the input buffer is compressed.
	HandlePrecompiled( out, CompressedBuffer{ rawData, rawData_sizeByte, *uncompressed_sizeByte } );
}


15 changes: 15 additions & 0 deletions Orochi/OrochiUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <filesystem>
#include <unordered_map>
#include <vector>
#include <optional>

#if defined( GNUC )
#include <signal.h>
Expand Down Expand Up @@ -83,6 +84,20 @@ class OrochiUtils
static void getModule( oroDevice device, const char* code, const char* path, std::vector<const char*>* optsIn, const char* funcName, oroModule* moduleOut );
static void launch1D( oroFunction func, int nx, const void** args, int wgSize = 64, unsigned int sharedMemBytes = 0, oroStream stream = 0 );
static void launch2D( oroFunction func, int nx, int ny, const void** args, int wgSizeX = 8, int wgSizeY = 8, unsigned int sharedMemBytes = 0, oroStream stream = 0 );


// Describes a compressed blob: where it is, how big it is, and how big it becomes once uncompressed.
// Only a pointer is stored; the bytes themselves are not copied into this struct.
struct CompressedBuffer {
	const unsigned char* data{ nullptr }; // compressed data
	size_t size{ 0 };                     // size in bytes of 'data'
	size_t uncompressedSize{ 0 };         // size in bytes of the data once uncompressed
};
// Describes an uncompressed blob: where it is and how big it is.
// Only a pointer is stored; the bytes themselves are not copied into this struct.
struct RawBuffer {
	const unsigned char* data{ nullptr }; // raw (uncompressed) data
	size_t size{ 0 };                     // size in bytes of 'data'
};
// Decompresses 'buffer' (ZSTD) into 'out'.
// Throws std::runtime_error if decompression fails or if the build does not define ORO_LINK_ZSTD.
static void HandlePrecompiled(std::vector<unsigned char>& out, const CompressedBuffer& buffer);
// Copies the bytes described by 'buffer' into 'out' without any transformation.
static void HandlePrecompiled(std::vector<unsigned char>& out, const RawBuffer& buffer);
// Dispatches to one of the two overloads above:
// - 'uncompressed_sizeByte' holds a value: 'rawData' is treated as compressed, and the value is the size in bytes once uncompressed.
// - 'uncompressed_sizeByte' is empty (the default): 'rawData' is treated as uncompressed and copied as-is.
static void HandlePrecompiled(std::vector<unsigned char>& out, const unsigned char* rawData, size_t rawData_sizeByte, std::optional<size_t> uncompressed_sizeByte=std::nullopt);

template<typename T>
static void malloc( T*& ptr, size_t n )
Expand Down
6 changes: 4 additions & 2 deletions ParallelPrimitives/RadixSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ static const char** RadixSortKernelsIncludes = nullptr;
#else
const unsigned char oro_compiled_kernels_h[] = "";
const size_t oro_compiled_kernels_h_size = 0;
const size_t oro_compiled_kernels_h_size_uncompressed = 0;
const bool oro_compiled_kernels_h_isCompressed = false;
#endif

constexpr uint64_t div_round_up64( uint64_t val, uint64_t divisor ) noexcept { return ( val + divisor - 1 ) / divisor; }
Expand Down Expand Up @@ -189,8 +191,8 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
{
if constexpr( usePrecompiledAndBakedKernel )
{
// Move the raw buffer into a std::vector, which avoids potential issues explained here: github.com/GPUOpen-LibrariesAndSDKs/HIPRT/pull/38#issuecomment-2761698032
std::vector<unsigned char> binary(oro_compiled_kernels_h, oro_compiled_kernels_h + oro_compiled_kernels_h_size);
std::vector<unsigned char> binary;
OrochiUtils::HandlePrecompiled(binary, oro_compiled_kernels_h, oro_compiled_kernels_h_size, oro_compiled_kernels_h_isCompressed ? std::optional<size_t>{oro_compiled_kernels_h_size_uncompressed} : std::nullopt);
oroFunctions[record.kernelType] = m_oroutils.getFunctionFromPrecompiledBinary_asData(binary.data(), binary.size(), record.kernelName.c_str() );
}
else if constexpr( useBakeKernel )
Expand Down
38 changes: 29 additions & 9 deletions scripts/convert_binary_to_array.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,47 @@
# convert_binary_to_header.py
# convert_binary_to_array.py
import sys
from pathlib import Path

def binary_to_c_array(bin_file, array_name):
def binary_to_c_array(bin_file, array_name, size_BeforeCompression, compression_activated):
with open(bin_file, 'rb') as f:
binary_data = f.read()

hex_array = ', '.join(f'0x{b:02x}' for b in binary_data)
c_array = f'const unsigned char {array_name}[] = {{\n {hex_array}\n}};\n'
c_array += f'const size_t {array_name}_size = sizeof({array_name});\n'
c_array += f'const size_t {array_name}_size = sizeof({array_name}); // {len(binary_data)}\n'

c_array += f'const size_t {array_name}_size_uncompressed = '
if compression_activated:
c_array += f'{size_BeforeCompression}; // size of the data in bytes, once it has been uncompressed.\n'
else:
c_array += f'{array_name}_size; // same than raw buffer, because data is not compressed.\n'

c_array += f'const bool {array_name}_isCompressed = '
if compression_activated:
c_array += f'true;\n'
else:
c_array += f'false;\n'
return c_array

if __name__ == "__main__":
if len(sys.argv) != 3:
print(f"Usage: {sys.argv[0]} <input_binary_file> <output_header_file>")
if len(sys.argv) != 5:
print(f"Usage: {sys.argv[0]} <input_binary_file_before_compression> <input_binary_file_after_compression> <output_header_file> <compression_activated>")
sys.exit(1)

bin_file = sys.argv[1]
header_file_path = sys.argv[2]
bin_file_beforeCompression = sys.argv[1]
bin_file_afterCompression = sys.argv[2] # not used if 'compression_activated' is OFF
header_file_path = sys.argv[3]
compression_activated = sys.argv[4].lower() == "on" # sys.argv[4] should be "ON" or "OFF"

header_file = Path(header_file_path).name
array_name = header_file.replace('.', '_')

c_array = binary_to_c_array(bin_file, array_name)
if not compression_activated:
bin_file_afterCompression = bin_file_beforeCompression

c_array = binary_to_c_array(bin_file_afterCompression, array_name, Path(bin_file_beforeCompression).stat().st_size, compression_activated )
with open(header_file_path, 'w') as f:
f.write("// generated by convert_binary_to_header.py\n")
f.write("// generated by convert_binary_to_array.py\n")
if compression_activated:
f.write(f"// Data is compressed.\n")
f.write(c_array)
23 changes: 23 additions & 0 deletions scripts/create_archive.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

# create_archive.cmake
# Create a raw Zstd-compressed "archive" from a single file.

# Variables expected:
# INPUT_FILE – path to the file to compress
# OUTPUT_FILE – path to the compressed file to generate
# DO_COMPRESS: ON/OFF


# Compress INPUT_FILE into OUTPUT_FILE as a raw Zstd stream when DO_COMPRESS is ON.
# When DO_COMPRESS is OFF (or not set), this script does nothing.
if(DO_COMPRESS)
	# Fail early with an explicit message when a required variable was not passed,
	# instead of letting file(ARCHIVE_CREATE) fail on an empty path.
	if("${INPUT_FILE}" STREQUAL "")
		message(FATAL_ERROR "create_archive.cmake: INPUT_FILE is not set.")
	endif()
	if("${OUTPUT_FILE}" STREQUAL "")
		message(FATAL_ERROR "create_archive.cmake: OUTPUT_FILE is not set.")
	endif()

	message("Compress ${INPUT_FILE} ...")
	file(ARCHIVE_CREATE
		OUTPUT "${OUTPUT_FILE}"
		PATHS "${INPUT_FILE}"
		FORMAT raw
		COMPRESSION Zstd
		COMPRESSION_LEVEL 9 # 0-9 for cmake >= 3.19 or 0-19 for cmake >= 3.26
	)
endif()