Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Orochi/OrochiUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
#include <sys/stat.h>
#endif

#ifdef ORO_LINK_ZSTD
#include <contrib/zstd/lib/zstd.h>
#endif

inline std::wstring utf8_to_wstring( const std::string& str )
{
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> myconv;
Expand Down Expand Up @@ -790,3 +794,35 @@ void OrochiUtils::launch2D( oroFunction func, int nx, int ny, const void** args,
OROASSERT( e == oroSuccess, 0 );
}

void OrochiUtils::DecompressPrecompiled(std::vector<unsigned char>& out, const unsigned char* compressedInput, size_t compressedInput_sizeByte, size_t uncompressed_sizeByte)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This API relies on a special value (uncompressed_sizeByte == 0 versus uncompressed_sizeByte > 0) to decide which action to take, which is error-prone and not very user-friendly.

I suggest changing it to one of the following:

Create another function to handle the "NOT compressed" case, and let this function handle only data that is "compressed". Or you can rename the function and utilize function overloading.

  1. Rename the function (Maybe call it "HandlePrecompiled" ), and use std::optional ( e.g., std::optional<size_t> uncompressed_sizeByte = std::nullopt)

and then you can check it like this:

if (uncompressed_sizeByte.has_value()) {
    // handle decompression
} else {
    // treat as raw input
}

Wrap the input into different structures:

struct CompressedBuffer {
    const unsigned char* data;
    size_t size;
    size_t uncompressedSize;
};

struct RawBuffer {
    const unsigned char* data;
    size_t size;
};

And then you use function overloading such as:

static void HandlePrecompiled(std::vector<unsigned char>& out, const CompressedBuffer& buffer);
static void HandlePrecompiled(std::vector<unsigned char>& out, const RawBuffer& buffer);

{
// Fills 'out' with the usable binary:
//  - uncompressed_sizeByte > 0 : the input is treated as Zstd-compressed and is decompressed into 'out'.
//  - uncompressed_sizeByte == 0: the input is treated as raw data and is copied into 'out' unchanged.
// Throws std::runtime_error if decompression is requested but fails, or if this build has no ZSTD support.
if ( uncompressed_sizeByte > 0 ) // if the input data is actually compressed
{
#ifdef ORO_LINK_ZSTD
// Size the destination buffer to the expected uncompressed size (zero-filled) before decompressing into it.
out.assign(uncompressed_sizeByte,0);

size_t decompressedSize = ZSTD_decompress(
out.data(), // final uncompressed buffer
out.size(), // final size
compressedInput, // compressed buffer
compressedInput_sizeByte // compressed buffer - size
);

// Any result other than the expected size is treated as a failure.
// NOTE(review): zstd reports errors through the same size_t return value, so this check is presumably
// meant to cover them as well - confirm against ZSTD_isError() if a more precise message is wanted.
if ( decompressedSize != uncompressed_sizeByte )
throw std::runtime_error( "ERROR: ZSTD_decompress FAILED." );
#else

// Compressed data was supplied, but ORO_LINK_ZSTD is not defined, so there is no way to decode it.
throw std::runtime_error( "ERROR: ZSTD is not part of this build." );

#endif

}
else // if the input data is NOT compressed, bypass the decompression step.
{
// Copy the raw input bytes into 'out' as-is.
out = std::vector<unsigned char>(compressedInput, compressedInput + compressedInput_sizeByte );
}
return;
}



3 changes: 3 additions & 0 deletions Orochi/OrochiUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ class OrochiUtils
static void getModule( oroDevice device, const char* code, const char* path, std::vector<const char*>* optsIn, const char* funcName, oroModule* moduleOut );
static void launch1D( oroFunction func, int nx, const void** args, int wgSize = 64, unsigned int sharedMemBytes = 0, oroStream stream = 0 );
static void launch2D( oroFunction func, int nx, int ny, const void** args, int wgSizeX = 8, int wgSizeY = 8, unsigned int sharedMemBytes = 0, oroStream stream = 0 );

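// Fills 'out' with the precompiled binary.
// If 'uncompressed_sizeByte' is 0, the input is treated as NOT compressed and is copied into 'out' unchanged.
// Otherwise the input is treated as Zstd-compressed and is decompressed into 'out', which is sized to 'uncompressed_sizeByte'.
// Throws std::runtime_error if decompression fails, or if compressed data is given to a build without ORO_LINK_ZSTD.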
static void DecompressPrecompiled(std::vector<unsigned char>& out, const unsigned char* compressedInput, size_t compressedInput_sizeByte, size_t uncompressed_sizeByte);

template<typename T>
static void malloc( T*& ptr, size_t n )
Expand Down
5 changes: 3 additions & 2 deletions ParallelPrimitives/RadixSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ static const char** RadixSortKernelsIncludes = nullptr;
#else
const unsigned char oro_compiled_kernels_h[] = "";
const size_t oro_compiled_kernels_h_size = 0;
const size_t oro_compiled_kernels_h_size_uncompressed = 0;
#endif

constexpr uint64_t div_round_up64( uint64_t val, uint64_t divisor ) noexcept { return ( val + divisor - 1 ) / divisor; }
Expand Down Expand Up @@ -189,8 +190,8 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
{
if constexpr( usePrecompiledAndBakedKernel )
{
// Move the raw buffer into a std::vector, which avoids potential issues explained here: github.com/GPUOpen-LibrariesAndSDKs/HIPRT/pull/38#issuecomment-2761698032
std::vector<unsigned char> binary(oro_compiled_kernels_h, oro_compiled_kernels_h + oro_compiled_kernels_h_size);
std::vector<unsigned char> binary;
OrochiUtils::DecompressPrecompiled(binary, oro_compiled_kernels_h, oro_compiled_kernels_h_size, oro_compiled_kernels_h_size_uncompressed);
oroFunctions[record.kernelType] = m_oroutils.getFunctionFromPrecompiledBinary_asData(binary.data(), binary.size(), record.kernelName.c_str() );
}
else if constexpr( useBakeKernel )
Expand Down
30 changes: 21 additions & 9 deletions scripts/convert_binary_to_array.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,39 @@
# convert_binary_to_header.py
# convert_binary_to_array.py
import sys
from pathlib import Path

def binary_to_c_array(bin_file, array_name):
def binary_to_c_array(bin_file, array_name, size_BeforeCompression, compression_activated):
with open(bin_file, 'rb') as f:
binary_data = f.read()

hex_array = ', '.join(f'0x{b:02x}' for b in binary_data)
c_array = f'const unsigned char {array_name}[] = {{\n {hex_array}\n}};\n'
c_array += f'const size_t {array_name}_size = sizeof({array_name});\n'
c_array += f'const size_t {array_name}_size = sizeof({array_name}); // {len(binary_data)}\n'

if not compression_activated:
size_BeforeCompression = 0 # set value to 0 if we are not using compression.
c_array += f'const size_t {array_name}_size_uncompressed = {size_BeforeCompression}; // set to 0 if NOT using the ZSTD compression.\n'
return c_array

if __name__ == "__main__":
if len(sys.argv) != 3:
print(f"Usage: {sys.argv[0]} <input_binary_file> <output_header_file>")
if len(sys.argv) != 5:
print(f"Usage: {sys.argv[0]} <input_binary_file_before_compression> <input_binary_file_after_compression> <output_header_file> <compression_activated>")
sys.exit(1)

bin_file = sys.argv[1]
header_file_path = sys.argv[2]
bin_file_beforeCompression = sys.argv[1]
bin_file_afterCompression = sys.argv[2] # not used if 'compression_activated' is OFF
header_file_path = sys.argv[3]
compression_activated = sys.argv[4].lower() == "on" # sys.argv[4] should be "ON" or "OFF"

header_file = Path(header_file_path).name
array_name = header_file.replace('.', '_')

c_array = binary_to_c_array(bin_file, array_name)
if not compression_activated:
bin_file_afterCompression = bin_file_beforeCompression

c_array = binary_to_c_array(bin_file_afterCompression, array_name, Path(bin_file_beforeCompression).stat().st_size, compression_activated )
with open(header_file_path, 'w') as f:
f.write("// generated by convert_binary_to_header.py\n")
f.write("// generated by convert_binary_to_array.py\n")
if compression_activated:
f.write(f"// Data is compressed.\n")
f.write(c_array)
23 changes: 23 additions & 0 deletions scripts/create_archive.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

# create_archive.cmake
# Create a raw Zstd-compressed "archive" from a single file.

# Variables expected:
# INPUT_FILE – path to the file to compress
# OUTPUT_FILE – path to the compressed file to generate
# DO_COMPRESS: ON/OFF


# Only compress when DO_COMPRESS is true. Otherwise this script does nothing
# and OUTPUT_FILE is neither created nor modified.
if(DO_COMPRESS)
message("Compress ${INPUT_FILE} ...")
# Write INPUT_FILE to OUTPUT_FILE using the 'raw' format with Zstd compression.
file(ARCHIVE_CREATE
OUTPUT "${OUTPUT_FILE}"
PATHS "${INPUT_FILE}"
FORMAT raw
COMPRESSION Zstd
COMPRESSION_LEVEL 9 # 0-9 for cmake >= 3.19 or 0-19 for cmake >= 3.26
)
endif()