From a0d6e71e2aee557fb6688e4e48f9dce9f986d476 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Mon, 16 Mar 2026 22:53:05 -0400 Subject: [PATCH 1/2] binary: fix string parsing on big-endian hosts When parsing a SPIR-V binary with a different endianness than the host (e.g. a spec-conformant little-endian binary on ppc64/s390x), the parser reads string operands from raw `_.words` without byte-swapping first. `MakeString` then extracts bytes assuming native word layout, producing garbled strings, for example, "OpenCL.std" reads as "nepOs.LC". Byte-swap the words before passing them to `MakeString` when `requires_endian_conversion` is true, matching how other operand types are already handled via `spvFixWord`. --- source/binary.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/source/binary.cpp b/source/binary.cpp index 0cad8b5220..65b3ee1d42 100644 --- a/source/binary.cpp +++ b/source/binary.cpp @@ -605,8 +605,19 @@ spv_result_t Parser::parseOperand(size_t inst_offset, case SPV_OPERAND_TYPE_LITERAL_STRING: case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_STRING: { const size_t max_words = _.num_words - _.word_index; - std::string string = - spvtools::utils::MakeString(_.words + _.word_index, max_words, false); + std::string string; + if (_.requires_endian_conversion) { + // On big-endian hosts, _.words still holds raw little-endian data at + // this point. Byte-swap before extracting the string characters. 
+ std::vector<uint32_t> swapped(_.words + _.word_index, + _.words + _.word_index + max_words); + const spv_endianness_t endianness = _.endian; + for (auto& w : swapped) w = spvFixWord(w, endianness); + string = spvtools::utils::MakeString(swapped.data(), max_words, false); + } else { + string = spvtools::utils::MakeString(_.words + _.word_index, max_words, + false); + } if (string.length() == max_words * 4) return exhaustedInputDiagnostic(inst_offset, opcode, type); From 133e93b52ff0e388bb3fb13e9c3aed590a994e89 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Mon, 16 Mar 2026 22:53:05 -0400 Subject: [PATCH 2/2] objdump: fix source extraction on big-endian hosts `extract_source.cpp` used `reinterpret_cast` on parsed instruction words to read string data. On big-endian hosts, bytes within each native-endian word are in high-to-low memory order, but SPIR-V strings pack characters starting from the lowest byte of each word. Use `MakeString`, which correctly extracts characters from the low bits of each word regardless of host endianness, instead of raw casts. --- tools/objdump/extract_source.cpp | 64 ++++++++++---------------------- 1 file changed, 19 insertions(+), 45 deletions(-) diff --git a/tools/objdump/extract_source.cpp b/tools/objdump/extract_source.cpp index d6d49d92d3..de271d79b8 100644 --- a/tools/objdump/extract_source.cpp +++ b/tools/objdump/extract_source.cpp @@ -21,6 +21,7 @@ #include "source/latest_version_spirv_header.h" #include "source/opt/log.h" +#include "source/util/string_utils.h" #include "spirv-tools/libspirv.hpp" #include "tools/util/cli_consumer.h" @@ -28,38 +29,18 @@ namespace { constexpr auto kDefaultEnvironment = SPV_ENV_UNIVERSAL_1_6; -// Extract a string literal from a given range. -// Copies all the characters from `begin` to the first '\0' it encounters, while +// Extract a string literal from a given range of SPIR-V words. +// Copies all the characters up to the first '\0' it encounters, while // removing escape patterns. 
-// Not finding a '\0' before reaching `end` fails the extraction. -// -// Returns `true` if the extraction succeeded. -// `output` value is undefined if false is returned. -spv_result_t ExtractStringLiteral(const spv_position_t& loc, const char* begin, - const char* end, std::string* output) { - size_t sourceLength = std::distance(begin, end); - std::string escapedString; - escapedString.resize(sourceLength); - - size_t writeIndex = 0; - size_t readIndex = 0; - for (; readIndex < sourceLength; writeIndex++, readIndex++) { - const char read = begin[readIndex]; - if (read == '\0') { - escapedString.resize(writeIndex); - output->append(escapedString); - return SPV_SUCCESS; - } - - if (read == '\\') { - ++readIndex; - } - escapedString[writeIndex] = begin[readIndex]; +std::string ExtractAndUnescape(const uint32_t* words, size_t num_words) { + std::string raw = spvtools::utils::MakeString(words, num_words, false); + std::string result; + result.reserve(raw.size()); + for (size_t i = 0; i < raw.size(); i++) { + if (raw[i] == '\\' && i + 1 < raw.size()) i++; + result += raw[i]; } - - spvtools::Error(spvtools::utils::CLIMessageConsumer, "", loc, - "Missing NULL terminator for literal string."); - return SPV_ERROR_INVALID_BINARY; + return result; } spv_result_t extractOpString(const spv_position_t& loc, @@ -74,11 +55,9 @@ spv_result_t extractOpString(const spv_position_t& loc, } const auto& operand = instruction.operands[1]; - const char* stringBegin = - reinterpret_cast<const char*>(instruction.words + operand.offset); - const char* stringEnd = reinterpret_cast<const char*>( - instruction.words + operand.offset + operand.num_words); - return ExtractStringLiteral(loc, stringBegin, stringEnd, output); + *output = + ExtractAndUnescape(instruction.words + operand.offset, operand.num_words); + return SPV_SUCCESS; } spv_result_t extractOpSourceContinued( @@ -94,11 +73,9 @@ spv_result_t extractOpSourceContinued( } const auto& operand = instruction.operands[0]; - const char* stringBegin = -
reinterpret_cast<const char*>(instruction.words + operand.offset); - const char* stringEnd = reinterpret_cast<const char*>( - instruction.words + operand.offset + operand.num_words); - return ExtractStringLiteral(loc, stringBegin, stringEnd, output); + output->append(ExtractAndUnescape(instruction.words + operand.offset, + operand.num_words)); + return SPV_SUCCESS; } spv_result_t extractOpSource(const spv_position_t& loc, @@ -124,11 +101,8 @@ spv_result_t extractOpSource(const spv_position_t& loc, return SPV_SUCCESS; } - const char* stringBegin = - reinterpret_cast<const char*>(instruction.words + 4); - const char* stringEnd = - reinterpret_cast<const char*>(instruction.words + instruction.num_words); - return ExtractStringLiteral(loc, stringBegin, stringEnd, code); + *code = ExtractAndUnescape(instruction.words + 4, instruction.num_words - 4); + return SPV_SUCCESS; } } // namespace