From 64760590c780f419d708c7b33e5417023c49efc0 Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 03:18:45 -0400 Subject: [PATCH 1/8] fix: free table68k + branch_condition_table on shutdown (#125) The sanitizers CI job (added in PR #121) surfaced two ASAN leaks totaling ~1.5 MB on every test run after the m68000 UBSAN noise was filtered out: 1. table68k (src/m68000/readcpu.c:928) -- 65536 * sizeof(struct instr) ~= 1.5 MB. Allocated lazily on first m68k_pulse_reset, never freed. 2. branch_condition_table (src/tom/gpu.c:257) -- 256 bytes (32 * 8 conditional-flag LUT). Allocated by GPUInit -> build_branch_condition_table, never freed. Both are process-lifetime one-time inits. In a libretro core that gets dlopen'd / dlclose'd repeatedly (e.g., the iOS Provenance host), these leaked on every load cycle. Fix: - Add GPUDone() -- frees branch_condition_table, NULLs the pointer so a subsequent GPUInit re-allocates cleanly. Wired into TOMDone alongside OPDone() / BlitterDone(). - Add m68k_done() -- frees table68k, NULLs the pointer, resets the file-scope `emulation_initialized` flag (was function-static in m68k_pulse_reset; hoisted to file scope so the new done can reset it). Wired into JaguarDone after the existing subsystem Done() calls. Both done functions are idempotent and safe to call without a prior init (free(NULL) is a no-op). Closes #125. Co-Authored-By: Claude Opus 4.7 --- src/core/jaguar.c | 1 + src/m68000/m68kinterface.c | 18 ++++++++++++++++-- src/m68000/m68kinterface.h | 1 + src/tom/gpu.c | 14 ++++++++++++++ src/tom/gpu.h | 1 + src/tom/tom.c | 1 + 6 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/core/jaguar.c b/src/core/jaguar.c index 47d2d25b..58527756 100644 --- a/src/core/jaguar.c +++ b/src/core/jaguar.c @@ -919,6 +919,7 @@ void JaguarDone(void) DSPDone(); TOMDone(); JERRYDone(); + m68k_done(); } uint8_t * GetRamPtr(void) diff --git a/src/m68000/m68kinterface.c b/src/m68000/m68kinterface.c index 559411d8..c5d0c0a8 100644 --- a/src/m68000/m68kinterface.c +++ b/src/m68000/m68kinterface.c @@ -78,11 +78,25 @@ void M68KDebugResume(void) } +// File-scope so m68k_done() below can reset it after freeing table68k. +static uint32_t emulation_initialized = 0; + +// Free process-lifetime allocations made by read_table68k(). Called +// from JaguarDone() so ASAN runs see a clean shutdown. +extern struct instr * table68k; +void m68k_done(void) +{ + if (table68k) + { + free(table68k); + table68k = NULL; + } + emulation_initialized = 0; +} + // Pulse the RESET line on the CPU void m68k_pulse_reset(void) { - static uint32_t emulation_initialized = 0; - // The first call to this function initializes the opcode handler jump table if (!emulation_initialized) { diff --git a/src/m68000/m68kinterface.h b/src/m68000/m68kinterface.h index ee1e4d90..c25748c0 100644 --- a/src/m68000/m68kinterface.h +++ b/src/m68000/m68kinterface.h @@ -70,6 +70,7 @@ typedef enum #define M68K_INT_ACK_SPURIOUS 0xFFFFFFFE void m68k_pulse_reset(void); +void m68k_done(void); int m68k_execute(int num_cycles); void m68k_set_irq(unsigned int int_level); diff --git a/src/tom/gpu.c b/src/tom/gpu.c index 077d8c55..d33a4f09 100644 --- a/src/tom/gpu.c +++ b/src/tom/gpu.c @@ -642,6 +642,20 @@ void GPUInit(void) GPUReset(); } +void GPUDone(void) +{ + /* Release the branch-condition LUT so process-lifetime ASAN runs + * don't report it as a leak. Safe to call without a matching + * GPUInit (free(NULL) is a no-op) and safe to re-call after a + * subsequent GPUInit (build_branch_condition_table early-outs on + * non-NULL pointer). */ + if (branch_condition_table) + { + free(branch_condition_table); + branch_condition_table = NULL; + } +} + void GPUReset(void) { unsigned i; diff --git a/src/tom/gpu.h b/src/tom/gpu.h index f44abbf2..75869c0f 100644 --- a/src/tom/gpu.h +++ b/src/tom/gpu.h @@ -15,6 +15,7 @@ extern "C" { #define GPU_WORK_RAM_BASE 0x00F03000 void GPUInit(void); +void GPUDone(void); void GPUReset(void); void GPUExec(int32_t); void GPUUpdateRegisterBanks(void); diff --git a/src/tom/tom.c b/src/tom/tom.c index 542e54ad..9115e4cb 100644 --- a/src/tom/tom.c +++ b/src/tom/tom.c @@ -846,6 +846,7 @@ void TOMDone(void) { OPDone(); BlitterDone(); + GPUDone(); } From a1e9ea0160a3f40c030b212521bb1aa8a1064ce9 Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 03:44:52 -0400 Subject: [PATCH 2/8] fix: test_hle_bios + clang-tidy noise from PR #126 first-run CI on PR #126 (the m68k_done / GPUDone leak fix) surfaced two issues: 1. ASAN sanitizers job still reported the same 1.5MB + 256B leak the PR set out to fix. Root cause: test/test_hle_bios.c calls p_retro_load_game twice (NTSC then PAL) but only calls p_retro_unload_game ONCE (between the two loads). The PAL load never gets unloaded -- so JaguarDone() (which now does m68k_done() + GPUDone()) is never called for the second load. Added the missing p_retro_unload_game() before the final p_retro_deinit() at the end of main. 2. clang-tidy fired NEW pre-existing findings on src/core/jaguar.c and test/test_hle_bios.c because both are in this PR's diff (the workflow runs clang-tidy on changed files, not changed lines): - bugprone-incorrect-roundings on USEC_TO_RISC_CYCLES / USEC_TO_M68K_CYCLES in src/core/event.h. Idiom is `(double + 0.5)` integer cast; lround() is C99 and we're C89/GNU89. - bugprone-multi-level-implicit-pointer-conversion on test_hle_bios.c's dlsym() patterns. Both added to the disabled list in .clang-tidy with documented rationale. Co-Authored-By: Claude Opus 4.7 --- .clang-tidy | 9 +++++++++ test/test_hle_bios.c | 1 + 2 files changed, 10 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index dffc5c4f..a296d577 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -24,6 +24,8 @@ Checks: > -bugprone-suspicious-include, -bugprone-switch-missing-default-case, -bugprone-branch-clone, + -bugprone-incorrect-roundings, + -bugprone-multi-level-implicit-pointer-conversion, -clang-analyzer-deadcode.DeadStores, -clang-analyzer-optin.portability.UnixAPI, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, @@ -49,6 +51,13 @@ FormatStyle: none # - suspicious-include: project includes .c files in a couple of dispatch headers. # - switch-missing-default-case: cosmetic; switches on bit-field decode patterns # commonly omit default because all valid bit values are handled. +# - incorrect-roundings: src/core/event.h's USEC_TO_RISC_CYCLES / +# USEC_TO_M68K_CYCLES use the (double + 0.5) integer-cast idiom for +# cycle conversion. lround() is C99 (we're C89/GNU89), and the +# inputs are non-negative so the idiom is correct here. +# - multi-level-implicit-pointer-conversion: dlsym() returns void* +# that test harnesses cast directly to function pointers / data +# pointer-to-pointer; canonical idiom in the libretro dlsym pattern. # - branch-clone: register-decode if-chains in src/cd/cdrom.c and src/tom/tom.c # intentionally write the same value for several adjacent register addresses # to make the address->effect mapping legible. Real bug clones are caught diff --git a/test/test_hle_bios.c b/test/test_hle_bios.c index d3b229f7..8b590d4c 100644 --- a/test/test_hle_bios.c +++ b/test/test_hle_bios.c @@ -3172,6 +3172,7 @@ int main(int argc, char *argv[]) printf("\n=== Results: %d passed, %d failed ===\n", passes, fails); + p_retro_unload_game(); /* matches the second p_retro_load_game above; without this, JaguarDone() never runs and ASAN reports table68k + branch_condition_table as leaks */ p_retro_deinit(); dlclose(handle); free(dummy_rom); From 6449910e124f05d324a476e725035e32282c497a Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 03:53:24 -0400 Subject: [PATCH 3/8] fix: address Copilot review on PR #126 Three findings from Copilot's review of the leak fix: 1. src/m68000/m68kinterface.c had a redundant `extern struct instr * table68k;` -- already declared in readcpu.h. Removed. 2. m68k_done() reaching directly into table68k (owned by readcpu.c) was a brittle cross-module dependency. Moved the actual free into a new free_table68k() in readcpu.c (declared in readcpu.h alongside read_table68k). m68k_done now just calls free_table68k() + resets emulation_initialized. Ownership stays end-to-end inside the module that allocates. 3. test/test_hle_bios.c inline comment was too long for one line. Converted to a multi-line block above the call. Build sanity: make -j4 clean, c89-lint passes, dylib still gates 46 retro_* exports. Co-Authored-By: Claude Opus 4.7 --- src/m68000/m68kinterface.c | 14 ++++++-------- src/m68000/readcpu.c | 14 ++++++++++++++ src/m68000/readcpu.h | 1 + test/test_hle_bios.c | 5 ++++- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/m68000/m68kinterface.c b/src/m68000/m68kinterface.c index c5d0c0a8..8cf5cf06 100644 --- a/src/m68000/m68kinterface.c +++ b/src/m68000/m68kinterface.c @@ -81,16 +81,14 @@ void M68KDebugResume(void) // File-scope so m68k_done() below can reset it after freeing table68k. static uint32_t emulation_initialized = 0; -// Free process-lifetime allocations made by read_table68k(). Called -// from JaguarDone() so ASAN runs see a clean shutdown. -extern struct instr * table68k; +// Symmetric counterpart to the lazy init in m68k_pulse_reset(). +// Defers the actual free to readcpu.c::free_table68k() so the table +// is owned end-to-end by the module that allocates it (table68k is +// declared in readcpu.h). Called from JaguarDone() so ASAN sees a +// clean shutdown. void m68k_done(void) { - if (table68k) - { - free(table68k); - table68k = NULL; - } + free_table68k(); emulation_initialized = 0; } diff --git a/src/m68000/readcpu.c b/src/m68000/readcpu.c index ba96aebe..3448a820 100644 --- a/src/m68000/readcpu.c +++ b/src/m68000/readcpu.c @@ -922,6 +922,20 @@ static void build_insn(int insn) } +/* Symmetric counterpart to read_table68k: free the opcode table built + above. Idempotent (free(NULL) is a no-op). Called from + m68k_done() in m68kinterface.c so process-lifetime ASAN runs see a + clean shutdown. */ +void free_table68k(void) +{ + if (table68k) + { + free(table68k); + table68k = NULL; + } +} + + void read_table68k(void) { int i; diff --git a/src/m68000/readcpu.h b/src/m68000/readcpu.h index 5fe0d495..a180ad7d 100644 --- a/src/m68000/readcpu.h +++ b/src/m68000/readcpu.h @@ -112,6 +112,7 @@ extern struct instr { } *table68k; extern void read_table68k(void); +extern void free_table68k(void); extern void do_merges(void); extern int get_no_mismatches(void); extern int nr_cpuop_funcs; diff --git a/test/test_hle_bios.c b/test/test_hle_bios.c index 8b590d4c..eecd3030 100644 --- a/test/test_hle_bios.c +++ b/test/test_hle_bios.c @@ -3172,7 +3172,10 @@ int main(int argc, char *argv[]) printf("\n=== Results: %d passed, %d failed ===\n", passes, fails); - p_retro_unload_game(); /* matches the second p_retro_load_game above; without this, JaguarDone() never runs and ASAN reports table68k + branch_condition_table as leaks */ + /* Match the second p_retro_load_game (PAL) call above. Without + * this unload, JaguarDone() never runs for the PAL load and ASAN + * reports table68k + branch_condition_table as leaks. */ + p_retro_unload_game(); p_retro_deinit(); dlclose(handle); free(dummy_rom); From c18c2d48ff343e974f7023d6a897a255bae6948a Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 18:38:40 -0400 Subject: [PATCH 4/8] fix: address Copilot round-2 review on PR #126 Two small follow-ups: 1. src/m68000/m68kinterface.c: convert the four new // comments I added (m68k_done + emulation_initialized) to /* */ form per CLAUDE.md ("// allowed but prefer /* */ for new code"). 2. src/tom/gpu.c: drop the redundant `if (branch_condition_table)` guard around the free + NULL pair. free(NULL) is a no-op; the comment already documented the property. Co-Authored-By: Claude Opus 4.7 --- src/m68000/m68kinterface.c | 12 ++++++------ src/tom/gpu.c | 14 +++++--------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/m68000/m68kinterface.c b/src/m68000/m68kinterface.c index 8cf5cf06..a49077fc 100644 --- a/src/m68000/m68kinterface.c +++ b/src/m68000/m68kinterface.c @@ -78,14 +78,14 @@ void M68KDebugResume(void) } -// File-scope so m68k_done() below can reset it after freeing table68k. +/* File-scope so m68k_done() below can reset it after freeing table68k. */ static uint32_t emulation_initialized = 0; -// Symmetric counterpart to the lazy init in m68k_pulse_reset(). -// Defers the actual free to readcpu.c::free_table68k() so the table -// is owned end-to-end by the module that allocates it (table68k is -// declared in readcpu.h). Called from JaguarDone() so ASAN sees a -// clean shutdown. +/* Symmetric counterpart to the lazy init in m68k_pulse_reset(). + * Defers the actual free to readcpu.c::free_table68k() so the table + * is owned end-to-end by the module that allocates it (table68k is + * declared in readcpu.h). Called from JaguarDone() so ASAN sees a + * clean shutdown. */ void m68k_done(void) { free_table68k(); diff --git a/src/tom/gpu.c b/src/tom/gpu.c index d33a4f09..1bfdb245 100644 --- a/src/tom/gpu.c +++ b/src/tom/gpu.c @@ -645,15 +645,11 @@ void GPUInit(void) void GPUDone(void) { /* Release the branch-condition LUT so process-lifetime ASAN runs - * don't report it as a leak. Safe to call without a matching - * GPUInit (free(NULL) is a no-op) and safe to re-call after a - * subsequent GPUInit (build_branch_condition_table early-outs on - * non-NULL pointer). */ - if (branch_condition_table) - { - free(branch_condition_table); - branch_condition_table = NULL; - } + * don't report it as a leak. Unconditional: free(NULL) is a + * no-op, and a subsequent GPUInit() re-allocates cleanly because + * build_branch_condition_table() early-outs on non-NULL pointer. */ + free(branch_condition_table); + branch_condition_table = NULL; } void GPUReset(void) From da531e5e6b01f8df52ebc0cf8191e4044c408a45 Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 18:46:35 -0400 Subject: [PATCH 5/8] fix: OOB read in tom_render_*_scanline (closes #127) ASAN on PR #126 surfaced a global-buffer-overflow read 1 byte past tomRam8 in tom_render_16bpp_cry_scanline (src/tom/tom.c:625). Same pattern in all five render variants: the per-pixel loop walks current_line_buffer (= tomRam8[0x1800], 10240 bytes max) based on tomWidth, which can be set by display registers to a value larger than the line buffer holds. Fix: new helper tom_clamp_line_buffer_width() that, given the current cursor, the requested width, the per-iteration source-byte cost (2 for 16bpp, 4 for 24bpp), and the pwidth_scale, returns a clamped width that guarantees the loop won't read past the end of tomRam8. Applied to all five render entry points: - tom_render_16bpp_cry_rgb_mix_scanline - tom_render_16bpp_cry_scanline (the one ASAN caught) - tom_render_24bpp_scanline - tom_render_16bpp_direct_scanline - tom_render_16bpp_rgb_scanline Closes #127. With this in, the sanitizers CI job should go fully clean -- candidate to flip continue-on-error: true to false in a follow-up. Co-Authored-By: Claude Opus 4.7 --- src/tom/tom.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/tom/tom.c b/src/tom/tom.c index 9115e4cb..ccde532d 100644 --- a/src/tom/tom.c +++ b/src/tom/tom.c @@ -548,6 +548,29 @@ uint16_t TOMGetMEMCON1(void) } #define LEFT_BG_FIX + +/* Clamp `width` so the per-pixel loop in a tom_render_*_scanline() + * cannot walk past the end of tomRam8 via current_line_buffer. + * Catches an ASAN-reported OOB read (#127): when display registers + * request a width larger than the on-chip line buffer holds + * (10240 bytes at tomRam8[0x1800..0x3FFF]). + * + * bytes_per_iter: source bytes consumed per loop iter (2 for 16bpp + * variants, 4 for 24bpp). + * pwidth_scale: backbuffer pixels produced per loop iter. */ +static uint16_t tom_clamp_line_buffer_width( + const uint8_t *current_line_buffer, uint16_t width, + unsigned bytes_per_iter, uint8_t pwidth_scale) +{ + const uint8_t *lb_end = &tomRam8[sizeof(tomRam8)]; + unsigned long bytes_left; + unsigned long safe_width; + if (current_line_buffer >= lb_end) return 0; + bytes_left = (unsigned long)(lb_end - current_line_buffer); + safe_width = (bytes_left / bytes_per_iter) * pwidth_scale; + return (width > safe_width) ? (uint16_t)safe_width : width; +} + // 16 BPP CRY/RGB mixed mode rendering void tom_render_16bpp_cry_rgb_mix_scanline(uint32_t * backbuffer) { @@ -579,6 +602,7 @@ void tom_render_16bpp_cry_rgb_mix_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint16_t color = (*current_line_buffer++) << 8; @@ -620,6 +644,7 @@ void tom_render_16bpp_cry_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint16_t color = (*current_line_buffer++) << 8; @@ -661,6 +686,7 @@ void tom_render_24bpp_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 4, pwidth_scale); while (width >= pwidth_scale) { uint32_t b; @@ -687,6 +713,7 @@ void tom_render_16bpp_direct_scanline(uint32_t * backbuffer) uint8_t pwidth = ((GET16(tomRam8, VMODE) & PWIDTH) >> 9) + 1; uint8_t pwidth_scale = (pwidth >= 8) ? (pwidth / 4) : 1; + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint16_t color = (*current_line_buffer++) << 8; @@ -729,6 +756,7 @@ void tom_render_16bpp_rgb_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint32_t color = (*current_line_buffer++) << 8; From cbc22bfef06f1da505bfd259352d20fc4b5db136 Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 18:52:45 -0400 Subject: [PATCH 6/8] fix: rotate-by-zero UB in GPU/DSP ROR/RORQ opcodes UBSAN on PR #126 found: src/tom/gpu.c:1674: shift exponent 32 too large for uint32_t The classic rotate-right idiom (RN >> r1) | (RN << (32 - r1)) invokes UB when r1 == 0 (32-bit shift count of 32 is out of range). Replace with the standard portable pattern: (RN >> r1) | (RN << ((-r1) & 31)) which yields RN | RN == RN for r1 == 0 and is identical for r1 in [1, 31]. Modern GCC/clang pattern-match this idiom into a single rotate instruction (rorl on x86, ror on ARM). Six sites across two files: - src/tom/gpu.c gpu_opcode_ror, gpu_opcode_rorq - src/jerry/dsp.c dsp_opcode_ror, dsp_opcode_rorq, plus the prefetched (PRN) variants Co-Authored-By: Claude Opus 4.7 --- src/jerry/dsp.c | 8 ++++---- src/tom/gpu.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/jerry/dsp.c b/src/jerry/dsp.c index b243f4af..50e0e8ea 100644 --- a/src/jerry/dsp.c +++ b/src/jerry/dsp.c @@ -1409,7 +1409,7 @@ INLINE static void dsp_opcode_shrq(void) INLINE static void dsp_opcode_ror(void) { uint32_t r1 = RM & 0x1F; - uint32_t res = (RN >> r1) | (RN << (32 - r1)); + uint32_t res = (RN >> r1) | (RN << ((-r1) & 31)); SET_ZN(res); dsp_flag_c = (RN >> 31) & 1; RN = res; } @@ -1419,7 +1419,7 @@ INLINE static void dsp_opcode_rorq(void) { uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F]; uint32_t r2 = RN; - uint32_t res = (r2 >> r1) | (r2 << (32 - r1)); + uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); RN = res; SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01; } @@ -2226,7 +2226,7 @@ INLINE static void DSP_resmac(void) INLINE static void DSP_ror(void) { uint32_t r1 = PRM & 0x1F; - uint32_t res = (PRN >> r1) | (PRN << (32 - r1)); + uint32_t res = (PRN >> r1) | (PRN << ((-r1) & 31)); SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1; PRES = res; } @@ -2235,7 +2235,7 @@ INLINE static void DSP_rorq(void) { uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F]; uint32_t r2 = PRN; - uint32_t res = (r2 >> r1) | (r2 << (32 - r1)); + uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); PRES = res; SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01; } diff --git a/src/tom/gpu.c b/src/tom/gpu.c index 1bfdb245..65c06300 100644 --- a/src/tom/gpu.c +++ b/src/tom/gpu.c @@ -1671,7 +1671,7 @@ INLINE static void gpu_opcode_shrq(void) INLINE static void gpu_opcode_ror(void) { uint32_t r1 = RM & 0x1F; - uint32_t res = (RN >> r1) | (RN << (32 - r1)); + uint32_t res = (RN >> r1) | (RN << ((-r1) & 31)); SET_ZN(res); gpu_flag_c = (RN >> 31) & 1; RN = res; } @@ -1681,7 +1681,7 @@ INLINE static void gpu_opcode_rorq(void) { uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F]; uint32_t r2 = RN; - uint32_t res = (r2 >> r1) | (r2 << (32 - r1)); + uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); RN = res; SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01; } From 3c58cc90f51cc30b5cd5d232c07f30468a6263f0 Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 19:00:24 -0400 Subject: [PATCH 7/8] fix: rotate-by-32 UB in *_opcode_rorq + suppress padding noise Round-2 sanitizer fix for PR #126: UBSAN at src/tom/gpu.c:1684 -- "shift exponent 32 too large for uint32_t". In gpu_opcode_rorq / dsp_opcode_rorq / DSP_rorq the rotation count comes from gpu_convert_zero[] / dsp_convert_zero[], which maps a 0 IMM_1 to 32 (rotate-by-0 means rotate-by-full-word, a no-op). But `RN >> 32` is UB regardless of what the post-shift result is, so the previous-commit fix (only masking the LHS) wasn't enough. Mask r1 to 0x1F before either shift -- maps 32 -> 0, preserving the no-op semantic. Three sites: - src/tom/gpu.c gpu_opcode_rorq - src/jerry/dsp.c dsp_opcode_rorq, DSP_rorq Also add `-clang-analyzer-optin.performance.Padding` to the disabled list in .clang-tidy -- it fires on inherited UAE/Virtual Jaguar struct layouts where reordering for tighter packing risks silent layout breaks in save-state / dlsym paths. Co-Authored-By: Claude Opus 4.7 --- .clang-tidy | 3 +++ src/jerry/dsp.c | 8 ++++++-- src/tom/gpu.c | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index a296d577..66da0058 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -66,5 +66,8 @@ FormatStyle: none # locals that are read via macros (BCOMPEN, DSTA2, ...) clang-tidy can't see # the linkage for. Removing these inits would introduce real bugs. # - DeprecatedOrUnsafeBufferHandling: MSVC-flavored noise about strcpy/sprintf. +# - optin.performance.Padding: upstream UAE/Virtual Jaguar struct layouts +# inherited from 1990s emulator code; reordering for tighter packing risks +# silent layout breaks in save-state / dlsym paths and isn't free perf. # - optin.portability.UnixAPI: false positive on libretro_core_options.h calloc. # - valist.Uninitialized: false-positive prone on our log macros. diff --git a/src/jerry/dsp.c b/src/jerry/dsp.c index 50e0e8ea..f3999f75 100644 --- a/src/jerry/dsp.c +++ b/src/jerry/dsp.c @@ -1417,7 +1417,10 @@ INLINE static void dsp_opcode_ror(void) INLINE static void dsp_opcode_rorq(void) { - uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F]; + /* dsp_convert_zero[0] returns 32 (rotate-by-0 means rotate-by-full-word, + * a no-op). Masking to 0x1F maps 32 -> 0, preserving that semantic and + * avoiding `RN >> 32` UB in the rotate idiom below. */ + uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F] & 0x1F; uint32_t r2 = RN; uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); RN = res; @@ -2233,7 +2236,8 @@ INLINE static void DSP_ror(void) INLINE static void DSP_rorq(void) { - uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F]; + /* See dsp_opcode_rorq above for why we mask to 0x1F. */ + uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F] & 0x1F; uint32_t r2 = PRN; uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); PRES = res; diff --git a/src/tom/gpu.c b/src/tom/gpu.c index 65c06300..6332dc7a 100644 --- a/src/tom/gpu.c +++ b/src/tom/gpu.c @@ -1679,7 +1679,10 @@ INLINE static void gpu_opcode_ror(void) INLINE static void gpu_opcode_rorq(void) { - uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F]; + /* gpu_convert_zero[0] returns 32 (rotate-by-0 means rotate-by-full-word + * which is a no-op). Masking to 0x1F maps 32 -> 0, preserving that + * semantic and avoiding `RN >> 32` UB in the rotate idiom below. */ + uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F] & 0x1F; uint32_t r2 = RN; uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); RN = res; From 3d3fde03f50e2cb11906d560646bd882756e2105 Mon Sep 17 00:00:00 2001 From: Joseph Mattiello Date: Fri, 1 May 2026 19:07:14 -0400 Subject: [PATCH 8/8] ci: actually add the Padding suppression to .clang-tidy The previous commit added the rationale comment but the disable line itself didn't get written. Add it. Co-Authored-By: Claude Opus 4.7 --- .clang-tidy | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-tidy b/.clang-tidy index 66da0058..2f4e6e86 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -27,6 +27,7 @@ Checks: > -bugprone-incorrect-roundings, -bugprone-multi-level-implicit-pointer-conversion, -clang-analyzer-deadcode.DeadStores, + -clang-analyzer-optin.performance.Padding, -clang-analyzer-optin.portability.UnixAPI, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, -clang-analyzer-valist.Uninitialized