diff --git a/.clang-tidy b/.clang-tidy index dffc5c4f..2f4e6e86 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -24,7 +24,10 @@ Checks: > -bugprone-suspicious-include, -bugprone-switch-missing-default-case, -bugprone-branch-clone, + -bugprone-incorrect-roundings, + -bugprone-multi-level-implicit-pointer-conversion, -clang-analyzer-deadcode.DeadStores, + -clang-analyzer-optin.performance.Padding, -clang-analyzer-optin.portability.UnixAPI, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, -clang-analyzer-valist.Uninitialized @@ -49,6 +52,13 @@ FormatStyle: none # - suspicious-include: project includes .c files in a couple of dispatch headers. # - switch-missing-default-case: cosmetic; switches on bit-field decode patterns # commonly omit default because all valid bit values are handled. +# - incorrect-roundings: src/core/event.h's USEC_TO_RISC_CYCLES / +# USEC_TO_M68K_CYCLES use the (double + 0.5) integer-cast idiom for +# cycle conversion. lround() is C99 (we're C89/GNU89), and the +# inputs are non-negative so the idiom is correct here. +# - multi-level-implicit-pointer-conversion: dlsym() returns void* +# that test harnesses cast directly to function pointers / data +# pointer-to-pointer; canonical idiom in the libretro dlsym pattern. # - branch-clone: register-decode if-chains in src/cd/cdrom.c and src/tom/tom.c # intentionally write the same value for several adjacent register addresses # to make the address->effect mapping legible. Real bug clones are caught @@ -57,5 +67,8 @@ FormatStyle: none # locals that are read via macros (BCOMPEN, DSTA2, ...) clang-tidy can't see # the linkage for. Removing these inits would introduce real bugs. # - DeprecatedOrUnsafeBufferHandling: MSVC-flavored noise about strcpy/sprintf. +# - optin.performance.Padding: upstream UAE/Virtual Jaguar struct layouts +# inherited from 1990s emulator code; reordering for tighter packing risks +# silent layout breaks in save-state / dlsym paths and isn't free perf. # - optin.portability.UnixAPI: false positive on libretro_core_options.h calloc. # - valist.Uninitialized: false-positive prone on our log macros. diff --git a/src/core/jaguar.c b/src/core/jaguar.c index 47d2d25b..58527756 100644 --- a/src/core/jaguar.c +++ b/src/core/jaguar.c @@ -919,6 +919,7 @@ void JaguarDone(void) DSPDone(); TOMDone(); JERRYDone(); + m68k_done(); } uint8_t * GetRamPtr(void) diff --git a/src/jerry/dsp.c b/src/jerry/dsp.c index b243f4af..f3999f75 100644 --- a/src/jerry/dsp.c +++ b/src/jerry/dsp.c @@ -1409,7 +1409,7 @@ INLINE static void dsp_opcode_shrq(void) INLINE static void dsp_opcode_ror(void) { uint32_t r1 = RM & 0x1F; - uint32_t res = (RN >> r1) | (RN << (32 - r1)); + uint32_t res = (RN >> r1) | (RN << ((-r1) & 31)); SET_ZN(res); dsp_flag_c = (RN >> 31) & 1; RN = res; } @@ -1417,9 +1417,12 @@ INLINE static void dsp_opcode_ror(void) INLINE static void dsp_opcode_rorq(void) { - uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F]; + /* dsp_convert_zero[0] returns 32 (rotate-by-0 means rotate-by-full-word, + * a no-op). Masking to 0x1F maps 32 -> 0, preserving that semantic and + * avoiding `RN >> 32` UB in the rotate idiom below. */ + uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F] & 0x1F; uint32_t r2 = RN; - uint32_t res = (r2 >> r1) | (r2 << (32 - r1)); + uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); RN = res; SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01; } @@ -2226,16 +2229,17 @@ INLINE static void DSP_resmac(void) INLINE static void DSP_ror(void) { uint32_t r1 = PRM & 0x1F; - uint32_t res = (PRN >> r1) | (PRN << (32 - r1)); + uint32_t res = (PRN >> r1) | (PRN << ((-r1) & 31)); SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1; PRES = res; } INLINE static void DSP_rorq(void) { - uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F]; + /* See dsp_opcode_rorq above for why we mask to 0x1F. */ + uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F] & 0x1F; uint32_t r2 = PRN; - uint32_t res = (r2 >> r1) | (r2 << (32 - r1)); + uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); PRES = res; SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01; } diff --git a/src/m68000/m68kinterface.c b/src/m68000/m68kinterface.c index 559411d8..a49077fc 100644 --- a/src/m68000/m68kinterface.c +++ b/src/m68000/m68kinterface.c @@ -78,11 +78,23 @@ void M68KDebugResume(void) } +/* File-scope so m68k_done() below can reset it after freeing table68k. */ +static uint32_t emulation_initialized = 0; + +/* Symmetric counterpart to the lazy init in m68k_pulse_reset(). + * Defers the actual free to readcpu.c::free_table68k() so the table + * is owned end-to-end by the module that allocates it (table68k is + * declared in readcpu.h). Called from JaguarDone() so ASAN sees a + * clean shutdown. */ +void m68k_done(void) +{ + free_table68k(); + emulation_initialized = 0; +} + // Pulse the RESET line on the CPU void m68k_pulse_reset(void) { - static uint32_t emulation_initialized = 0; - // The first call to this function initializes the opcode handler jump table if (!emulation_initialized) { diff --git a/src/m68000/m68kinterface.h b/src/m68000/m68kinterface.h index ee1e4d90..c25748c0 100644 --- a/src/m68000/m68kinterface.h +++ b/src/m68000/m68kinterface.h @@ -70,6 +70,7 @@ typedef enum #define M68K_INT_ACK_SPURIOUS 0xFFFFFFFE void m68k_pulse_reset(void); +void m68k_done(void); int m68k_execute(int num_cycles); void m68k_set_irq(unsigned int int_level); diff --git a/src/m68000/readcpu.c b/src/m68000/readcpu.c index ba96aebe..3448a820 100644 --- a/src/m68000/readcpu.c +++ b/src/m68000/readcpu.c @@ -922,6 +922,20 @@ static void build_insn(int insn) } +/* Symmetric counterpart to read_table68k: free the opcode table built + above. Idempotent (free(NULL) is a no-op). Called from + m68k_done() in m68kinterface.c so process-lifetime ASAN runs see a + clean shutdown. */ +void free_table68k(void) +{ + if (table68k) + { + free(table68k); + table68k = NULL; + } +} + + void read_table68k(void) { int i; diff --git a/src/m68000/readcpu.h b/src/m68000/readcpu.h index 5fe0d495..a180ad7d 100644 --- a/src/m68000/readcpu.h +++ b/src/m68000/readcpu.h @@ -112,6 +112,7 @@ extern struct instr { } *table68k; extern void read_table68k(void); +extern void free_table68k(void); extern void do_merges(void); extern int get_no_mismatches(void); extern int nr_cpuop_funcs; diff --git a/src/tom/gpu.c b/src/tom/gpu.c index 077d8c55..6332dc7a 100644 --- a/src/tom/gpu.c +++ b/src/tom/gpu.c @@ -642,6 +642,16 @@ void GPUInit(void) GPUReset(); } +void GPUDone(void) +{ + /* Release the branch-condition LUT so process-lifetime ASAN runs + * don't report it as a leak. Unconditional: free(NULL) is a + * no-op, and a subsequent GPUInit() re-allocates cleanly because + * build_branch_condition_table() early-outs on non-NULL pointer. */ + free(branch_condition_table); + branch_condition_table = NULL; +} + void GPUReset(void) { unsigned i; @@ -1661,7 +1671,7 @@ INLINE static void gpu_opcode_shrq(void) INLINE static void gpu_opcode_ror(void) { uint32_t r1 = RM & 0x1F; - uint32_t res = (RN >> r1) | (RN << (32 - r1)); + uint32_t res = (RN >> r1) | (RN << ((-r1) & 31)); SET_ZN(res); gpu_flag_c = (RN >> 31) & 1; RN = res; } @@ -1669,9 +1679,12 @@ INLINE static void gpu_opcode_ror(void) INLINE static void gpu_opcode_rorq(void) { - uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F]; + /* gpu_convert_zero[0] returns 32 (rotate-by-0 means rotate-by-full-word + * which is a no-op). Masking to 0x1F maps 32 -> 0, preserving that + * semantic and avoiding `RN >> 32` UB in the rotate idiom below. */ + uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F] & 0x1F; uint32_t r2 = RN; - uint32_t res = (r2 >> r1) | (r2 << (32 - r1)); + uint32_t res = (r2 >> r1) | (r2 << ((-r1) & 31)); RN = res; SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01; } diff --git a/src/tom/gpu.h b/src/tom/gpu.h index f44abbf2..75869c0f 100644 --- a/src/tom/gpu.h +++ b/src/tom/gpu.h @@ -15,6 +15,7 @@ extern "C" { #define GPU_WORK_RAM_BASE 0x00F03000 void GPUInit(void); +void GPUDone(void); void GPUReset(void); void GPUExec(int32_t); void GPUUpdateRegisterBanks(void); diff --git a/src/tom/tom.c b/src/tom/tom.c index 542e54ad..ccde532d 100644 --- a/src/tom/tom.c +++ b/src/tom/tom.c @@ -548,6 +548,29 @@ uint16_t TOMGetMEMCON1(void) } #define LEFT_BG_FIX + +/* Clamp `width` so the per-pixel loop in a tom_render_*_scanline() + * cannot walk past the end of tomRam8 via current_line_buffer. + * Catches an ASAN-reported OOB read (#127): when display registers + * request a width larger than the on-chip line buffer holds + * (10240 bytes at tomRam8[0x1800..0x3FFF]). + * + * bytes_per_iter: source bytes consumed per loop iter (2 for 16bpp + * variants, 4 for 24bpp). + * pwidth_scale: backbuffer pixels produced per loop iter. */ +static uint16_t tom_clamp_line_buffer_width( + const uint8_t *current_line_buffer, uint16_t width, + unsigned bytes_per_iter, uint8_t pwidth_scale) +{ + const uint8_t *lb_end = &tomRam8[sizeof(tomRam8)]; + unsigned long bytes_left; + unsigned long safe_width; + if (current_line_buffer >= lb_end) return 0; + bytes_left = (unsigned long)(lb_end - current_line_buffer); + safe_width = (bytes_left / bytes_per_iter) * pwidth_scale; + return (width > safe_width) ? (uint16_t)safe_width : width; +} + // 16 BPP CRY/RGB mixed mode rendering void tom_render_16bpp_cry_rgb_mix_scanline(uint32_t * backbuffer) { @@ -579,6 +602,7 @@ void tom_render_16bpp_cry_rgb_mix_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint16_t color = (*current_line_buffer++) << 8; @@ -620,6 +644,7 @@ void tom_render_16bpp_cry_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint16_t color = (*current_line_buffer++) << 8; @@ -661,6 +686,7 @@ void tom_render_24bpp_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 4, pwidth_scale); while (width >= pwidth_scale) { uint32_t b; @@ -687,6 +713,7 @@ void tom_render_16bpp_direct_scanline(uint32_t * backbuffer) uint8_t pwidth = ((GET16(tomRam8, VMODE) & PWIDTH) >> 9) + 1; uint8_t pwidth_scale = (pwidth >= 8) ? (pwidth / 4) : 1; + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint16_t color = (*current_line_buffer++) << 8; @@ -729,6 +756,7 @@ void tom_render_16bpp_rgb_scanline(uint32_t * backbuffer) backbuffer += 2 * startPos, width -= startPos; #endif + width = tom_clamp_line_buffer_width(current_line_buffer, width, 2, pwidth_scale); while (width >= pwidth_scale) { uint32_t color = (*current_line_buffer++) << 8; @@ -846,6 +874,7 @@ void TOMDone(void) { OPDone(); BlitterDone(); + GPUDone(); } diff --git a/test/test_hle_bios.c b/test/test_hle_bios.c index d3b229f7..eecd3030 100644 --- a/test/test_hle_bios.c +++ b/test/test_hle_bios.c @@ -3172,6 +3172,10 @@ int main(int argc, char *argv[]) printf("\n=== Results: %d passed, %d failed ===\n", passes, fails); + /* Match the second p_retro_load_game (PAL) call above. Without + * this unload, JaguarDone() never runs for the PAL load and ASAN + * reports table68k + branch_condition_table as leaks. */ + p_retro_unload_game(); p_retro_deinit(); dlclose(handle); free(dummy_rom);