Skip to content

Commit a1bffbf

Browse files
committed
Fix CI buildbot failures: MSVC C89, SSE2 32-bit, webOS cross-compile
- libretro.c: Move extern declarations to the top of their function blocks for MSVC 2005/2010 C89 compliance (mid-block declarations trigger error C2143). - blitter_simd_sse2.c: Replace _mm_cvtsi128_si64 with a memcpy-based sse2_extract_u64 helper — the intrinsic only exists on x86_64, which broke 32-bit x86 builds (Linux i686, Windows i686 MinGW). Also fix C89 mid-block declarations in sse2_zcomp and sse2_byte_merge. - Makefile.common: Detect cross-compiler prefixes (arm-, aarch64-, mips, powerpc) in CC and skip the host uname -m SIMD fallback. Fixes the webOS ARM build selecting SSE2 when built on an x86_64 host. Made-with: Cursor
1 parent 065c8b3 commit a1bffbf

3 files changed

Lines changed: 28 additions & 11 deletions

File tree

Makefile.common

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@ endif
109109
# Native build fallback: auto-detect from host architecture, but only for
110110
# native-build platforms (unix/osx/win). Cross-compile targets (vita, ps3,
111111
# libnx, etc.) set platform explicitly and should not use host detection.
112+
# Also skip when CC looks like a cross-compiler (e.g. arm-webos-linux-gnueabi-gcc).
113+
BLITTER_CROSS_CC := $(findstring arm-,$(CC))$(findstring aarch64-,$(CC))$(findstring mips,$(CC))$(findstring powerpc,$(CC))
112114
ifeq ($(BLITTER_SIMD_SRC),)
115+
ifeq ($(BLITTER_CROSS_CC),)
113116
ifneq (,$(filter unix osx win,$(platform)))
114117
ifneq (,$(filter x86_64 i686 i386,$(shell uname -m 2>/dev/null)))
115118
BLITTER_SIMD_SRC := $(CORE_DIR)/src/blitter_simd_sse2.c
@@ -120,6 +123,7 @@ endif
120123
endif
121124
endif
122125
endif
126+
endif
123127

124128
# Fall back to scalar if no SIMD was selected (e.g., exotic platforms)
125129
ifeq ($(BLITTER_SIMD_SRC),)

libretro.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,8 @@ bool retro_serialize(void *data, size_t size)
781781
uint8_t *buf, *start;
782782
size_t written;
783783
uint32_t magic, version, flags, reserved;
784+
extern uint8_t jerry_ram_8[];
785+
extern bool lowerField;
784786

785787
if (!data || size < STATE_SIZE)
786788
return false;
@@ -802,11 +804,9 @@ bool retro_serialize(void *data, size_t size)
802804
STATE_SAVE_BUF(buf, jaguarMainRAM, 0x200000); /* 2 MB main RAM */
803805
STATE_SAVE_BUF(buf, tomRam8, 0x4000); /* 16 KB TOM registers */
804806

805-
extern uint8_t jerry_ram_8[];
806807
STATE_SAVE_BUF(buf, jerry_ram_8, 0x10000); /* 64 KB JERRY registers */
807808

808809
/* Jaguar misc state */
809-
extern bool lowerField;
810810
STATE_SAVE_VAR(buf, lowerField);
811811

812812
/* Module state */
@@ -838,6 +838,8 @@ bool retro_unserialize(const void *data, size_t size)
838838
{
839839
const uint8_t *buf;
840840
uint32_t magic, version, flags, reserved;
841+
extern uint8_t jerry_ram_8[];
842+
extern bool lowerField;
841843

842844
if (!data || size < STATE_SIZE)
843845
return false;
@@ -857,11 +859,9 @@ bool retro_unserialize(const void *data, size_t size)
857859
STATE_LOAD_BUF(buf, jaguarMainRAM, 0x200000);
858860
STATE_LOAD_BUF(buf, tomRam8, 0x4000);
859861

860-
extern uint8_t jerry_ram_8[];
861862
STATE_LOAD_BUF(buf, jerry_ram_8, 0x10000);
862863

863864
/* Jaguar misc state */
864-
extern bool lowerField;
865865
STATE_LOAD_VAR(buf, lowerField);
866866

867867
/* Module state */

src/blitter_simd_sse2.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,17 @@
1010

1111
#include "blitter_simd.h"
1212
#include <emmintrin.h> /* SSE2 */
13+
#include <string.h> /* memcpy for type-punning extract */
14+
15+
/* Return the first 8 bytes of v (the low 64-bit lane on little-endian x86).
16+
 * _mm_cvtsi128_si64 only exists on x86_64 (needs a 64-bit GP register), so the
17+
 * value is copied out with memcpy: portable and alignment-safe. Codegen is up to the compiler. */
18+
static uint64_t sse2_extract_u64(__m128i v)
19+
{
20+
uint64_t r;
21+
memcpy(&r, &v, sizeof(r));
22+
return r;
23+
}
1324

1425
/* Logic Function Unit — SSE2
1526
*
@@ -41,7 +52,7 @@ static uint64_t sse2_lfu(uint64_t srcd, uint64_t dstd, uint8_t lfu_func)
4152
__m128i t3 = _mm_and_si128(_mm_and_si128(vs, vd), vf3);
4253

4354
__m128i result = _mm_or_si128(_mm_or_si128(t0, t1), _mm_or_si128(t2, t3));
44-
return (uint64_t)_mm_cvtsi128_si64(result);
55+
return sse2_extract_u64(result);
4556
}
4657

4758
/* Data Comparator — SSE2
@@ -75,6 +86,7 @@ static uint8_t sse2_dcomp(uint64_t patd, uint64_t srcd, uint64_t dstd, bool cmpd
7586
static uint8_t sse2_zcomp(uint64_t srcz, uint64_t dstz, uint8_t zmode)
7687
{
7788
uint8_t result = 0;
89+
uint8_t packed = 0;
7890

7991
__m128i vs = _mm_set_epi64x(0, (int64_t)srcz);
8092
__m128i vd = _mm_set_epi64x(0, (int64_t)dstz);
@@ -107,7 +119,6 @@ static uint8_t sse2_zcomp(uint64_t srcz, uint64_t dstz, uint8_t zmode)
107119

108120
/* movemask gives 2 bits per 16-bit lane (one per byte).
109121
* Convert to 1 bit per lane: lanes at positions 0,2,4,6 */
110-
uint8_t packed = 0;
111122
if (result & 0x03) packed |= 0x01; /* lane 0: bytes 0-1 */
112123
if (result & 0x0C) packed |= 0x02; /* lane 1: bytes 2-3 */
113124
if (result & 0x30) packed |= 0x04; /* lane 2: bytes 4-5 */
@@ -129,6 +140,8 @@ static uint64_t sse2_byte_merge(uint64_t src, uint64_t dst, uint16_t mask)
129140
* Bytes 1-7 = 0xFF or 0x00 from mask bits 8-14 (whole-byte select).
130141
* We expand each bit to a full 0xFF byte using sign-extension. */
131142
uint64_t sel64 = (uint64_t)(mask & 0xFF); /* byte 0: per-bit */
143+
__m128i vmask, vsrc, vdst, r;
144+
132145
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 8) & 1))) << 8;
133146
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 9) & 1))) << 16;
134147
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 10) & 1))) << 24;
@@ -137,17 +150,17 @@ static uint64_t sse2_byte_merge(uint64_t src, uint64_t dst, uint16_t mask)
137150
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 13) & 1))) << 48;
138151
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 14) & 1))) << 56;
139152

140-
__m128i vmask = _mm_set_epi64x(0, (int64_t)sel64);
141-
__m128i vsrc = _mm_set_epi64x(0, (int64_t)src);
142-
__m128i vdst = _mm_set_epi64x(0, (int64_t)dst);
153+
vmask = _mm_set_epi64x(0, (int64_t)sel64);
154+
vsrc = _mm_set_epi64x(0, (int64_t)src);
155+
vdst = _mm_set_epi64x(0, (int64_t)dst);
143156

144157
/* result = (src & mask) | (dst & ~mask) */
145-
__m128i r = _mm_or_si128(
158+
r = _mm_or_si128(
146159
_mm_and_si128(vsrc, vmask),
147160
_mm_andnot_si128(vmask, vdst)
148161
);
149162

150-
return (uint64_t)_mm_cvtsi128_si64(r);
163+
return sse2_extract_u64(r);
151164
}
152165

153166
const blitter_simd_ops_t blitter_simd_ops = {

0 commit comments

Comments
 (0)