Skip to content

Commit 4957273

Browse files
committed
Fix CI buildbot failures: MSVC C89, SSE2 32-bit, webOS cross-compile
- libretro.c: Move extern declarations to the top of function blocks for MSVC 2005/2010 C89 compliance (error C2143 on mid-block declarations).
- blitter_simd_sse2.c: Replace _mm_cvtsi128_si64 with an _mm_storel_epi64 helper — the former only exists on x86_64, breaking 32-bit x86 builds (Linux i686, Windows i686 MinGW). Also fix C89 mid-block declarations in sse2_zcomp and sse2_byte_merge.
- Makefile.common: Detect cross-compiler prefixes (arm-, aarch64-, mips, powerpc) in CC to skip the host uname -m SIMD fallback. Fixes the webOS ARM build getting SSE2 when built on an x86_64 host.

Made-with: Cursor
1 parent 065c8b3 commit 4957273

3 files changed

Lines changed: 26 additions & 11 deletions

File tree

Makefile.common

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@ endif
109109
# Native build fallback: auto-detect from host architecture, but only for
110110
# native-build platforms (unix/osx/win). Cross-compile targets (vita, ps3,
111111
# libnx, etc.) set platform explicitly and should not use host detection.
112+
# Also skip when CC looks like a cross-compiler (e.g. arm-webos-linux-gnueabi-gcc).
113+
BLITTER_CROSS_CC := $(findstring arm-,$(CC))$(findstring aarch64-,$(CC))$(findstring mips,$(CC))$(findstring powerpc,$(CC))
112114
ifeq ($(BLITTER_SIMD_SRC),)
115+
ifeq ($(BLITTER_CROSS_CC),)
113116
ifneq (,$(filter unix osx win,$(platform)))
114117
ifneq (,$(filter x86_64 i686 i386,$(shell uname -m 2>/dev/null)))
115118
BLITTER_SIMD_SRC := $(CORE_DIR)/src/blitter_simd_sse2.c
@@ -120,6 +123,7 @@ endif
120123
endif
121124
endif
122125
endif
126+
endif
123127

124128
# Fall back to scalar if no SIMD was selected (e.g., exotic platforms)
125129
ifeq ($(BLITTER_SIMD_SRC),)

libretro.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,8 @@ bool retro_serialize(void *data, size_t size)
781781
uint8_t *buf, *start;
782782
size_t written;
783783
uint32_t magic, version, flags, reserved;
784+
extern uint8_t jerry_ram_8[];
785+
extern bool lowerField;
784786

785787
if (!data || size < STATE_SIZE)
786788
return false;
@@ -802,11 +804,9 @@ bool retro_serialize(void *data, size_t size)
802804
STATE_SAVE_BUF(buf, jaguarMainRAM, 0x200000); /* 2 MB main RAM */
803805
STATE_SAVE_BUF(buf, tomRam8, 0x4000); /* 16 KB TOM registers */
804806

805-
extern uint8_t jerry_ram_8[];
806807
STATE_SAVE_BUF(buf, jerry_ram_8, 0x10000); /* 64 KB JERRY registers */
807808

808809
/* Jaguar misc state */
809-
extern bool lowerField;
810810
STATE_SAVE_VAR(buf, lowerField);
811811

812812
/* Module state */
@@ -838,6 +838,8 @@ bool retro_unserialize(const void *data, size_t size)
838838
{
839839
const uint8_t *buf;
840840
uint32_t magic, version, flags, reserved;
841+
extern uint8_t jerry_ram_8[];
842+
extern bool lowerField;
841843

842844
if (!data || size < STATE_SIZE)
843845
return false;
@@ -857,11 +859,9 @@ bool retro_unserialize(const void *data, size_t size)
857859
STATE_LOAD_BUF(buf, jaguarMainRAM, 0x200000);
858860
STATE_LOAD_BUF(buf, tomRam8, 0x4000);
859861

860-
extern uint8_t jerry_ram_8[];
861862
STATE_LOAD_BUF(buf, jerry_ram_8, 0x10000);
862863

863864
/* Jaguar misc state */
864-
extern bool lowerField;
865865
STATE_LOAD_VAR(buf, lowerField);
866866

867867
/* Module state */

src/blitter_simd_sse2.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111
#include "blitter_simd.h"
1212
#include <emmintrin.h> /* SSE2 */
1313

14+
/* _mm_cvtsi128_si64 only exists on x86_64 (needs 64-bit GP register).
15+
* Use _mm_storel_epi64 which works on both 32-bit and 64-bit x86. */
16+
static uint64_t sse2_extract_u64(__m128i v)
17+
{
18+
uint64_t r;
19+
_mm_storel_epi64((__m128i *)&r, v);
20+
return r;
21+
}
22+
1423
/* Logic Function Unit — SSE2
1524
*
1625
* The truth table has 4 terms, each gated by one bit of lfu_func.
@@ -41,7 +50,7 @@ static uint64_t sse2_lfu(uint64_t srcd, uint64_t dstd, uint8_t lfu_func)
4150
__m128i t3 = _mm_and_si128(_mm_and_si128(vs, vd), vf3);
4251

4352
__m128i result = _mm_or_si128(_mm_or_si128(t0, t1), _mm_or_si128(t2, t3));
44-
return (uint64_t)_mm_cvtsi128_si64(result);
53+
return sse2_extract_u64(result);
4554
}
4655

4756
/* Data Comparator — SSE2
@@ -75,6 +84,7 @@ static uint8_t sse2_dcomp(uint64_t patd, uint64_t srcd, uint64_t dstd, bool cmpd
7584
static uint8_t sse2_zcomp(uint64_t srcz, uint64_t dstz, uint8_t zmode)
7685
{
7786
uint8_t result = 0;
87+
uint8_t packed = 0;
7888

7989
__m128i vs = _mm_set_epi64x(0, (int64_t)srcz);
8090
__m128i vd = _mm_set_epi64x(0, (int64_t)dstz);
@@ -107,7 +117,6 @@ static uint8_t sse2_zcomp(uint64_t srcz, uint64_t dstz, uint8_t zmode)
107117

108118
/* movemask gives 2 bits per 16-bit lane (one per byte).
109119
* Convert to 1 bit per lane: lanes at positions 0,2,4,6 */
110-
uint8_t packed = 0;
111120
if (result & 0x03) packed |= 0x01; /* lane 0: bytes 0-1 */
112121
if (result & 0x0C) packed |= 0x02; /* lane 1: bytes 2-3 */
113122
if (result & 0x30) packed |= 0x04; /* lane 2: bytes 4-5 */
@@ -129,6 +138,8 @@ static uint64_t sse2_byte_merge(uint64_t src, uint64_t dst, uint16_t mask)
129138
* Bytes 1-7 = 0xFF or 0x00 from mask bits 8-14 (whole-byte select).
130139
* We expand each bit to a full 0xFF byte using sign-extension. */
131140
uint64_t sel64 = (uint64_t)(mask & 0xFF); /* byte 0: per-bit */
141+
__m128i vmask, vsrc, vdst, r;
142+
132143
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 8) & 1))) << 8;
133144
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 9) & 1))) << 16;
134145
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 10) & 1))) << 24;
@@ -137,17 +148,17 @@ static uint64_t sse2_byte_merge(uint64_t src, uint64_t dst, uint16_t mask)
137148
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 13) & 1))) << 48;
138149
sel64 |= (uint64_t)((uint8_t)(-(int8_t)((mask >> 14) & 1))) << 56;
139150

140-
__m128i vmask = _mm_set_epi64x(0, (int64_t)sel64);
141-
__m128i vsrc = _mm_set_epi64x(0, (int64_t)src);
142-
__m128i vdst = _mm_set_epi64x(0, (int64_t)dst);
151+
vmask = _mm_set_epi64x(0, (int64_t)sel64);
152+
vsrc = _mm_set_epi64x(0, (int64_t)src);
153+
vdst = _mm_set_epi64x(0, (int64_t)dst);
143154

144155
/* result = (src & mask) | (dst & ~mask) */
145-
__m128i r = _mm_or_si128(
156+
r = _mm_or_si128(
146157
_mm_and_si128(vsrc, vmask),
147158
_mm_andnot_si128(vmask, vdst)
148159
);
149160

150-
return (uint64_t)_mm_cvtsi128_si64(r);
161+
return sse2_extract_u64(r);
151162
}
152163

153164
const blitter_simd_ops_t blitter_simd_ops = {

0 commit comments

Comments
 (0)