Skip to content

Commit 953f1f5

Browse files
authored
Fix Jaguar DSP MAC accumulator to true 40-bit semantics (#118)
Track IMACN accumulation with hardware-correct signed 40-bit wrap-around and keep bits 63..40 clear so that RESMAC and control-register reads stay consistent. Adds the dsp_acc40.h helpers and test/test_dsp_mac40.c (run in CI). Mitigates long IIR DSP paths (e.g. pink-noise generation) diverging from real hardware. (Made-with: Cursor)
1 parent 15207f5 commit 953f1f5

5 files changed

Lines changed: 123 additions & 6 deletions

File tree

.github/workflows/c-cpp.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ jobs:
190190
-o test_blitter_scalar test/test_blitter_simd.c src/blitter_simd_scalar.c
191191
./test_blitter_scalar
192192
193+
echo "==> DSP 40-bit MAC accumulator regression (dsp_acc40.h)..."
194+
$CC -O2 -Wall -I src -o test_dsp_mac40 test/test_dsp_mac40.c
195+
./test_dsp_mac40
196+
193197
- name: Upload artifact
194198
uses: actions/upload-artifact@v4
195199
with:

CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Key docs:
9393
### Testing
9494

9595
See `docs/test-infrastructure.md` for all test harnesses:
96+
- `test/test_dsp_mac40.c` — Jaguar DSP **40-bit MAC** accumulator semantics (`dsp_acc40.h`), run in CI with SIMD tests; relevant for long IIR chains (e.g. pink-noise generators on DSP).
9697
- `test/headless.py` — Python headless runner via libretro.py (screenshots, frame control)
9798
- `test/regression_test.sh` — screenshot regression suite with baseline comparison
9899
- `test/test_cd_boot.c` — low-level C harness with dlsym access to 68K registers and RAM

src/dsp.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
//
1616

1717
#include "dsp.h"
18+
#include "dsp_acc40.h"
1819

1920
#include <stdlib.h>
2021
#include "dac.h"
@@ -1250,8 +1251,8 @@ INLINE static void dsp_opcode_addqt(void)
12501251
INLINE static void dsp_opcode_imacn(void)
12511252
{
12521253
int32_t res = (int16_t)RM * (int16_t)RN;
1253-
dsp_acc += (int64_t)res;
1254-
//Should we AND the result to fit into 40 bits here???
1254+
1255+
dsp_acc_mac_apply(&dsp_acc, res);
12551256
}
12561257

12571258

@@ -1379,7 +1380,8 @@ INLINE static void dsp_opcode_imultn(void)
13791380
{
13801381
// This is OK, since this multiply won't overflow 32 bits...
13811382
int32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
1382-
dsp_acc = (int64_t)res;
1383+
1384+
dsp_acc_set_from_i32(&dsp_acc, res);
13831385
SET_ZN(res);
13841386
}
13851387

@@ -1828,8 +1830,8 @@ INLINE static void DSP_div(void)
18281830
INLINE static void DSP_imacn(void)
18291831
{
18301832
int32_t res = (int16_t)PRM * (int16_t)PRN;
1831-
dsp_acc += (int64_t)res;
1832-
//Should we AND the result to fit into 40 bits here???
1833+
1834+
dsp_acc_mac_apply(&dsp_acc, res);
18331835
NO_WRITEBACK;
18341836
}
18351837

@@ -1843,7 +1845,8 @@ INLINE static void DSP_imultn(void)
18431845
{
18441846
// This is OK, since this multiply won't overflow 32 bits...
18451847
int32_t res = (int32_t)((int16_t)PRN * (int16_t)PRM);
1846-
dsp_acc = (int64_t)res;
1848+
1849+
dsp_acc_set_from_i32(&dsp_acc, res);
18471850
SET_ZN(res);
18481851
NO_WRITEBACK;
18491852
}

src/dsp_acc40.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
 * Jaguar DSP MAC accumulator helpers.
 *
 * The accumulator is 40-bit signed two's complement. Only bits 39..0 are
 * stored, in the low bits of a uint64_t; bits 63..40 must stay zero so that
 * RESMAC and control-register reads (dsp_acc >> 32) stay correct.
 */

#ifndef DSP_ACC40_H
#define DSP_ACC40_H

#include <stdint.h>

#ifndef DSP_ACC40_INLINE
#if defined(_MSC_VER)
#define DSP_ACC40_INLINE static __inline
#else
#define DSP_ACC40_INLINE static inline
#endif
#endif

/* Mask selecting the 40 stored accumulator bits (39..0). */
#define DSP_ACC_U40_MASK UINT64_C(0xFFFFFFFFFF)

/*
 * Interpret the low 40 bits of `raw` as a signed two's complement value.
 * Bits 63..40 of `raw` are ignored. The result lies in [-2^39, 2^39 - 1].
 */
DSP_ACC40_INLINE int64_t dsp_acc_i40_signed(uint64_t raw)
{
	/* After masking the value is in [0, 2^40), so this cast is exact. */
	int64_t value = (int64_t)(raw & DSP_ACC_U40_MASK);

	/* Bit 39 is the sign bit: subtract 2^40 rather than OR-ing in high
	 * bits, which keeps the arithmetic fully defined in the signed domain. */
	if (value & ((int64_t)1 << 39))
		value -= (int64_t)1 << 40;
	return value;
}

/*
 * Reduce `v` modulo 2^40 and return it in stored form (bits 63..40 zero).
 * Signed-to-unsigned conversion is modular, so masking the converted value
 * yields the 40-bit two's complement pattern of `v` directly.
 */
DSP_ACC40_INLINE uint64_t dsp_acc_wrap_store_i40(int64_t v)
{
	return (uint64_t)v & DSP_ACC_U40_MASK;
}

/*
 * Add the signed product `prod` to the accumulator at `acc`, wrapping at
 * 40 bits. `acc` must point to a valid accumulator; any stray bits above
 * bit 39 in `*acc` are discarded.
 */
DSP_ACC40_INLINE void dsp_acc_mac_apply(uint64_t *acc, int32_t prod)
{
	/* |40-bit value| + |32-bit value| cannot overflow int64_t. */
	int64_t sum = dsp_acc_i40_signed(*acc) + (int64_t)prod;

	*acc = dsp_acc_wrap_store_i40(sum);
}

/*
 * Load the accumulator at `acc` with `res`, sign-extended to 40 bits.
 */
DSP_ACC40_INLINE void dsp_acc_set_from_i32(uint64_t *acc, int32_t res)
{
	*acc = dsp_acc_wrap_store_i40((int64_t)res);
}

#endif

test/test_dsp_mac40.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Unit tests for src/dsp_acc40.h (Jaguar DSP 40-bit MAC semantics).
3+
* Build (from the repository root): cc -O2 -Wall -I src -o test_dsp_mac40 test/test_dsp_mac40.c
4+
*/
5+
6+
#include <stdio.h>
7+
#include <stdlib.h>
8+
#include <stdint.h>
9+
10+
#include "dsp_acc40.h"
11+
12+
int main(void)
13+
{
14+
uint64_t acc;
15+
int i;
16+
int failed = 0;
17+
18+
/* Only 40 physical bits; high 24 must stay clear */
19+
acc = 0;
20+
for (i = 0; i < 50000; i++)
21+
dsp_acc_mac_apply(&acc, 7777);
22+
if (acc & ~(DSP_ACC_U40_MASK))
23+
{
24+
fprintf(stderr, "FAIL: bits above 39 set after MAC loop (acc=%llx)\n",
25+
(unsigned long long)acc);
26+
failed++;
27+
}
28+
29+
/* RESMAC-style low 32 must be stable across wraps */
30+
acc = DSP_ACC_U40_MASK;
31+
dsp_acc_mac_apply(&acc, 1);
32+
if ((uint32_t)acc != 0)
33+
{
34+
fprintf(stderr, "FAIL: wrap +1 from 40-bit all-ones (got low32=%x)\n",
35+
(unsigned int)(uint32_t)acc);
36+
failed++;
37+
}
38+
39+
/* Load int32 -1 into acc (sign-extended in 40-bit domain) */
40+
dsp_acc_set_from_i32(&acc, -1);
41+
if (acc != DSP_ACC_U40_MASK)
42+
{
43+
fprintf(stderr, "FAIL: set -1 (acc=%llx expected %llx)\n",
44+
(unsigned long long)acc, (unsigned long long)DSP_ACC_U40_MASK);
45+
failed++;
46+
}
47+
48+
if (failed)
49+
{
50+
fprintf(stderr, "test_dsp_mac40: %d failure(s)\n", failed);
51+
return 1;
52+
}
53+
54+
printf("test_dsp_mac40: OK\n");
55+
return 0;
56+
}

0 commit comments

Comments
 (0)