Skip to content

Commit 3f6c66a

Browse files
committed
blitter.c use structs, const and u-ints
The GPU_RUNNING macro was pretty slow on ARM for some reason; bitwise structs are faster in my testing. Try some blitter optimizations. Blitter sign fixes. Signed-off-by: Joe Mattiello <[email protected]> Comment out unused simd import. Signed-off-by: Joe Mattiello <[email protected]> Try some GPU optimizations and clarity. Post-rebase cleanup. Signed-off-by: Joseph Mattello <[email protected]> Remove USE_STRUCTS ifdef. Signed-off-by: Joseph Mattello <[email protected]> gpu.c: fix bad return. Signed-off-by: Joseph Mattello <[email protected]> Fix gpu opcode bad merge. Signed-off-by: Joseph Mattello <[email protected]> Remove duplicate struct defs. Signed-off-by: Joseph Mattello <[email protected]>
1 parent 13d9abf commit 3f6c66a

1 file changed

Lines changed: 100 additions & 88 deletions

File tree

src/blitter.c

Lines changed: 100 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -87,41 +87,41 @@ void BlitterMidsummer2(void);
8787

8888
// Blitter command bits
8989

90-
#define SRCEN (cmd & 0x00000001)
91-
#define SRCENZ (cmd & 0x00000002)
92-
#define SRCENX (cmd & 0x00000004)
93-
#define DSTEN (cmd & 0x00000008)
94-
#define DSTENZ (cmd & 0x00000010)
95-
#define DSTWRZ (cmd & 0x00000020)
96-
#define CLIPA1 (cmd & 0x00000040)
97-
98-
#define UPDA1F (cmd & 0x00000100)
99-
#define UPDA1 (cmd & 0x00000200)
100-
#define UPDA2 (cmd & 0x00000400)
101-
102-
#define DSTA2 (cmd & 0x00000800)
103-
104-
#define Z_OP_INF (cmd & 0x00040000)
105-
#define Z_OP_EQU (cmd & 0x00080000)
106-
#define Z_OP_SUP (cmd & 0x00100000)
107-
108-
#define LFU_NAN (cmd & 0x00200000)
109-
#define LFU_NA (cmd & 0x00400000)
110-
#define LFU_AN (cmd & 0x00800000)
111-
#define LFU_A (cmd & 0x01000000)
112-
113-
#define CMPDST (cmd & 0x02000000)
114-
#define BCOMPEN (cmd & 0x04000000)
115-
#define DCOMPEN (cmd & 0x08000000)
116-
117-
#define PATDSEL (cmd & 0x00010000)
118-
#define ADDDSEL (cmd & 0x00020000)
119-
#define TOPBEN (cmd & 0x00004000)
120-
#define TOPNEN (cmd & 0x00008000)
121-
#define BKGWREN (cmd & 0x10000000)
122-
#define GOURD (cmd & 0x00001000)
123-
#define GOURZ (cmd & 0x00002000)
124-
#define SRCSHADE (cmd & 0x40000000)
90+
#define SRCEN (cmd.bits.b0)
91+
#define SRCENZ (cmd.bits.b1)
92+
#define SRCENX (cmd.bits.b2)
93+
#define DSTEN (cmd.bits.b3)
94+
#define DSTENZ (cmd.bits.b4)
95+
#define DSTWRZ (cmd.bits.b5)
96+
#define CLIPA1 (cmd.bits.b6)
97+
98+
#define UPDA1F (cmd.bits.b8)
99+
#define UPDA1 (cmd.bits.b9)
100+
#define UPDA2 (cmd.bits.b10)
101+
102+
#define DSTA2 (cmd.bits.b11)
103+
104+
#define Z_OP_INF (cmd.bits.b18)
105+
#define Z_OP_EQU (cmd.bits.b19)
106+
#define Z_OP_SUP (cmd.bits.b20)
107+
108+
#define LFU_NAN (cmd.bits.b21)
109+
#define LFU_NA (cmd.bits.b22)
110+
#define LFU_AN (cmd.bits.b23)
111+
#define LFU_A (cmd.bits.b24)
112+
113+
#define CMPDST (cmd.bits.b25)
114+
#define BCOMPEN (cmd.bits.b26)
115+
#define DCOMPEN (cmd.bits.b27)
116+
117+
#define PATDSEL (cmd.bits.b16)
118+
#define ADDDSEL (cmd.bits.b17)
119+
#define TOPBEN (cmd.bits.b14)
120+
#define TOPNEN (cmd.bits.b15)
121+
#define BKGWREN (cmd.bits.b28)
122+
#define GOURD (cmd.bits.b12)
123+
#define GOURZ (cmd.bits.b13)
124+
#define SRCSHADE (cmd.bits.b30)
125125

126126

127127
#define XADDPHR 0
@@ -305,8 +305,11 @@ static int32_t a1_clip_x, a1_clip_y;
305305
// to optimize the blitter, then we may revisit it in the future...
306306

307307
// Generic blit handler
308-
void blitter_generic(uint32_t cmd)
308+
void blitter_generic(uint32_t cmdi)
309309
{
310+
Bits32 cmd;
311+
cmd.WORD = cmdi;
312+
310313
uint32_t srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
311314
uint32_t bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
312315

@@ -338,14 +341,14 @@ void blitter_generic(uint32_t cmd)
338341

339342
if (SRCENZ)
340343
srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
341-
else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
344+
else if (cmd.WORD & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
342345
srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
343346
}
344347
else // Use SRCDATA register...
345348
{
346349
srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
347350

348-
if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
351+
if (cmd.WORD & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
349352
srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
350353
}
351354

@@ -516,13 +519,13 @@ void blitter_generic(uint32_t cmd)
516519
srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
517520
if (SRCENZ)
518521
srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
519-
else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
522+
else if (cmd.WORD & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
520523
srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
521524
}
522525
else
523526
{
524527
srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
525-
if (cmd & 0x001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
528+
if (cmd.WORD & 0x001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
526529
srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
527530
}
528531

@@ -756,20 +759,23 @@ void blitter_generic(uint32_t cmd)
756759
WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
757760
}
758761

759-
void blitter_blit(uint32_t cmd)
762+
void blitter_blit(uint32_t cmdi)
760763
{
764+
Bits32 cmd;
765+
cmd.WORD = cmdi;
766+
761767
uint32_t m, e;
762768
uint32_t pitchValue[4] = { 0, 1, 3, 2 };
763769
colour_index = 0;
764-
src = cmd & 0x07;
765-
dst = (cmd >> 3) & 0x07;
766-
misc = (cmd >> 6) & 0x03;
767-
a1ctl = (cmd >> 8) & 0x7;
768-
mode = (cmd >> 11) & 0x07;
769-
ity = (cmd >> 14) & 0x0F;
770-
zop = (cmd >> 18) & 0x07;
771-
op = (cmd >> 21) & 0x0F;
772-
ctrl = (cmd >> 25) & 0x3F;
770+
src = cmd.WORD & 0x07;
771+
dst = (cmd.WORD >> 3) & 0x07;
772+
misc = (cmd.WORD >> 6) & 0x03;
773+
a1ctl = (cmd.WORD >> 8) & 0x7;
774+
mode = (cmd.WORD >> 11) & 0x07;
775+
ity = (cmd.WORD >> 14) & 0x0F;
776+
zop = (cmd.WORD >> 18) & 0x07;
777+
op = (cmd.WORD >> 21) & 0x0F;
778+
ctrl = (cmd.WORD >> 25) & 0x3F;
773779

774780
// Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
775781
// NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
@@ -952,7 +958,7 @@ void blitter_blit(uint32_t cmd)
952958
gd_ca = 0xFFFFFF00 | gd_ca;
953959
}
954960

955-
blitter_generic(cmd);
961+
blitter_generic(cmd.WORD);
956962
}
957963
#endif
958964
/*******************************************************************************
@@ -1113,10 +1119,11 @@ void BlitterWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
11131119
// I.e., the second write of 32-bit value--not convinced this is the best way to do this!
11141120
// But then again, according to the Jaguar docs, this is correct...!
11151121
{
1116-
if (vjs.useFastBlitter)
1117-
blitter_blit(GET32(blitter_ram, 0x38));
1118-
else
1119-
BlitterMidsummer2();
1122+
if (vjs.useFastBlitter) {
1123+
blitter_blit(GET32(blitter_ram, 0x38));
1124+
} else {
1125+
BlitterMidsummer2();
1126+
}
11201127
}
11211128
}
11221129
//F02278,9,A,B
@@ -1135,10 +1142,10 @@ void BlitterWriteLong(uint32_t offset, uint32_t data, uint32_t who)
11351142
void ADDRGEN(uint32_t *, uint32_t *, bool, bool,
11361143
uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
11371144
uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t);
1138-
void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
1139-
uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
1140-
uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
1141-
uint32_t zinc, uint32_t zstep);
1145+
void ADDARRAY(const uint16_t * addq, const uint8_t daddasel, const uint8_t daddbsel, const uint8_t daddmode,
1146+
const uint64_t dstd, const uint32_t iinc, const uint8_t initcin[], const uint64_t initinc, const uint16_t initpix,
1147+
const uint32_t istep, const uint64_t patd, const uint64_t srcd, const uint64_t srcz1, const uint64_t srcz2,
1148+
const uint32_t zinc, const uint32_t zstep);
11421149
void ADD16SAT(uint16_t *r, uint8_t *co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh);
11431150
void ADDAMUX(int16_t *adda_x, int16_t *adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
11441151
int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
@@ -1166,7 +1173,8 @@ void BlitterMidsummer2(void)
11661173
//Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
11671174
//be described here at all)...
11681175

1169-
uint32_t cmd = GET32(blitter_ram, COMMAND);
1176+
Bits32 cmd;
1177+
cmd.WORD = GET32(blitter_ram, COMMAND);
11701178

11711179
// Line states passed in via the command register
11721180

@@ -1177,7 +1185,7 @@ void BlitterMidsummer2(void)
11771185
patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
11781186
dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
11791187

1180-
uint8_t zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
1188+
uint8_t zmode = (cmd.WORD & 0x01C0000) >> 18, lfufunc = (cmd.WORD & 0x1E00000) >> 21;
11811189
//Missing: BUSHI
11821190
//Where to find various lines:
11831191
// clip_a1 -> inner
@@ -2397,10 +2405,10 @@ void ADDRGEN(uint32_t *address, uint32_t *pixa, bool gena2, bool zaddr,
23972405
////////////////////////////////////////////////////////////////////////////////////////////
23982406
////////////////////////////////////////////////////////////////////////////////////////////
23992407

2400-
void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
2401-
uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
2402-
uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
2403-
uint32_t zinc, uint32_t zstep)
2408+
void ADDARRAY(const uint16_t * addq, const uint8_t daddasel, const uint8_t daddbsel, const uint8_t daddmode,
2409+
const uint64_t dstd, const uint32_t iinc, const uint8_t initcin[], const uint64_t initinc, const uint16_t initpix,
2410+
const uint32_t istep, const uint64_t patd, const uint64_t srcd, const uint64_t srcz1, const uint64_t srcz2,
2411+
const uint32_t zinc, const uint32_t zstep)
24042412
{
24052413
unsigned i;
24062414
uint16_t adda[4];
@@ -2851,7 +2859,7 @@ Patdhi := JOIN (patdhi, patd[32..63]);*/
28512859
uint8_t dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
28522860
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
28532861
int en;
2854-
uint64_t cmpd;
2862+
Bits64 cmpd;
28552863
uint8_t dbinht;
28562864
uint16_t addq[4];
28572865
uint8_t initcin[4] = { 0, 0, 0, 0 };
@@ -2875,23 +2883,23 @@ Zstep := JOIN (zstep, zstep[0..31]);*/
28752883
/*Datacomp := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
28762884
////////////////////////////////////// C++ CODE //////////////////////////////////////
28772885
*dcomp = 0;
2878-
cmpd = *patd ^ (cmpdst ? dstd : srcd);
2886+
cmpd.DATA = *patd ^ (cmpdst ? dstd : srcd);
28792887

2880-
if ((cmpd & 0x00000000000000FFLL) == 0)
2888+
if (cmpd.bytes.b0 == 0)
28812889
*dcomp |= 0x01u;
2882-
if ((cmpd & 0x000000000000FF00LL) == 0)
2890+
if (cmpd.bytes.b1 == 0)
28832891
*dcomp |= 0x02u;
2884-
if ((cmpd & 0x0000000000FF0000LL) == 0)
2892+
if (cmpd.bytes.b2 == 0)
28852893
*dcomp |= 0x04u;
2886-
if ((cmpd & 0x00000000FF000000LL) == 0)
2894+
if (cmpd.bytes.b3 == 0)
28872895
*dcomp |= 0x08u;
2888-
if ((cmpd & 0x000000FF00000000LL) == 0)
2896+
if (cmpd.bytes.b4 == 0)
28892897
*dcomp |= 0x10u;
2890-
if ((cmpd & 0x0000FF0000000000LL) == 0)
2898+
if (cmpd.bytes.b5 == 0)
28912899
*dcomp |= 0x20u;
2892-
if ((cmpd & 0x00FF000000000000LL) == 0)
2900+
if (cmpd.bytes.b6 == 0)
28932901
*dcomp |= 0x40u;
2894-
if ((cmpd & 0xFF00000000000000LL) == 0)
2902+
if (cmpd.bytes.b7 == 0)
28952903
*dcomp |= 0x80u;
28962904
//////////////////////////////////////////////////////////////////////////////////////
28972905

@@ -2909,25 +2917,25 @@ with srcshift bits 4 & 5 selecting the start position
29092917
*/
29102918
//So... basically what we have here is:
29112919
*zcomp = 0;
2912-
2913-
if ((((*srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
2914-
|| (((*srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
2915-
|| (((*srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
2920+
// TODO: Byte and bit this - @joematt provenance
2921+
if ((((*srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01u))
2922+
|| (((*srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02u))
2923+
|| (((*srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04u)))
29162924
*zcomp |= 0x01u;
29172925

2918-
if ((((*srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
2919-
|| (((*srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
2920-
|| (((*srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
2926+
if ((((*srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01u))
2927+
|| (((*srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02u))
2928+
|| (((*srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04u)))
29212929
*zcomp |= 0x02u;
29222930

2923-
if ((((*srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
2924-
|| (((*srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
2925-
|| (((*srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
2931+
if ((((*srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01u))
2932+
|| (((*srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02u))
2933+
|| (((*srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04u)))
29262934
*zcomp |= 0x04u;
29272935

2928-
if ((((*srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
2929-
|| (((*srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
2930-
|| (((*srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
2936+
if ((((*srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01u))
2937+
|| (((*srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02u))
2938+
|| (((*srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04u)))
29312939
*zcomp |= 0x08u;
29322940

29332941
//TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
@@ -3040,6 +3048,8 @@ Sfine := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
30403048
/*Maskt[0] := BUF1 (maskt[0], s_fine[0]);
30413049
Maskt[1-7] := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
30423050
////////////////////////////////////// C++ CODE //////////////////////////////////////
3051+
// TODO: Byte and bit this - @joematt provenance
3052+
30433053
maskt = s_fine & 0x0001;
30443054
maskt |= (((maskt & 0x0001) || (s_fine & 0x02u)) && (e_fine & 0x02u) ? 0x0002 : 0x0000);
30453055
maskt |= (((maskt & 0x0002) || (s_fine & 0x04u)) && (e_fine & 0x04u) ? 0x0004 : 0x0000);
@@ -3051,6 +3061,7 @@ Maskt[1-7] := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
30513061
//////////////////////////////////////////////////////////////////////////////////////
30523062

30533063
/* Produce a look-ahead on the ripple carry */
3064+
// TODO: Byte and bit this - @joematt provenance
30543065
maskt |= (((s_coarse & e_coarse & 0x01u) || (s_coarse & 0x02u)) && (e_coarse & 0x02u) ? 0x0100 : 0x0000);
30553066
maskt |= (((maskt & 0x0100) || (s_coarse & 0x04u)) && (e_coarse & 0x04u) ? 0x0200 : 0x0000);
30563067
maskt |= (((maskt & 0x0200) || (s_coarse & 0x08u)) && (e_coarse & 0x08u) ? 0x0400 : 0x0000);
@@ -3087,6 +3098,7 @@ Masku[14] := MX2 (masku[14], maskt[14], maskt[0], mir_byte);*/
30873098
mir_bit = true/*big_pix*/ && !phrase_mode;
30883099
mir_byte = true/*big_pix*/ && phrase_mode;
30893100
masku = maskt;
3101+
// TODO: Byte and bit this - @joematt provenance
30903102

30913103
if (mir_bit)
30923104
{

0 commit comments

Comments
 (0)