Skip to content

Commit 345edcb

Browse files
committed
gfx/d3d9: Upload RGUI menu framebuffer directly as ARGB4444
Both d3d9_hlsl and d3d9_cg previously expanded RGUI's 16bpp menu framebuffer to 32bpp on the CPU every frame via a per-pixel loop, then uploaded the result into a D3DFMT_A8R8G8B8 texture. RGUI already assembles its framebuffer in 16bpp; D3D9 has supported D3DFMT_A4R4G4B4 as a baseline texture format since launch. Add D3D9_ARGB4444_FORMAT to d3d9_common.h (with D3DFMT_LIN_A4R4G4B4 for the _XBOX build path), and add "d3d9_hlsl"/"d3d9_cg" cases to the RGUI pixel format dispatcher that select argb32_to_argb4444 (already in use for the rsx/PS3 driver, which targets the same ARGB4444 bit layout). In both set_menu_texture_frame implementations (d3d9_hlsl and d3d9_cg), allocate the menu texture as D3DFMT_A4R4G4B4 when rgb32 is false (the only case in current practice; RGUI is the sole caller), and upload row-by-row via memcpy. The rgb32 = true API branch is preserved for forward compatibility and continues to use D3DFMT_A8R8G8B8. Track whether the currently-allocated menu texture is 32bpp or 16bpp in a new d3d9_video_t::menu_tex_rgb32 field so the texture is recreated when the rgb32 flag flips between calls. Endian-safe by construction: argb32_to_argb4444 produces a host-endian uint16_t with A in bits 15..12 down to B in 3..0; D3DFMT_A4R4G4B4 is read by D3D as host-endian 16-bit units with the same bit assignments. This is the same contract as the original ARGB8888 path, just one storage size smaller, so the ordering holds on both LE (PC) and BE (Xbox 360) hosts without a byte swap.
1 parent f2f2f5a commit 345edcb

4 files changed

Lines changed: 68 additions & 43 deletions

File tree

gfx/common/d3d9_common.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@
3333
#define D3D9_RGB565_FORMAT D3DFMT_LIN_R5G6B5
3434
#define D3D9_ARGB8888_FORMAT D3DFMT_LIN_A8R8G8B8
3535
#define D3D9_XRGB8888_FORMAT D3DFMT_LIN_X8R8G8B8
36+
#define D3D9_ARGB4444_FORMAT D3DFMT_LIN_A4R4G4B4
3637
#else
3738
#define D3D9_RGB565_FORMAT D3DFMT_R5G6B5
3839
#define D3D9_ARGB8888_FORMAT D3DFMT_A8R8G8B8
3940
#define D3D9_XRGB8888_FORMAT D3DFMT_X8R8G8B8
41+
#define D3D9_ARGB4444_FORMAT D3DFMT_A4R4G4B4
4042
#endif
4143

4244
RETRO_BEGIN_DECLS
@@ -90,6 +92,14 @@ typedef struct d3d9_video
9092

9193
/* Only used for Xbox */
9294
bool widescreen_mode;
95+
96+
/* Bit-depth of the data most recently uploaded to `menu->tex`.
97+
* The menu texture is created with a fixed pixel format (16bpp
98+
* ARGB4444 for the RGUI fast path, 32bpp ARGB8888 otherwise),
99+
* so we must recreate it when set_menu_texture_frame is called
100+
* with a different `rgb32` value. Defaults to false; the first
101+
* call will see a NULL tex and create one regardless. */
102+
bool menu_tex_rgb32;
93103
} d3d9_video_t;
94104

95105
bool d3d9_create_device(void *dev,

gfx/drivers/d3d9cg.c

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4552,17 +4552,26 @@ static void d3d9_cg_set_menu_texture_frame(void *data,
45524552

45534553
if ( (!d3d->menu->tex)
45544554
|| (d3d->menu->tex_w != width)
4555-
|| (d3d->menu->tex_h != height))
4555+
|| (d3d->menu->tex_h != height)
4556+
|| (d3d->menu_tex_rgb32 != rgb32))
45564557
{
45574558
if (d3d->menu->tex)
45584559
IDirect3DTexture9_Release((LPDIRECT3DTEXTURE9)d3d->menu->tex);
45594560

45604561
d3d->menu->tex = NULL;
45614562
{
4563+
/* RGUI sends 16bpp ARGB4444 (the d3d9 case in RGUI's pixel
4564+
* format dispatcher selects argb32_to_argb4444), so we can
4565+
* upload it byte-for-byte into a D3DFMT_A4R4G4B4 texture and
4566+
* skip the per-pixel CPU expansion to ARGB8888 the previous
4567+
* implementation did every frame. The rgb32 path is preserved
4568+
* for callers that hand us 32bpp data; in current practice no
4569+
* such caller exists, but the API contract supports it. */
45624570
void *_tbuf = NULL;
45634571
if (SUCCEEDED(IDirect3DDevice9_CreateTexture(d3d->dev,
45644572
width, height, 1, 0,
4565-
D3DFMT_A8R8G8B8, D3DPOOL_MANAGED,
4573+
rgb32 ? D3DFMT_A8R8G8B8 : D3D9_ARGB4444_FORMAT,
4574+
D3DPOOL_MANAGED,
45664575
(struct IDirect3DTexture9**)&_tbuf, NULL)))
45674576
d3d->menu->tex = (LPDIRECT3DTEXTURE9)_tbuf;
45684577
}
@@ -4575,6 +4584,7 @@ static void d3d9_cg_set_menu_texture_frame(void *data,
45754584

45764585
d3d->menu->tex_w = width;
45774586
d3d->menu->tex_h = height;
4587+
d3d->menu_tex_rgb32 = rgb32;
45784588
}
45794589

45804590
d3d->menu->alpha_mod = alpha;
@@ -4583,7 +4593,7 @@ static void d3d9_cg_set_menu_texture_frame(void *data,
45834593
0, &d3dlr, NULL, D3DLOCK_NOSYSLOCK)))
45844594
return;
45854595
{
4586-
unsigned h, w;
4596+
unsigned h;
45874597

45884598
if (rgb32)
45894599
{
@@ -4599,26 +4609,23 @@ static void d3d9_cg_set_menu_texture_frame(void *data,
45994609
}
46004610
else
46014611
{
4602-
uint32_t *dst = (uint32_t*)d3dlr.pBits;
4603-
const uint16_t *src = (const uint16_t*)frame;
4612+
/* Direct ARGB4444 upload. The bit layout produced by
4613+
* argb32_to_argb4444 (host-endian uint16_t with A in bits
4614+
* 15..12, R 11..8, G 7..4, B 3..0) matches D3DFMT_A4R4G4B4
4615+
* exactly: D3D reads the locked memory as host-endian
4616+
* 16-bit units with the same bit assignments, so the same
4617+
* source bytes work on LE (PC) and BE (Xbox 360) hosts
4618+
* without a byte swap. */
4619+
uint8_t *dst = (uint8_t*)d3dlr.pBits;
4620+
const uint8_t *src = (const uint8_t*)frame;
4621+
unsigned src_pitch = width * sizeof(uint16_t);
4622+
unsigned row_bytes = width * sizeof(uint16_t);
46044623

4605-
for (h = 0; h < height; h++,
4606-
dst += d3dlr.Pitch >> 2,
4607-
src += width)
4624+
for (h = 0; h < height; h++, dst += d3dlr.Pitch, src += src_pitch)
46084625
{
4609-
for (w = 0; w < width; w++)
4610-
{
4611-
uint16_t c = src[w];
4612-
uint32_t r = (c >> 12) & 0xf;
4613-
uint32_t g = (c >> 8) & 0xf;
4614-
uint32_t b = (c >> 4) & 0xf;
4615-
uint32_t a = (c >> 0) & 0xf;
4616-
r = ((r << 4) | r) << 16;
4617-
g = ((g << 4) | g) << 8;
4618-
b = ((b << 4) | b) << 0;
4619-
a = ((a << 4) | a) << 24;
4620-
dst[w] = r | g | b | a;
4621-
}
4626+
memcpy(dst, src, row_bytes);
4627+
if (d3dlr.Pitch > (int)row_bytes)
4628+
memset(dst + row_bytes, 0, d3dlr.Pitch - row_bytes);
46224629
}
46234630
}
46244631
}

gfx/drivers/d3d9hlsl.c

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7852,15 +7852,23 @@ static void d3d9_hlsl_set_menu_texture_frame(void *data,
78527852

78537853
if ( (!d3d->menu->tex)
78547854
|| (d3d->menu->tex_w != width)
7855-
|| (d3d->menu->tex_h != height))
7855+
|| (d3d->menu->tex_h != height)
7856+
|| (d3d->menu_tex_rgb32 != rgb32))
78567857
{
78577858
if (d3d->menu->tex)
78587859
IDirect3DTexture9_Release((LPDIRECT3DTEXTURE9)d3d->menu->tex);
78597860

78607861
d3d->menu->tex = NULL;
7862+
/* RGUI sends 16bpp ARGB4444 (the d3d9 case in RGUI's pixel
7863+
* format dispatcher selects argb32_to_argb4444), so we can
7864+
* upload it byte-for-byte into a D3DFMT_A4R4G4B4 texture and
7865+
* skip the per-pixel CPU expansion to ARGB8888 the previous
7866+
* implementation did every frame. The rgb32 path is preserved
7867+
* for callers that hand us 32bpp data; in current practice no
7868+
* such caller exists, but the API contract supports it. */
78617869
IDirect3DDevice9_CreateTexture(d3d->dev,
78627870
width, height, 1,
7863-
0, D3D9_ARGB8888_FORMAT,
7871+
0, rgb32 ? D3D9_ARGB8888_FORMAT : D3D9_ARGB4444_FORMAT,
78647872
D3DPOOL_MANAGED,
78657873
(struct IDirect3DTexture9**)&d3d->menu->tex, NULL);
78667874

@@ -7872,14 +7880,15 @@ static void d3d9_hlsl_set_menu_texture_frame(void *data,
78727880

78737881
d3d->menu->tex_w = width;
78747882
d3d->menu->tex_h = height;
7883+
d3d->menu_tex_rgb32 = rgb32;
78757884
}
78767885

78777886
d3d->menu->alpha_mod = alpha;
78787887

78797888
IDirect3DTexture9_LockRect((LPDIRECT3DTEXTURE9)d3d->menu->tex,
78807889
0, &d3dlr, NULL, D3DLOCK_NOSYSLOCK);
78817890
{
7882-
unsigned h, w;
7891+
unsigned h;
78837892

78847893
if (rgb32)
78857894
{
@@ -7895,26 +7904,23 @@ static void d3d9_hlsl_set_menu_texture_frame(void *data,
78957904
}
78967905
else
78977906
{
7898-
uint32_t *dst = (uint32_t*)d3dlr.pBits;
7899-
const uint16_t *src = (const uint16_t*)frame;
7907+
/* Direct ARGB4444 upload. The bit layout produced by
7908+
* argb32_to_argb4444 (host-endian uint16_t with A in bits
7909+
* 15..12, R 11..8, G 7..4, B 3..0) matches D3DFMT_A4R4G4B4
7910+
* exactly: D3D reads the locked memory as host-endian
7911+
* 16-bit units with the same bit assignments, so the same
7912+
* source bytes work on LE (PC) and BE (Xbox 360) hosts
7913+
* without a byte swap. */
7914+
uint8_t *dst = (uint8_t*)d3dlr.pBits;
7915+
const uint8_t *src = (const uint8_t*)frame;
7916+
unsigned src_pitch = width * sizeof(uint16_t);
7917+
unsigned row_bytes = width * sizeof(uint16_t);
79007918

7901-
for (h = 0; h < height; h++,
7902-
dst += d3dlr.Pitch >> 2,
7903-
src += width)
7919+
for (h = 0; h < height; h++, dst += d3dlr.Pitch, src += src_pitch)
79047920
{
7905-
for (w = 0; w < width; w++)
7906-
{
7907-
uint16_t c = src[w];
7908-
uint32_t r = (c >> 12) & 0xf;
7909-
uint32_t g = (c >> 8) & 0xf;
7910-
uint32_t b = (c >> 4) & 0xf;
7911-
uint32_t a = (c >> 0) & 0xf;
7912-
r = ((r << 4) | r) << 16;
7913-
g = ((g << 4) | g) << 8;
7914-
b = ((b << 4) | b) << 0;
7915-
a = ((a << 4) | a) << 24;
7916-
dst[w] = r | g | b | a;
7917-
}
7921+
memcpy(dst, src, row_bytes);
7922+
if (d3dlr.Pitch > (int)row_bytes)
7923+
memset(dst + row_bytes, 0, d3dlr.Pitch - row_bytes);
79187924
}
79197925
}
79207926
}

menu/drivers/rgui.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1396,7 +1396,9 @@ static bool rgui_set_pixel_format_function(void)
13961396
argb32_to_pixel_platform_format = argb32_to_rgb5a3;
13971397
else if (string_is_equal(driver_ident, "psp1")) /* PSP */
13981398
argb32_to_pixel_platform_format = argb32_to_abgr4444;
1399-
else if (string_is_equal(driver_ident, "rsx")) /* PS3 */
1399+
else if ( string_is_equal(driver_ident, "rsx") /* PS3 */
1400+
|| string_is_equal(driver_ident, "d3d9_hlsl") /* D3D9 (PC + Xbox 360) */
1401+
|| string_is_equal(driver_ident, "d3d9_cg"))
14001402
argb32_to_pixel_platform_format = argb32_to_argb4444;
14011403
else if ( string_is_equal(driver_ident, "d3d10") /* D3D10/11/12 */
14021404
|| string_is_equal(driver_ident, "d3d11")

0 commit comments

Comments
 (0)