Skip to content

Commit d3ce5a0

Browse files
committed
VIDEO/D3D12: Implement read_viewport for GPU screenshots
D3D12 was the only desktop driver still missing read_viewport support, so 'Screenshot: Use GPU' was hidden from the menu and the F8 screenshot path fell back to the raw core framebuffer — losing shader output, overlays, integer scaling and aspect correction.

Implement read_viewport following the D3D11 pattern but with native D3D12 primitives:

- video_driver_cached_frame() to re-render the last frame, then wait on the main queue fence so the command allocator is safe to reset.
- GetCopyableFootprints() to get the 256-byte-aligned row pitch and total size the readback buffer needs.
- A per-call D3D12_HEAP_TYPE_READBACK committed buffer sized to that footprint. Allocating per screenshot keeps state minimal and naturally handles swapchain resizes; screenshots are infrequent enough that the allocation cost is irrelevant.
- PRESENT -> COPY_SOURCE transition, CopyTextureRegion from the swapchain backbuffer subresource into the readback buffer's placed footprint, COPY_SOURCE -> PRESENT.
- Signal / wait on the fence, Map with a non-null read range, swizzle rows into the caller's BGR24 bottom-up buffer, clamping to the current viewport.

Handles both R8G8B8A8 and B8G8R8A8 backbuffer formats (with or without the _SRGB variants).

HDR readback is explicitly declined with a log entry, matching D3D11. A proper HDR implementation would need a dedicated HDR -> SDR tonemap pass (cf. the Vulkan driver's hdr_to_sdr pipeline); that's a separate piece of work and will come in a follow-up.

With this change the 'Screenshot: Use GPU' menu entry becomes visible for D3D12 (menu_displaylist.c already gates it on read_viewport + viewport_info being non-NULL) and F8 produces window-sized output matching what the user sees on screen.
1 parent 3d641ef commit d3ce5a0

1 file changed

Lines changed: 242 additions & 1 deletion

File tree

gfx/drivers/d3d12.c

Lines changed: 242 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6059,6 +6059,247 @@ static struct video_shader* d3d12_gfx_get_current_shader(void* data)
60596059
return d3d12->shader_preset;
60606060
}
60616061

6062+
static bool d3d12_gfx_read_viewport(void* data, uint8_t* buffer, bool is_idle)
6063+
{
6064+
d3d12_video_t* d3d12 = (d3d12_video_t*)data;
6065+
D3D12GraphicsCommandList cmd;
6066+
D3D12Resource back_buffer = NULL;
6067+
D3D12Resource readback = NULL;
6068+
D3D12_RESOURCE_DESC tex_desc;
6069+
D3D12_HEAP_PROPERTIES heap_props;
6070+
D3D12_RESOURCE_DESC buf_desc;
6071+
D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
6072+
D3D12_TEXTURE_COPY_LOCATION src_loc;
6073+
D3D12_TEXTURE_COPY_LOCATION dst_loc;
6074+
D3D12_BOX src_box;
6075+
D3D12_RANGE read_range;
6076+
UINT64 total_bytes = 0;
6077+
UINT num_rows = 0;
6078+
UINT64 row_size_bytes = 0;
6079+
const uint8_t* src_pixels = NULL;
6080+
uint8_t* mapped = NULL;
6081+
unsigned vp_x, vp_y, vp_w, vp_h, y, x;
6082+
bool is_bgra;
6083+
6084+
if (!d3d12)
6085+
return false;
6086+
6087+
#ifdef HAVE_DXGI_HDR
6088+
/* HDR readback is not implemented for D3D12. The backbuffer is in
6089+
* either HDR10 PQ (RGB10A2) or scRGB (FP16) — converting either back
6090+
* to SDR requires a dedicated tonemap pass, similar to the Vulkan
6091+
* driver's hdr_to_sdr pipeline. Bail out cleanly so the caller can
6092+
* fall back to the raw-framebuffer path. */
6093+
if (d3d12->flags & D3D12_ST_FLAG_HDR_ENABLE)
6094+
{
6095+
RARCH_ERR("[D3D12] HDR screenshot not supported.\n");
6096+
return false;
6097+
}
6098+
#endif
6099+
6100+
if (!is_idle)
6101+
video_driver_cached_frame();
6102+
6103+
/* Ensure the cached_frame submission above has finished on the GPU
6104+
* before we reuse the command allocator. */
6105+
{
6106+
D3D12Fence fence = d3d12->queue.fence;
6107+
d3d12->queue.handle->lpVtbl->Signal(d3d12->queue.handle, fence,
6108+
++d3d12->queue.fenceValue);
6109+
if (fence->lpVtbl->GetCompletedValue(fence) < d3d12->queue.fenceValue)
6110+
{
6111+
fence->lpVtbl->SetEventOnCompletion(fence,
6112+
d3d12->queue.fenceValue, d3d12->queue.fenceEvent);
6113+
WaitForSingleObject(d3d12->queue.fenceEvent, INFINITE);
6114+
}
6115+
}
6116+
6117+
/* cached_frame rendered into chain.renderTargets[chain.frame_index]
6118+
* and Present'd it without updating frame_index afterwards, so that
6119+
* slot still refers to the buffer we want to read back. */
6120+
back_buffer = d3d12->chain.renderTargets[d3d12->chain.frame_index];
6121+
if (!back_buffer)
6122+
return false;
6123+
6124+
/* We intentionally don't call ID3D12Resource::GetDesc here: the
6125+
* Windows SDK version takes an out-param while the MinGW header
6126+
* returns the struct by value, which breaks cross-toolchain builds.
6127+
* The only fields we need are Format / Width / Height, which we
6128+
* already know from the swapchain state. */
6129+
memset(&tex_desc, 0, sizeof(tex_desc));
6130+
tex_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
6131+
tex_desc.Alignment = 0;
6132+
tex_desc.Width = (UINT64)d3d12->chain.viewport.Width;
6133+
tex_desc.Height = (UINT) d3d12->chain.viewport.Height;
6134+
tex_desc.DepthOrArraySize = 1;
6135+
tex_desc.MipLevels = 1;
6136+
tex_desc.Format = d3d12->chain.formats[d3d12->chain.bit_depth];
6137+
tex_desc.SampleDesc.Count = 1;
6138+
tex_desc.SampleDesc.Quality = 0;
6139+
tex_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
6140+
tex_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
6141+
6142+
/* Only RGBA8 / BGRA8 are expected in the SDR path. Anything else
6143+
* means a format we don't know how to swizzle to BGR24. */
6144+
switch (tex_desc.Format)
6145+
{
6146+
case DXGI_FORMAT_R8G8B8A8_UNORM:
6147+
case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
6148+
is_bgra = false;
6149+
break;
6150+
case DXGI_FORMAT_B8G8R8A8_UNORM:
6151+
case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
6152+
is_bgra = true;
6153+
break;
6154+
default:
6155+
RARCH_ERR("[D3D12] Unexpected swapchain format %u.\n",
6156+
(unsigned)tex_desc.Format);
6157+
return false;
6158+
}
6159+
6160+
/* Ask the device what layout a readback copy of this texture needs.
6161+
* D3D12 requires 256-byte row pitches and 512-byte base offsets in
6162+
* readback buffers, so we can't just pick arbitrary dimensions. */
6163+
d3d12->device->lpVtbl->GetCopyableFootprints(d3d12->device,
6164+
&tex_desc, 0, 1, 0, &footprint, &num_rows,
6165+
&row_size_bytes, &total_bytes);
6166+
6167+
/* Create a readback heap buffer large enough for the footprint. */
6168+
heap_props.Type = D3D12_HEAP_TYPE_READBACK;
6169+
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
6170+
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
6171+
heap_props.CreationNodeMask = 1;
6172+
heap_props.VisibleNodeMask = 1;
6173+
6174+
buf_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
6175+
buf_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
6176+
buf_desc.Width = total_bytes;
6177+
buf_desc.Height = 1;
6178+
buf_desc.DepthOrArraySize = 1;
6179+
buf_desc.MipLevels = 1;
6180+
buf_desc.Format = DXGI_FORMAT_UNKNOWN;
6181+
buf_desc.SampleDesc.Count = 1;
6182+
buf_desc.SampleDesc.Quality = 0;
6183+
buf_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
6184+
buf_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
6185+
6186+
if (FAILED(d3d12->device->lpVtbl->CreateCommittedResource(d3d12->device,
6187+
&heap_props, D3D12_HEAP_FLAG_NONE,
6188+
&buf_desc, D3D12_RESOURCE_STATE_COPY_DEST, NULL,
6189+
uuidof(ID3D12Resource), (void**)&readback)))
6190+
{
6191+
RARCH_ERR("[D3D12] Failed to create readback buffer.\n");
6192+
return false;
6193+
}
6194+
6195+
/* Record a tiny command list that transitions the backbuffer to
6196+
* COPY_SOURCE, copies it into the readback buffer, and transitions
6197+
* it back to PRESENT so Present on the next frame stays legal. */
6198+
d3d12->queue.allocator->lpVtbl->Reset(d3d12->queue.allocator);
6199+
cmd = d3d12->queue.cmd;
6200+
cmd->lpVtbl->Reset(cmd, d3d12->queue.allocator,
6201+
d3d12->pipes[VIDEO_SHADER_STOCK_BLEND]);
6202+
6203+
D3D12_RESOURCE_TRANSITION(cmd, back_buffer,
6204+
D3D12_RESOURCE_STATE_PRESENT,
6205+
D3D12_RESOURCE_STATE_COPY_SOURCE);
6206+
6207+
src_loc.pResource = back_buffer;
6208+
src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
6209+
src_loc.SubresourceIndex = 0;
6210+
6211+
dst_loc.pResource = readback;
6212+
dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
6213+
dst_loc.PlacedFootprint = footprint;
6214+
6215+
src_box.left = 0;
6216+
src_box.top = 0;
6217+
src_box.front = 0;
6218+
src_box.right = (UINT)tex_desc.Width;
6219+
src_box.bottom = tex_desc.Height;
6220+
src_box.back = 1;
6221+
6222+
cmd->lpVtbl->CopyTextureRegion(cmd, &dst_loc, 0, 0, 0, &src_loc, &src_box);
6223+
6224+
D3D12_RESOURCE_TRANSITION(cmd, back_buffer,
6225+
D3D12_RESOURCE_STATE_COPY_SOURCE,
6226+
D3D12_RESOURCE_STATE_PRESENT);
6227+
6228+
cmd->lpVtbl->Close(cmd);
6229+
d3d12->queue.handle->lpVtbl->ExecuteCommandLists(d3d12->queue.handle, 1,
6230+
(ID3D12CommandList* const*)&d3d12->queue.cmd);
6231+
6232+
/* Wait for the copy to complete before mapping. */
6233+
{
6234+
D3D12Fence fence = d3d12->queue.fence;
6235+
d3d12->queue.handle->lpVtbl->Signal(d3d12->queue.handle, fence,
6236+
++d3d12->queue.fenceValue);
6237+
if (fence->lpVtbl->GetCompletedValue(fence) < d3d12->queue.fenceValue)
6238+
{
6239+
fence->lpVtbl->SetEventOnCompletion(fence,
6240+
d3d12->queue.fenceValue, d3d12->queue.fenceEvent);
6241+
WaitForSingleObject(d3d12->queue.fenceEvent, INFINITE);
6242+
}
6243+
}
6244+
6245+
read_range.Begin = 0;
6246+
read_range.End = (SIZE_T)total_bytes;
6247+
if (FAILED(readback->lpVtbl->Map(readback, 0, &read_range,
6248+
(void**)&mapped)))
6249+
{
6250+
Release(readback);
6251+
RARCH_ERR("[D3D12] Failed to map readback buffer.\n");
6252+
return false;
6253+
}
6254+
6255+
src_pixels = mapped + footprint.Offset;
6256+
6257+
vp_x = (d3d12->vp.x > 0) ? d3d12->vp.x : 0;
6258+
vp_y = (d3d12->vp.y > 0) ? d3d12->vp.y : 0;
6259+
vp_w = (d3d12->vp.width > d3d12->vp.full_width)
6260+
? d3d12->vp.full_width : d3d12->vp.width;
6261+
vp_h = (d3d12->vp.height > d3d12->vp.full_height)
6262+
? d3d12->vp.full_height : d3d12->vp.height;
6263+
6264+
src_pixels += (size_t)footprint.Footprint.RowPitch * vp_y;
6265+
6266+
/* Unswizzle into the caller's BGR24 bottom-up output buffer,
6267+
* clamped to the current viewport. */
6268+
for (y = 0; y < vp_h; y++, src_pixels += footprint.Footprint.RowPitch)
6269+
{
6270+
uint8_t* dst = buffer + 3 * (vp_h - y - 1) * vp_w;
6271+
6272+
if (is_bgra)
6273+
{
6274+
/* BGRA source -> BGR dst: drop alpha, keep channel order. */
6275+
for (x = 0; x < vp_w; x++)
6276+
{
6277+
dst[3 * x + 0] = src_pixels[4 * (x + vp_x) + 0];
6278+
dst[3 * x + 1] = src_pixels[4 * (x + vp_x) + 1];
6279+
dst[3 * x + 2] = src_pixels[4 * (x + vp_x) + 2];
6280+
}
6281+
}
6282+
else
6283+
{
6284+
/* RGBA source -> BGR dst: swap R and B. */
6285+
for (x = 0; x < vp_w; x++)
6286+
{
6287+
dst[3 * x + 0] = src_pixels[4 * (x + vp_x) + 2];
6288+
dst[3 * x + 1] = src_pixels[4 * (x + vp_x) + 1];
6289+
dst[3 * x + 2] = src_pixels[4 * (x + vp_x) + 0];
6290+
}
6291+
}
6292+
}
6293+
6294+
{
6295+
D3D12_RANGE empty_write = { 0, 0 };
6296+
readback->lpVtbl->Unmap(readback, 0, &empty_write);
6297+
}
6298+
6299+
Release(readback);
6300+
return true;
6301+
}
6302+
60626303
static void d3d12_gfx_viewport_info(void* data, struct video_viewport* vp)
60636304
{
60646305
d3d12_video_t* d3d12 = (d3d12_video_t*)data;
@@ -6490,7 +6731,7 @@ video_driver_t video_d3d12 = {
64906731
NULL, /* set_viewport */
64916732
d3d12_gfx_set_rotation,
64926733
d3d12_gfx_viewport_info,
6493-
NULL, /* read_viewport */
6734+
d3d12_gfx_read_viewport,
64946735
NULL, /* read_frame_raw */
64956736
#ifdef HAVE_OVERLAY
64966737
d3d12_get_overlay_interface,

0 commit comments

Comments
 (0)