@@ -352,6 +352,7 @@ typedef struct
352352 {
353353 dxgi_hdr_uniform_t ubo_values ;
354354 D3D11Buffer ubo ;
355+ D3D11PixelShader ps_readback ; /* lazy: HDR -> SDR tonemap PS used by read_viewport */
355356 float menu_nits ;
356357 float max_output_nits ;
357358 float min_output_nits ;
@@ -2783,6 +2784,7 @@ static void d3d11_gfx_free(void* data)
27832784
27842785#ifdef HAVE_DXGI_HDR
27852786 Release (d3d11 -> hdr .ubo );
2787+ Release (d3d11 -> hdr .ps_readback );
27862788#endif
27872789
27882790 d3d11_release_shader (& d3d11 -> sprites .shader );
@@ -5016,6 +5018,231 @@ static void d3d11_gfx_viewport_info(void* data, struct video_viewport* vp)
50165018 * vp = d3d11 -> vp ;
50175019}
50185020
5021+ #ifdef HAVE_DXGI_HDR
5022+ /* GPU path for HDR screenshot readback.
5023+ *
5024+ * Runs a single full-screen pass that samples the captured HDR backbuffer
5025+ * and writes sRGB-encoded SDR into a B8G8R8A8_UNORM render target, then
5026+ * copies that to a CPU-mappable staging texture and unswizzles into the
5027+ * caller's BGR24 output. Mirrors the Vulkan driver's hdr_to_sdr path
5028+ * and the CPU implementation in dxgi_hdr_readback_to_bgr24() — either
5029+ * one should produce visually identical screenshots.
5030+ *
5031+ * Returns false on any failure; the caller then falls back to the CPU
5032+ * decoder so HDR screenshots still work even if the GPU path breaks
5033+ * (driver PSO compile bug, OOM, unexpected state, etc.). */
5034+ static bool d3d11_gpu_hdr_readback_to_bgr24 (
5035+ d3d11_video_t * d3d11 ,
5036+ ID3D11Resource * src_backbuffer_res ,
5037+ DXGI_FORMAT src_format ,
5038+ unsigned full_width ,
5039+ unsigned full_height ,
5040+ unsigned vp_x , unsigned vp_y ,
5041+ unsigned vp_w , unsigned vp_h ,
5042+ uint8_t * buffer )
5043+ {
5044+ ID3D11Device * device = d3d11 -> device ;
5045+ ID3D11DeviceContext * context = d3d11 -> context ;
5046+ d3d11_shader_t * hdr_shader = & d3d11 -> shaders [VIDEO_SHADER_STOCK_HDR ];
5047+ d3d11_texture_t src_tex = { 0 };
5048+ d3d11_texture_t sdr_tex = { 0 };
5049+ ID3D11Texture2D * staging_tex = NULL ;
5050+ ID3D11Resource * staging_res = NULL ;
5051+ ID3D11Resource * sdr_res = NULL ;
5052+ D3D11_TEXTURE2D_DESC staging_desc ;
5053+ D3D11_MAPPED_SUBRESOURCE map ;
5054+ D3D11_VIEWPORT vp ;
5055+ D3D11_RECT sc ;
5056+ unsigned hdr_mode ;
5057+ unsigned y , x ;
5058+ UINT stride = sizeof (d3d11_vertex_t );
5059+ UINT offset = 0 ;
5060+ bool mapped = false;
5061+ bool ret = false;
5062+
5063+ if (src_format == DXGI_FORMAT_R10G10B10A2_UNORM )
5064+ hdr_mode = 1 ;
5065+ else if (src_format == DXGI_FORMAT_R16G16B16A16_FLOAT )
5066+ hdr_mode = 2 ;
5067+ else
5068+ return false;
5069+
5070+ /* Lazy compile of the readback pixel shader. Only pays the cost
5071+ * the first time a screenshot is taken with HDR enabled. */
5072+ if (!d3d11 -> hdr .ps_readback )
5073+ {
5074+ static const char shader_src [] =
5075+ #include "d3d_shaders/hdr_sm5.hlsl.h"
5076+ ;
5077+ D3DBlob ps_code = NULL ;
5078+ if (!d3d_compile (shader_src , sizeof (shader_src ), NULL ,
5079+ "PSMainToSDR" , "ps_5_0" , & ps_code ) || !ps_code )
5080+ {
5081+ RARCH_ERR ("[D3D11] Failed to compile PSMainToSDR for HDR readback.\n" );
5082+ return false;
5083+ }
5084+ if (FAILED (device -> lpVtbl -> CreatePixelShader (device ,
5085+ ps_code -> lpVtbl -> GetBufferPointer (ps_code ),
5086+ ps_code -> lpVtbl -> GetBufferSize (ps_code ),
5087+ NULL , & d3d11 -> hdr .ps_readback )))
5088+ {
5089+ ps_code -> lpVtbl -> Release (ps_code );
5090+ RARCH_ERR ("[D3D11] Failed to create readback PS.\n" );
5091+ return false;
5092+ }
5093+ ps_code -> lpVtbl -> Release (ps_code );
5094+ }
5095+
5096+ /* HDR-format intermediate source: a shader-readable copy of the
5097+ * swapchain backbuffer. (The swapchain itself is created with
5098+ * RENDER_TARGET_OUTPUT only and cannot be bound as an SRV.) */
5099+ src_tex .desc .Width = full_width ;
5100+ src_tex .desc .Height = full_height ;
5101+ src_tex .desc .Format = src_format ;
5102+ src_tex .desc .BindFlags = D3D11_BIND_SHADER_RESOURCE ;
5103+ if (!d3d11_init_texture (device , & src_tex ))
5104+ goto cleanup ;
5105+
5106+ context -> lpVtbl -> CopyResource (context ,
5107+ (ID3D11Resource * )src_tex .handle , src_backbuffer_res );
5108+
5109+ /* SDR render target: receives the tonemap output. */
5110+ sdr_tex .desc .Width = full_width ;
5111+ sdr_tex .desc .Height = full_height ;
5112+ sdr_tex .desc .Format = DXGI_FORMAT_B8G8R8A8_UNORM ;
5113+ sdr_tex .desc .BindFlags = D3D11_BIND_RENDER_TARGET ;
5114+ if (!d3d11_init_texture (device , & sdr_tex ))
5115+ goto cleanup ;
5116+
5117+ /* Populate the UBO with readback-specific values and push. */
5118+ {
5119+ const float prev_it = d3d11 -> hdr .ubo_values .inverse_tonemap ;
5120+ const float prev_h = d3d11 -> hdr .ubo_values .hdr10 ;
5121+ const unsigned prev_m = d3d11 -> hdr .ubo_values .hdr_mode ;
5122+ const float prev_sc = d3d11 -> hdr .ubo_values .scanlines ;
5123+ D3D11_MAPPED_SUBRESOURCE mapped_ubo ;
5124+
5125+ d3d11 -> hdr .ubo_values .inverse_tonemap = 0.0f ;
5126+ d3d11 -> hdr .ubo_values .hdr10 = 0.0f ;
5127+ d3d11 -> hdr .ubo_values .hdr_mode = hdr_mode ;
5128+ d3d11 -> hdr .ubo_values .scanlines = 0.0f ;
5129+
5130+ if (SUCCEEDED (context -> lpVtbl -> Map (context ,
5131+ (ID3D11Resource * )d3d11 -> hdr .ubo , 0 ,
5132+ D3D11_MAP_WRITE_DISCARD , 0 , & mapped_ubo )))
5133+ {
5134+ * (dxgi_hdr_uniform_t * )mapped_ubo .pData = d3d11 -> hdr .ubo_values ;
5135+ context -> lpVtbl -> Unmap (context ,
5136+ (ID3D11Resource * )d3d11 -> hdr .ubo , 0 );
5137+ }
5138+
5139+ d3d11 -> hdr .ubo_values .inverse_tonemap = prev_it ;
5140+ d3d11 -> hdr .ubo_values .hdr10 = prev_h ;
5141+ d3d11 -> hdr .ubo_values .hdr_mode = prev_m ;
5142+ d3d11 -> hdr .ubo_values .scanlines = prev_sc ;
5143+ }
5144+
5145+ /* Bind state: VS / IL / GS from the HDR stock shader, PS from our
5146+ * readback shader. */
5147+ context -> lpVtbl -> IASetInputLayout (context , hdr_shader -> layout );
5148+ context -> lpVtbl -> VSSetShader (context , hdr_shader -> vs , NULL , 0 );
5149+ context -> lpVtbl -> PSSetShader (context , d3d11 -> hdr .ps_readback , NULL , 0 );
5150+ context -> lpVtbl -> GSSetShader (context , hdr_shader -> gs , NULL , 0 );
5151+ context -> lpVtbl -> IASetPrimitiveTopology (context ,
5152+ D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP );
5153+
5154+ context -> lpVtbl -> VSSetConstantBuffers (context , 0 , 1 , & d3d11 -> hdr .ubo );
5155+ context -> lpVtbl -> PSSetConstantBuffers (context , 0 , 1 , & d3d11 -> hdr .ubo );
5156+ context -> lpVtbl -> PSSetShaderResources (context , 0 , 1 , & src_tex .view );
5157+ context -> lpVtbl -> PSSetSamplers (context , 0 , 1 ,
5158+ & d3d11 -> samplers [RARCH_FILTER_UNSPEC ][RARCH_WRAP_DEFAULT ]);
5159+
5160+ context -> lpVtbl -> IASetVertexBuffers (context , 0 , 1 ,
5161+ & d3d11 -> frame .vbo , & stride , & offset );
5162+
5163+ context -> lpVtbl -> OMSetRenderTargets (context , 1 , & sdr_tex .rt_view , NULL );
5164+ context -> lpVtbl -> OMSetBlendState (context , d3d11 -> blend_disable , NULL , 0xFFFFFFFF );
5165+
5166+ vp .TopLeftX = 0.0f ;
5167+ vp .TopLeftY = 0.0f ;
5168+ vp .Width = (float )full_width ;
5169+ vp .Height = (float )full_height ;
5170+ vp .MinDepth = 0.0f ;
5171+ vp .MaxDepth = 1.0f ;
5172+ sc .left = 0 ;
5173+ sc .top = 0 ;
5174+ sc .right = (LONG )full_width ;
5175+ sc .bottom = (LONG )full_height ;
5176+ context -> lpVtbl -> RSSetViewports (context , 1 , & vp );
5177+ context -> lpVtbl -> RSSetScissorRects (context , 1 , & sc );
5178+ context -> lpVtbl -> RSSetState (context , d3d11 -> scissor_disabled );
5179+
5180+ context -> lpVtbl -> Draw (context , 4 , 0 );
5181+
5182+ /* Unbind SRV before we may read from the same texture as a source
5183+ * for anything else (and to stop D3D11 complaining about RTV/SRV
5184+ * aliasing if anything upstream uses the same slot). */
5185+ {
5186+ ID3D11ShaderResourceView * null_srv = NULL ;
5187+ context -> lpVtbl -> PSSetShaderResources (context , 0 , 1 , & null_srv );
5188+ }
5189+
5190+ /* Staging copy of the SDR RT. */
5191+ staging_desc = sdr_tex .desc ;
5192+ staging_desc .MipLevels = 1 ;
5193+ staging_desc .BindFlags = 0 ;
5194+ staging_desc .MiscFlags = 0 ;
5195+ staging_desc .Usage = D3D11_USAGE_STAGING ;
5196+ staging_desc .CPUAccessFlags = D3D11_CPU_ACCESS_READ ;
5197+ if (FAILED (device -> lpVtbl -> CreateTexture2D (device , & staging_desc ,
5198+ NULL , & staging_tex )))
5199+ goto cleanup ;
5200+
5201+ #ifdef __cplusplus
5202+ staging_tex -> lpVtbl -> QueryInterface (staging_tex , IID_ID3D11Resource , (void * * )& staging_res );
5203+ sdr_tex .handle -> lpVtbl -> QueryInterface (sdr_tex .handle , IID_ID3D11Resource , (void * * )& sdr_res );
5204+ #else
5205+ staging_tex -> lpVtbl -> QueryInterface (staging_tex , & IID_ID3D11Resource , (void * * )& staging_res );
5206+ sdr_tex .handle -> lpVtbl -> QueryInterface (sdr_tex .handle , & IID_ID3D11Resource , (void * * )& sdr_res );
5207+ #endif
5208+ context -> lpVtbl -> CopyResource (context , staging_res , sdr_res );
5209+
5210+ if (FAILED (context -> lpVtbl -> Map (context , staging_res , 0 ,
5211+ D3D11_MAP_READ , 0 , & map )))
5212+ goto cleanup ;
5213+ mapped = true;
5214+
5215+ /* BGRA8 -> BGR24, bottom-up, clamped to viewport. */
5216+ {
5217+ const uint8_t * src_row = (const uint8_t * )map .pData + (size_t )map .RowPitch * vp_y ;
5218+ for (y = 0 ; y < vp_h ; y ++ , src_row += map .RowPitch )
5219+ {
5220+ uint8_t * dst = buffer + 3 * (size_t )(vp_h - y - 1 ) * vp_w ;
5221+ for (x = 0 ; x < vp_w ; x ++ )
5222+ {
5223+ dst [3 * x + 0 ] = src_row [4 * (x + vp_x ) + 0 ];
5224+ dst [3 * x + 1 ] = src_row [4 * (x + vp_x ) + 1 ];
5225+ dst [3 * x + 2 ] = src_row [4 * (x + vp_x ) + 2 ];
5226+ }
5227+ }
5228+ }
5229+ ret = true;
5230+
5231+ cleanup :
5232+ if (mapped )
5233+ context -> lpVtbl -> Unmap (context , staging_res , 0 );
5234+ if (staging_res )
5235+ staging_res -> lpVtbl -> Release (staging_res );
5236+ if (sdr_res )
5237+ sdr_res -> lpVtbl -> Release (sdr_res );
5238+ if (staging_tex )
5239+ staging_tex -> lpVtbl -> Release (staging_tex );
5240+ d3d11_release_texture (& sdr_tex );
5241+ d3d11_release_texture (& src_tex );
5242+ return ret ;
5243+ }
5244+ #endif /* HAVE_DXGI_HDR */
5245+
50195246static bool d3d11_gfx_read_viewport (void * data , uint8_t * buffer , bool is_idle )
50205247{
50215248 d3d11_video_t * d3d11 = (d3d11_video_t * )data ;
@@ -5118,9 +5345,19 @@ static bool d3d11_gfx_read_viewport(void* data, uint8_t* buffer, bool is_idle)
51185345#ifdef HAVE_DXGI_HDR
51195346 case DXGI_FORMAT_R10G10B10A2_UNORM :
51205347 case DXGI_FORMAT_R16G16B16A16_FLOAT :
5121- /* HDR10 PQ or scRGB: hand off to the CPU HDR decoder.
5122- * It undoes the forward HDR encoding using paper_white_nits
5123- * and writes sRGB-encoded BGR24 bottom-up. */
5348+ /* HDR10 PQ or scRGB. Try the GPU tonemap pass first — it's
5349+ * faster and avoids the per-pixel CPU cost at 4K — and fall
5350+ * back to the CPU decoder on any failure so HDR screenshots
5351+ * still work even if the GPU path breaks (driver PSO compile
5352+ * bug, OOM, etc.). */
5353+ if (d3d11_gpu_hdr_readback_to_bgr24 (
5354+ d3d11 , BackBufferResource , StagingDesc .Format ,
5355+ StagingDesc .Width , StagingDesc .Height ,
5356+ vp_x , vp_y , vp_width , vp_height ,
5357+ buffer ))
5358+ break ;
5359+
5360+ RARCH_WARN ("[D3D11] GPU HDR readback failed, falling back to CPU.\n" );
51245361 if (!dxgi_hdr_readback_to_bgr24 (
51255362 StagingDesc .Format ,
51265363 Map .pData , Map .RowPitch ,
0 commit comments