Skip to content

Commit 7d3b452

Browse files
committed
VIDEO/D3D: HDR screenshot support for D3D11 and D3D12 read_viewport
Both drivers previously bailed out with "HDR screenshot not supported" when the swapchain was in HDR10 PQ (R10G10B10A2_UNORM) or scRGB (R16G16B16A16_FLOAT) format, falling back to the raw core framebuffer path and losing shaders, overlays, scaling and everything else the GPU had composed. Add a shared CPU-side HDR -> SDR decoder in gfx/common/dxgi_common: bool dxgi_hdr_readback_to_bgr24(src_format, src_data, src_pitch, src_x, src_y, width, height, paper_white_nits, dst_bgr24); which runs the inverse of the forward composition path and writes sRGB-encoded BGR24 bottom-up, matching the read_viewport contract the rest of the frontend expects. Implementation is pure C: an IEEE 754 binary16 -> binary32 decoder for scRGB, an ST.2084 PQ EOTF for HDR10, BT.2020 -> BT.709 primaries rotation, a hue-preserving peak-channel tonemap (mirroring the forward inverse-tonemap), and an sRGB OETF for writing to UNORM8. At default settings (max_nits == paper_white_nits) the round-trip is analytically exact for in-range SDR content; super-bright and out-of-gamut pixels clamp in the OETF, which is the desired tonemapped screenshot behaviour. Wire both d3d11_gfx_read_viewport and d3d12_gfx_read_viewport to the helper: remove the HDR early-return, route the two HDR swapchain formats through the decoder, and pass d3dNN->hdr.ubo_values.paper_white_nits as the calibration input. The SDR RGBA8 / BGRA8 paths are unchanged except for a cosmetic enum refactor in D3D12. Known limitations (tracked for a follow-up): - ExpandGamut is not reversed; non-default gamut expansions produce slightly desaturated screenshots relative to on-display output. - Per-pixel CPU cost, roughly 1.6 ms at 4K per screenshot. The read_viewport signature permits moving this to a GPU tonemap pass behind the same ABI at a later date without touching callers. - Tonemap() uses paper_white for both max and reference white, matching the existing forward composition in hdr_sm5.hlsl.h and hdr.frag rather than fixing it here.
1 parent d3ce5a0 commit 7d3b452

4 files changed

Lines changed: 365 additions & 70 deletions

File tree

gfx/common/dxgi_common.c

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2912,4 +2912,235 @@ void dxgi_set_hdr_metadata(
29122912
g_hdr10_meta_data = hdr10_meta_data;
29132913
}
29142914
}
2915+
2916+
/* ------------------------------------------------------------------ *
2917+
* HDR screenshot readback: CPU-side HDR -> SDR decoder
2918+
*
2919+
* Called from video driver read_viewport implementations when the
2920+
* swapchain is in an HDR format. Mirrors the forward HDR path in
2921+
* hdr_sm5.hlsl.h but runs inverse, on the CPU, producing SDR BGR24.
2922+
*
2923+
* The CPU path keeps the driver-side code simple (no extra PSO,
2924+
* intermediate render target, descriptor heap slot, or resize
2925+
* handling). A future revision can move this to a GPU tonemap pass
2926+
* behind the same signature without caller changes.
2927+
* ------------------------------------------------------------------ */
2928+
2929+
#include <math.h>
2930+
#include <stdint.h>
2931+
2932+
/* Widens one IEEE 754 binary16 value (passed as its raw 16-bit pattern)
 * to binary32. Used to decode FP16 scRGB samples.
 *
 * The float32 bit pattern is assembled by hand:
 *  - Inf / NaN keep the all-ones exponent; the 10 mantissa bits land in
 *    the top of the float32 mantissa.
 *  - Normal numbers are re-biased from 15 to 127.
 *  - Zero keeps only its sign.
 *  - Subnormals are renormalized, since every binary16 subnormal is a
 *    normal number in binary32. */
static INLINE float dxgi_half_to_float(uint16_t h)
{
   union { uint32_t bits; float value; } pun;
   const uint32_t sign_bit = ((uint32_t)(h >> 15) & 0x1u) << 31;
   const uint32_t exponent = (uint32_t)(h >> 10) & 0x1Fu;
   uint32_t       fraction = (uint32_t)h & 0x3FFu;

   if (exponent == 0x1Fu)
      pun.bits = sign_bit | (0xFFu << 23) | (fraction << 13);
   else if (exponent != 0)
      pun.bits = sign_bit | ((exponent + 127 - 15) << 23) | (fraction << 13);
   else if (fraction == 0)
      pun.bits = sign_bit;
   else
   {
      /* Start from the float32 biased exponent of 2^-14 (127 - 15 + 1)
       * and drop it by one for every shift needed to bring the leading
       * one up to the implicit-bit position (bit 10). */
      uint32_t biased = 127 - 15 + 1;

      do
      {
         fraction <<= 1;
         biased--;
      } while (!(fraction & 0x400u));

      /* Strip the now-implicit leading one before packing. */
      pun.bits = sign_bit | (biased << 23) | ((fraction & 0x3FFu) << 13);
   }

   return pun.value;
}
2970+
2971+
/* ST.2084 (PQ) EOTF. Input is the non-linear PQ code value in [0,1];
2972+
* output is linear light normalized so 1.0 == 10000 nits. */
2973+
static INLINE float dxgi_st2084_to_linear(float pq)
2974+
{
2975+
static const float m1_inv = 1.0f / 0.1593017578f;
2976+
static const float m2_inv = 1.0f / 78.84375f;
2977+
static const float c1 = 0.8359375f;
2978+
static const float c2 = 18.8515625f;
2979+
static const float c3 = 18.6875f;
2980+
float Np, num, den;
2981+
2982+
if (pq <= 0.0f)
2983+
return 0.0f;
2984+
Np = powf(pq, m2_inv);
2985+
num = Np - c1;
2986+
if (num < 0.0f)
2987+
num = 0.0f;
2988+
den = c2 - c3 * Np;
2989+
if (den <= 0.0f)
2990+
return 0.0f;
2991+
return powf(num / den, m1_inv);
2992+
}
2993+
2994+
/* Linear [0,1] -> sRGB encoded [0,1]. Standard sRGB OETF. */
2995+
static INLINE float dxgi_linear_to_srgb(float l)
2996+
{
2997+
if (l <= 0.0f)
2998+
return 0.0f;
2999+
if (l >= 1.0f)
3000+
return 1.0f;
3001+
if (l <= 0.0031308f)
3002+
return l * 12.92f;
3003+
return 1.055f * powf(l, 1.0f / 2.4f) - 0.055f;
3004+
}
3005+
3006+
/* Reverse of the forward "inverse tonemap" used to lift SDR into HDR
 * at composition time.
 *
 * With m = max(r, g, b) and k = 1 - (paper_white_nits / max_nits),
 * all three channels are multiplied by 1 / (1 + m * k). Scaling by a
 * single factor derived from the peak channel keeps the channel ratios
 * (and therefore the hue) intact. When max_nits == paper_white_nits,
 * k is 0 and the pixel is returned unchanged.
 *
 * r, g, b are updated in place. Pixels whose peak channel is below
 * 1e-4 are left alone. If either nits argument is zero, negative or
 * NaN the ratio is meaningless (it would divide by zero), so the pixel
 * is also left alone rather than being scaled by an infinite or NaN
 * factor. */
static INLINE void dxgi_tonemap_to_sdr(float *r, float *g, float *b,
      float max_nits, float paper_white_nits)
{
   float peak_ratio, k, m, denom, scale;

   /* Negated '>' so NaN is rejected together with <= 0. */
   if (!(max_nits > 0.0f) || !(paper_white_nits > 0.0f))
      return;

   m = *r;
   if (*g > m)
      m = *g;
   if (*b > m)
      m = *b;
   if (m < 1.0e-4f)
      return;

   peak_ratio = max_nits / paper_white_nits;
   k          = 1.0f - (1.0f / peak_ratio);
   denom      = 1.0f + m * k;
   /* k is negative when max_nits < paper_white_nits; keep the divisor
    * strictly positive. */
   if (denom < 1.0e-4f)
      denom = 1.0e-4f;

   scale = 1.0f / denom;
   *r   *= scale;
   *g   *= scale;
   *b   *= scale;
}
3034+
3035+
/* Rotates a linear RGB triple from BT.2020 primaries to BT.709
 * primaries, in place. The coefficients are meant to match k2020to709
 * in the forward HLSL/GLSL shaders. Results may fall outside [0,1] for
 * colours that BT.709 cannot represent. */
static INLINE void dxgi_rec2020_to_rec709(float *r, float *g, float *b)
{
   /* Row-major 3x3 matrix: out = M * in. */
   static const float bt2020_to_bt709[3][3] = {
      {  1.6604910f, -0.5876411f, -0.0728499f },
      { -0.1245505f,  1.1328999f, -0.0083494f },
      { -0.0181508f, -0.1005789f,  1.1187297f }
   };
   /* Snapshot the inputs: the outputs alias them. */
   const float in_r = *r;
   const float in_g = *g;
   const float in_b = *b;

   *r = bt2020_to_bt709[0][0] * in_r
      + bt2020_to_bt709[0][1] * in_g
      + bt2020_to_bt709[0][2] * in_b;
   *g = bt2020_to_bt709[1][0] * in_r
      + bt2020_to_bt709[1][1] * in_g
      + bt2020_to_bt709[1][2] * in_b;
   *b = bt2020_to_bt709[2][0] * in_r
      + bt2020_to_bt709[2][1] * in_g
      + bt2020_to_bt709[2][2] * in_b;
}
3044+
3045+
/* Converts a normalized float to an 8-bit UNORM value with
 * round-to-nearest. Inputs at or below 0 map to 0, inputs at or above
 * 1 map to 255, and NaN maps to 0.
 *
 * NaN must be filtered before the integer cast: converting a NaN to
 * int is undefined behaviour in C, and NaN fails a plain 'x <= 0.0f'
 * test. */
static INLINE uint8_t dxgi_float_to_unorm8(float x)
{
   /* Negated '>' catches NaN as well as x <= 0. */
   if (!(x > 0.0f))
      return 0;
   if (x >= 1.0f)
      return 255;
   /* Here 0 < x < 1, so x * 255 + 0.5 lies in (0.5, 255.5) and the
    * truncated result is always within 0..255. */
   return (uint8_t)(int)(x * 255.0f + 0.5f);
}
3055+
3056+
bool dxgi_hdr_readback_to_bgr24(
3057+
DXGI_FORMAT src_format,
3058+
const void* src_data,
3059+
unsigned src_pitch,
3060+
unsigned src_x,
3061+
unsigned src_y,
3062+
unsigned width,
3063+
unsigned height,
3064+
float paper_white_nits,
3065+
uint8_t* dst_bgr24)
3066+
{
3067+
unsigned y, x;
3068+
3069+
if (!src_data || !dst_bgr24 || !width || !height)
3070+
return false;
3071+
if (paper_white_nits < 1.0f)
3072+
paper_white_nits = 200.0f; /* sane fallback if UBO not populated */
3073+
3074+
if (src_format == DXGI_FORMAT_R10G10B10A2_UNORM)
3075+
{
3076+
/* HDR10 PQ. Pixel layout per D3D: 10R | 10G | 10B | 2A (LSB to
3077+
* MSB inside the uint32), so:
3078+
* R = bits 0..9 (code & 0x3FF)
3079+
* G = bits 10..19 ((code >> 10) & 0x3FF)
3080+
* B = bits 20..29 ((code >> 20) & 0x3FF) */
3081+
const uint8_t* src_row = (const uint8_t*)src_data
3082+
+ (size_t)src_pitch * src_y;
3083+
3084+
for (y = 0; y < height; y++, src_row += src_pitch)
3085+
{
3086+
uint8_t* dst = dst_bgr24 + 3 * (size_t)(height - y - 1) * width;
3087+
const uint32_t* src = (const uint32_t*)src_row + src_x;
3088+
for (x = 0; x < width; x++)
3089+
{
3090+
uint32_t px = src[x];
3091+
float r_pq = (float)((px ) & 0x3FFu) * (1.0f / 1023.0f);
3092+
float g_pq = (float)((px >> 10) & 0x3FFu) * (1.0f / 1023.0f);
3093+
float b_pq = (float)((px >> 20) & 0x3FFu) * (1.0f / 1023.0f);
3094+
/* PQ -> linear, normalized so 1.0 = 10000 nits. */
3095+
float r = dxgi_st2084_to_linear(r_pq);
3096+
float g = dxgi_st2084_to_linear(g_pq);
3097+
float b = dxgi_st2084_to_linear(b_pq);
3098+
/* Rescale to paper-white-relative linear (SDR 1.0 == paper_white).
3099+
* The forward path scaled by (paper_white / 10000) before PQ
3100+
* encoding, so we undo it by multiplying by (10000 / paper_white). */
3101+
float scale = 10000.0f / paper_white_nits;
3102+
r *= scale; g *= scale; b *= scale;
3103+
/* BT.2020 -> BT.709 so the final sRGB encode is meaningful. */
3104+
dxgi_rec2020_to_rec709(&r, &g, &b);
3105+
/* Tonemap any super-white back into [0,1] using the same
3106+
* peak_ratio the forward inverse-tonemap used. */
3107+
dxgi_tonemap_to_sdr(&r, &g, &b, paper_white_nits, paper_white_nits);
3108+
/* sRGB OETF for the BGR24 output. */
3109+
dst[3 * x + 0] = dxgi_float_to_unorm8(dxgi_linear_to_srgb(b));
3110+
dst[3 * x + 1] = dxgi_float_to_unorm8(dxgi_linear_to_srgb(g));
3111+
dst[3 * x + 2] = dxgi_float_to_unorm8(dxgi_linear_to_srgb(r));
3112+
}
3113+
}
3114+
return true;
3115+
}
3116+
3117+
if (src_format == DXGI_FORMAT_R16G16B16A16_FLOAT)
3118+
{
3119+
/* scRGB: linear BT.709, FP16, 1.0 == 80 nits.
3120+
* Undo the forward scale of (paper_white / 80) so SDR 1.0 == 1.0. */
3121+
const uint8_t* src_row = (const uint8_t*)src_data
3122+
+ (size_t)src_pitch * src_y;
3123+
const float inv_scale = 80.0f / paper_white_nits;
3124+
3125+
for (y = 0; y < height; y++, src_row += src_pitch)
3126+
{
3127+
uint8_t* dst = dst_bgr24 + 3 * (size_t)(height - y - 1) * width;
3128+
const uint16_t* src = (const uint16_t*)src_row + src_x * 4;
3129+
for (x = 0; x < width; x++)
3130+
{
3131+
float r = dxgi_half_to_float(src[4 * x + 0]) * inv_scale;
3132+
float g = dxgi_half_to_float(src[4 * x + 1]) * inv_scale;
3133+
float b = dxgi_half_to_float(src[4 * x + 2]) * inv_scale;
3134+
/* scRGB carries legal negative values for out-of-gamut
3135+
* colours; clamping in LinearToSRGB handles them. */
3136+
dst[3 * x + 0] = dxgi_float_to_unorm8(dxgi_linear_to_srgb(b));
3137+
dst[3 * x + 1] = dxgi_float_to_unorm8(dxgi_linear_to_srgb(g));
3138+
dst[3 * x + 2] = dxgi_float_to_unorm8(dxgi_linear_to_srgb(r));
3139+
}
3140+
}
3141+
return true;
3142+
}
3143+
3144+
return false;
3145+
}
29153146
#endif

gfx/common/dxgi_common.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,26 @@ void dxgi_set_hdr_metadata(
476476
float max_cll,
477477
float max_fall
478478
);
479+
480+
/* Convert an HDR swapchain-format pixel buffer into SDR BGR24 bottom-up,
481+
* suitable for a screenshot. src_format is the swapchain format:
482+
* - DXGI_FORMAT_R10G10B10A2_UNORM : HDR10 (ST.2084 PQ, BT.2020)
483+
* - DXGI_FORMAT_R16G16B16A16_FLOAT: scRGB (linear BT.709, 1.0 = 80 nits)
484+
* paper_white_nits is the user's configured SDR paper-white (typically
485+
* 200), used to scale the HDR encoding back down so SDR content in the
486+
* original framebuffer maps back to SDR 1.0. Source is top-down; the
487+
* output is laid out bottom-up to match the read_viewport contract.
488+
* Returns false if the format is not an HDR format we handle. */
489+
bool dxgi_hdr_readback_to_bgr24(
490+
DXGI_FORMAT src_format,
491+
const void* src_data,
492+
unsigned src_pitch,
493+
unsigned src_x,
494+
unsigned src_y,
495+
unsigned width,
496+
unsigned height,
497+
float paper_white_nits,
498+
uint8_t* dst_bgr24);
479499
#endif
480500

481501
DXGI_FORMAT glslang_format_to_dxgi(glslang_format fmt);

gfx/drivers/d3d11.c

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5035,15 +5035,6 @@ static bool d3d11_gfx_read_viewport(void* data, uint8_t* buffer, bool is_idle)
50355035
if (!d3d11)
50365036
return false;
50375037

5038-
/*This implementation produces wrong result when using HDR*/
5039-
#ifdef HAVE_DXGI_HDR
5040-
if ((d3d11->flags & D3D11_ST_FLAG_HDR_ENABLE))
5041-
{
5042-
RARCH_ERR("[D3D11] HDR screenshot not supported.\n");
5043-
return false;
5044-
}
5045-
#endif
5046-
50475038
/* Get the back buffer. */
50485039
m_SwapChain = d3d11->swapChain;
50495040
#ifdef __cplusplus
@@ -5080,36 +5071,72 @@ static bool d3d11_gfx_read_viewport(void* data, uint8_t* buffer, bool is_idle)
50805071
/* Copy back buffer to back buffer staging. */
50815072
d3d11->context->lpVtbl->CopyResource(d3d11->context, BackBufferStaging, BackBufferResource);
50825073

5083-
/* Create the image. */
5074+
/* Map the staging texture for CPU read. */
50845075
d3d11->context->lpVtbl->Map(d3d11->context, BackBufferStaging, 0, D3D11_MAP_READ, 0, &Map);
50855076
BackBufferData = (const uint8_t*)Map.pData;
50865077

5087-
/* Assuming format is DXGI_FORMAT_R8G8B8A8_UNORM */
5088-
if (StagingDesc.Format == DXGI_FORMAT_R8G8B8A8_UNORM)
50895078
{
5079+
unsigned vp_x = (d3d11->vp.x > 0) ? d3d11->vp.x : 0;
50905080
unsigned vp_y = (d3d11->vp.y > 0) ? d3d11->vp.y : 0;
50915081
unsigned vp_width = (d3d11->vp.width > d3d11->vp.full_width) ? d3d11->vp.full_width : d3d11->vp.width;
50925082
unsigned vp_height = (d3d11->vp.height > d3d11->vp.full_height) ? d3d11->vp.full_height : d3d11->vp.height;
50935083

5094-
BackBufferData += Map.RowPitch * vp_y;
5084+
ret = true;
50955085

5096-
for (y = 0; y < vp_height; y++, BackBufferData += Map.RowPitch)
5086+
switch (StagingDesc.Format)
50975087
{
5098-
bufferRow = buffer + 3 * (vp_height - y - 1) * vp_width;
5088+
case DXGI_FORMAT_R8G8B8A8_UNORM:
5089+
/* SDR RGBA8 -> BGR24 swizzle, bottom-up. */
5090+
BackBufferData += Map.RowPitch * vp_y;
5091+
for (y = 0; y < vp_height; y++, BackBufferData += Map.RowPitch)
5092+
{
5093+
bufferRow = buffer + 3 * (vp_height - y - 1) * vp_width;
5094+
for (x = 0; x < vp_width; x++)
5095+
{
5096+
bufferRow[3 * x + 2] = BackBufferData[4 * (x + vp_x) + 0];
5097+
bufferRow[3 * x + 1] = BackBufferData[4 * (x + vp_x) + 1];
5098+
bufferRow[3 * x + 0] = BackBufferData[4 * (x + vp_x) + 2];
5099+
}
5100+
}
5101+
break;
50995102

5100-
for (x = 0; x < vp_width; x++)
5101-
{
5102-
bufferRow[3 * x + 2] = BackBufferData[4 * (x + d3d11->vp.x) + 0];
5103-
bufferRow[3 * x + 1] = BackBufferData[4 * (x + d3d11->vp.x) + 1];
5104-
bufferRow[3 * x + 0] = BackBufferData[4 * (x + d3d11->vp.x) + 2];
5105-
}
5103+
case DXGI_FORMAT_B8G8R8A8_UNORM:
5104+
/* SDR BGRA8 -> BGR24 byte-drop, bottom-up. */
5105+
BackBufferData += Map.RowPitch * vp_y;
5106+
for (y = 0; y < vp_height; y++, BackBufferData += Map.RowPitch)
5107+
{
5108+
bufferRow = buffer + 3 * (vp_height - y - 1) * vp_width;
5109+
for (x = 0; x < vp_width; x++)
5110+
{
5111+
bufferRow[3 * x + 0] = BackBufferData[4 * (x + vp_x) + 0];
5112+
bufferRow[3 * x + 1] = BackBufferData[4 * (x + vp_x) + 1];
5113+
bufferRow[3 * x + 2] = BackBufferData[4 * (x + vp_x) + 2];
5114+
}
5115+
}
5116+
break;
5117+
5118+
#ifdef HAVE_DXGI_HDR
5119+
case DXGI_FORMAT_R10G10B10A2_UNORM:
5120+
case DXGI_FORMAT_R16G16B16A16_FLOAT:
5121+
/* HDR10 PQ or scRGB: hand off to the CPU HDR decoder.
5122+
* It undoes the forward HDR encoding using paper_white_nits
5123+
* and writes sRGB-encoded BGR24 bottom-up. */
5124+
if (!dxgi_hdr_readback_to_bgr24(
5125+
StagingDesc.Format,
5126+
Map.pData, Map.RowPitch,
5127+
vp_x, vp_y, vp_width, vp_height,
5128+
d3d11->hdr.ubo_values.paper_white_nits,
5129+
buffer))
5130+
ret = false;
5131+
break;
5132+
#endif
5133+
5134+
default:
5135+
RARCH_ERR("[D3D11] Unexpected swapchain format %u.\n",
5136+
(unsigned)StagingDesc.Format);
5137+
ret = false;
5138+
break;
51065139
}
5107-
ret = true;
5108-
}
5109-
else
5110-
{
5111-
RARCH_ERR("[D3D11] Unexpected swapchain format.\n");
5112-
ret = false;
51135140
}
51145141

51155142
d3d11->context->lpVtbl->Unmap(d3d11->context, BackBufferStaging, 0);

0 commit comments

Comments
 (0)