@@ -45,10 +45,11 @@ struct parameters_t
4545// We do some dumb things with bitfields here like not using `vector<uint16_t,N>`, because AMD doesn't support them in push constants
4646struct SPerWorkgroup
4747{
48- static inline SPerWorkgroup create (const float32_t3 _scale, const uint16_t3 output, const uint16_t3 preload, const uint16_t _otherPreloadOffset)
48+ static inline SPerWorkgroup create (const float32_t3 _scale, const uint16_t _imageDim, const uint16_t3 output, const uint16_t3 preload, const uint16_t _otherPreloadOffset)
4949 {
5050 SPerWorkgroup retval;
5151 retval.scale = _scale;
52+ retval.imageDim = _imageDim;
5253 retval.preloadWidth = preload[0 ];
5354 retval.preloadHeight = preload[1 ];
5455 retval.preloadDepth = preload[2 ];
@@ -59,34 +60,37 @@ struct SPerWorkgroup
5960 return retval;
6061 }
6162
62- inline uint16_t3 getOutput ( ) NBL_CONST_MEMBER_FUNC
63+ inline uint16_t3 getOutputBaseCoord ( const uint16_t3 workgroup ) NBL_CONST_MEMBER_FUNC
6364 {
64- return uint16_t3 (outputWidth,outputHeight,outputDepth);
65+ return workgroup* uint16_t3 (outputWidth,outputHeight,outputDepth);
6566 }
6667
6768 inline uint16_t3 getWorkgroupCount (const uint16_t3 outExtent, const uint16_t layersToBlit=0 ) NBL_CONST_MEMBER_FUNC
6869 {
69- uint16_t3 retval = uint16_t3 (1 ,1 ,1 );
70- retval += (outExtent-uint16_t3 (1 ,1 ,1 ))/getOutput ();
70+ const uint16_t3 unit = uint16_t3 (1 ,1 ,1 );
71+ uint16_t3 retval = unit;
72+ retval += (outExtent-unit)/getOutputBaseCoord (unit);
7173 if (layersToBlit)
72- retval[3 ] = layersToBlit;
74+ retval[2 ] = layersToBlit;
7375 return retval;
7476 }
7577
7678#ifndef __HLSL_VERSION
77- inline operator bool () const
79+ explicit inline operator bool () const
7880 {
7981 return outputWidth && outputHeight && outputDepth && preloadWidth && preloadHeight && preloadDepth;
8082 }
8183#endif
8284
8385 // ratio of input pixels to output
8486 float32_t3 scale;
87+ // whether it's an image1D, image2D or image3D
88+ uint32_t imageDim : 2 ;
89+ uint32_t unused0 : 14 ; // channel, iterationRegionPrefixSums ?
8590 // 16bit in each dimension because some GPUs actually have enough shared memory for 32k pixels
8691 uint32_t outputWidth : 16 ;
8792 uint32_t outputHeight : 16 ;
8893 uint32_t outputDepth : 16 ;
89- uint32_t unused0 : 16 ; // channel, image type, iterationRegionPrefixSums ?
9094 uint32_t preloadWidth : 16 ;
9195 uint32_t preloadHeight : 16 ;
9296 uint32_t preloadDepth : 16 ;
@@ -97,22 +101,27 @@ struct SPerWorkgroup
97101
98102struct Parameters
99103{
100- static Parameters create (
101- const SPerWorkgroup perWG,
102- const uint16_t3 inImageExtent, const uint16_t3 outImageExtent
103- )
104+ #ifndef __HLSL_VERSION
105+ explicit inline operator bool () const
104106 {
105- Parameters retval;
106- retval.perWG = perWG;
107- return retval;
107+ return bool (perWG);
108108 }
109+ #endif
109110
110- SPerWorkgroup perWG;
111- // general settings
112- uint32_t lastChannel : 2 ;
113- uint32_t coverage : 1 ;
114- uint32_t unused : 29 ;
111+ SPerWorkgroup perWG; // rename to perBlitWG?
112+ //! general settings
113+ uint32_t inputDescIx : 19 ;
114+ uint32_t samplerDescIx : 11 ;
115+ uint32_t unused0 : 2 ;
116+ //
117+ uint32_t outputDescIx : 19 ;
118+ uint32_t channelCount : 3 ;
119+ uint32_t unused1 : 10 ;
120+ //
121+ uint32_t unused2 : 12 ;
115122 //! coverage settings
123+ uint32_t intermAlphaDescIx : 19 ;
124+ uint32_t coverage : 1 ;
116125 // required to compare the atomic count of passing pixels against, so we can get original coverage
117126 uint32_t inPixelCount;
118127};
0 commit comments