Skip to content

Commit 11de5d2

Browse files
cryvosh authored and ErichDonGubler committed
Changelog and more tests
1 parent 80710e3 commit 11de5d2

7 files changed

Lines changed: 290 additions & 106 deletions

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ Bottom level categories:
5757
- `DisplayHandle` should now be passed to `InstanceDescriptor` for correct EGL initialization on Wayland. By @MarijnS95 in [#8012](https://github.com/gfx-rs/wgpu/pull/8012)
5858
Note that the existing workaround to create surfaces before the adapter is no longer valid.
5959

60+
#### naga
61+
- Fixed `workgroupUniformLoad` incorrectly returning an atomic when called on an atomic, it now returns the inner `T` as per the spec. By @cryvosh in [#8791](https://github.com/gfx-rs/wgpu/pull/8791).
62+
6063
### Documentation
6164

6265
#### General
Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
// Test workgroupUniformLoad specialization for atomic<T> -> T
2-
// Issue: https://github.com/gfx-rs/wgpu/issues/8785
2+
3+
struct AtomicStruct {
4+
atomic_scalar: atomic<u32>,
5+
atomic_arr: array<atomic<i32>, 2>,
6+
}
37

48
var<workgroup> wg_scalar: atomic<u32>;
59
var<workgroup> wg_signed: atomic<i32>;
10+
var<workgroup> wg_struct: AtomicStruct;
611

712
@compute @workgroup_size(64)
813
fn test_atomic_workgroup_uniform_load(
@@ -11,9 +16,11 @@ fn test_atomic_workgroup_uniform_load(
1116
) {
1217
let active_tile_index = workgroup_id.x + workgroup_id.y * 32768;
1318

14-
// Each thread may set the atomic
19+
// Each thread may set the atomics
1520
atomicOr(&wg_scalar, u32(active_tile_index >= 64));
1621
atomicAdd(&wg_signed, 1i);
22+
atomicStore(&wg_struct.atomic_scalar, 1u);
23+
atomicAdd(&wg_struct.atomic_arr[0], 1i);
1724

1825
workgroupBarrier();
1926

@@ -23,8 +30,14 @@ fn test_atomic_workgroup_uniform_load(
2330
// workgroupUniformLoad on atomic<i32> should return i32
2431
let signed_val: i32 = workgroupUniformLoad(&wg_signed);
2532

26-
// Should be able to use the result in comparisons
27-
if scalar_val == 0u && signed_val > 0i {
33+
// workgroupUniformLoad on struct.atomic_scalar should return u32
34+
let struct_scalar: u32 = workgroupUniformLoad(&wg_struct.atomic_scalar);
35+
36+
// workgroupUniformLoad on struct.atomic_arr[i] should return i32
37+
let struct_arr_val: i32 = workgroupUniformLoad(&wg_struct.atomic_arr[0]);
38+
39+
// Should be able to use all results in comparisons
40+
if scalar_val == 0u && signed_val > 0i && struct_scalar > 0u && struct_arr_val > 0i {
2841
return;
2942
}
3043
}

naga/tests/out/glsl/wgsl-workgroup-uniform-load-atomic.test_atomic_workgroup_uniform_load.Compute.glsl

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,43 +5,76 @@ precision highp int;
55

66
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
77

8+
struct AtomicStruct {
9+
uint atomic_scalar;
10+
int atomic_arr[2];
11+
};
812
shared uint wg_scalar;
913

1014
shared int wg_signed;
1115

16+
shared AtomicStruct wg_struct;
17+
1218

1319
void main() {
1420
if (gl_LocalInvocationID == uvec3(0u)) {
1521
wg_scalar = 0u;
1622
wg_signed = 0;
23+
wg_struct = AtomicStruct(0u, int[2](0, 0));
1724
}
1825
memoryBarrierShared();
1926
barrier();
2027
uvec3 workgroup_id = gl_WorkGroupID;
2128
uvec3 local_id = gl_LocalInvocationID;
2229
bool local = false;
30+
bool local_1 = false;
31+
bool local_2 = false;
2332
uint active_tile_index = (workgroup_id.x + (workgroup_id.y * 32768u));
2433
uint _e11 = atomicOr(wg_scalar, uint((active_tile_index >= 64u)));
2534
int _e14 = atomicAdd(wg_signed, 1);
35+
wg_struct.atomic_scalar = 1u;
36+
int _e22 = atomicAdd(wg_struct.atomic_arr[0], 1);
37+
memoryBarrierShared();
38+
barrier();
39+
memoryBarrierShared();
40+
barrier();
41+
uint _e24 = wg_scalar;
42+
memoryBarrierShared();
43+
barrier();
2644
memoryBarrierShared();
2745
barrier();
46+
int _e26 = wg_signed;
2847
memoryBarrierShared();
2948
barrier();
30-
uint _e16 = wg_scalar;
3149
memoryBarrierShared();
3250
barrier();
51+
uint _e29 = wg_struct.atomic_scalar;
3352
memoryBarrierShared();
3453
barrier();
35-
int _e18 = wg_signed;
3654
memoryBarrierShared();
3755
barrier();
38-
if ((_e16 == 0u)) {
39-
local = (_e18 > 0);
56+
int _e33 = wg_struct.atomic_arr[0];
57+
memoryBarrierShared();
58+
barrier();
59+
if ((_e24 == 0u)) {
60+
local = (_e26 > 0);
4061
} else {
4162
local = false;
4263
}
43-
bool _e26 = local;
44-
if (_e26) {
64+
bool _e41 = local;
65+
if (_e41) {
66+
local_1 = (_e29 > 0u);
67+
} else {
68+
local_1 = false;
69+
}
70+
bool _e47 = local_1;
71+
if (_e47) {
72+
local_2 = (_e33 > 0);
73+
} else {
74+
local_2 = false;
75+
}
76+
bool _e53 = local_2;
77+
if (_e53) {
4578
return;
4679
} else {
4780
return;

naga/tests/out/hlsl/wgsl-workgroup-uniform-load-atomic.hlsl

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,62 @@
1+
struct AtomicStruct {
2+
uint atomic_scalar;
3+
int atomic_arr[2];
4+
};
5+
16
groupshared uint wg_scalar;
27
groupshared int wg_signed;
8+
groupshared AtomicStruct wg_struct;
39

410
[numthreads(64, 1, 1)]
511
void test_atomic_workgroup_uniform_load(uint3 workgroup_id : SV_GroupID, uint3 local_id : SV_GroupThreadID, uint3 __local_invocation_id : SV_GroupThreadID)
612
{
713
if (all(__local_invocation_id == uint3(0u, 0u, 0u))) {
814
wg_scalar = (uint)0;
915
wg_signed = (int)0;
16+
wg_struct = (AtomicStruct)0;
1017
}
1118
GroupMemoryBarrierWithGroupSync();
1219
bool local = (bool)0;
20+
bool local_1 = (bool)0;
21+
bool local_2 = (bool)0;
1322

1423
uint active_tile_index = (workgroup_id.x + (workgroup_id.y * 32768u));
1524
uint _e11; InterlockedOr(wg_scalar, uint((active_tile_index >= 64u)), _e11);
1625
int _e14; InterlockedAdd(wg_signed, int(1), _e14);
26+
wg_struct.atomic_scalar = 1u;
27+
int _e22; InterlockedAdd(wg_struct.atomic_arr[0], int(1), _e22);
28+
GroupMemoryBarrierWithGroupSync();
29+
GroupMemoryBarrierWithGroupSync();
30+
uint _e24 = wg_scalar;
31+
GroupMemoryBarrierWithGroupSync();
32+
GroupMemoryBarrierWithGroupSync();
33+
int _e26 = wg_signed;
1734
GroupMemoryBarrierWithGroupSync();
1835
GroupMemoryBarrierWithGroupSync();
19-
uint _e16 = wg_scalar;
36+
uint _e29 = wg_struct.atomic_scalar;
2037
GroupMemoryBarrierWithGroupSync();
2138
GroupMemoryBarrierWithGroupSync();
22-
int _e18 = wg_signed;
39+
int _e33 = wg_struct.atomic_arr[0];
2340
GroupMemoryBarrierWithGroupSync();
24-
if ((_e16 == 0u)) {
25-
local = (_e18 > int(0));
41+
if ((_e24 == 0u)) {
42+
local = (_e26 > int(0));
2643
} else {
2744
local = false;
2845
}
29-
bool _e26 = local;
30-
if (_e26) {
46+
bool _e41 = local;
47+
if (_e41) {
48+
local_1 = (_e29 > 0u);
49+
} else {
50+
local_1 = false;
51+
}
52+
bool _e47 = local_1;
53+
if (_e47) {
54+
local_2 = (_e33 > int(0));
55+
} else {
56+
local_2 = false;
57+
}
58+
bool _e53 = local_2;
59+
if (_e53) {
3160
return;
3261
} else {
3362
return;

naga/tests/out/msl/wgsl-workgroup-uniform-load-atomic.msl

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44

55
using metal::uint;
66

7+
struct type_2 {
8+
metal::atomic_int inner[2];
9+
};
10+
struct AtomicStruct {
11+
metal::atomic_uint atomic_scalar;
12+
type_2 atomic_arr;
13+
};
714

815
struct test_atomic_workgroup_uniform_loadInput {
916
};
@@ -12,30 +19,57 @@ kernel void test_atomic_workgroup_uniform_load(
1219
, metal::uint3 local_id [[thread_position_in_threadgroup]]
1320
, threadgroup metal::atomic_uint& wg_scalar
1421
, threadgroup metal::atomic_int& wg_signed
22+
, threadgroup AtomicStruct& wg_struct
1523
) {
1624
if (metal::all(local_id == metal::uint3(0u))) {
1725
metal::atomic_store_explicit(&wg_scalar, 0, metal::memory_order_relaxed);
1826
metal::atomic_store_explicit(&wg_signed, 0, metal::memory_order_relaxed);
27+
metal::atomic_store_explicit(&wg_struct.atomic_scalar, 0, metal::memory_order_relaxed);
28+
for (int __i0 = 0; __i0 < 2; __i0++) {
29+
metal::atomic_store_explicit(&wg_struct.atomic_arr.inner[__i0], 0, metal::memory_order_relaxed);
30+
}
1931
}
2032
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
2133
bool local = {};
34+
bool local_1 = {};
35+
bool local_2 = {};
2236
uint active_tile_index = workgroup_id.x + (workgroup_id.y * 32768u);
2337
uint _e11 = metal::atomic_fetch_or_explicit(&wg_scalar, static_cast<uint>(active_tile_index >= 64u), metal::memory_order_relaxed);
2438
int _e14 = metal::atomic_fetch_add_explicit(&wg_signed, 1, metal::memory_order_relaxed);
39+
metal::atomic_store_explicit(&wg_struct.atomic_scalar, 1u, metal::memory_order_relaxed);
40+
int _e22 = metal::atomic_fetch_add_explicit(&wg_struct.atomic_arr.inner[0], 1, metal::memory_order_relaxed);
2541
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
2642
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
2743
uint unnamed = metal::atomic_load_explicit(&wg_scalar, metal::memory_order_relaxed);
2844
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
2945
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
3046
int unnamed_1 = metal::atomic_load_explicit(&wg_signed, metal::memory_order_relaxed);
3147
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
48+
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
49+
uint unnamed_2 = metal::atomic_load_explicit(&wg_struct.atomic_scalar, metal::memory_order_relaxed);
50+
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
51+
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
52+
int unnamed_3 = metal::atomic_load_explicit(&wg_struct.atomic_arr.inner[0], metal::memory_order_relaxed);
53+
metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
3254
if (unnamed == 0u) {
3355
local = unnamed_1 > 0;
3456
} else {
3557
local = false;
3658
}
37-
bool _e26 = local;
38-
if (_e26) {
59+
bool _e41 = local;
60+
if (_e41) {
61+
local_1 = unnamed_2 > 0u;
62+
} else {
63+
local_1 = false;
64+
}
65+
bool _e47 = local_1;
66+
if (_e47) {
67+
local_2 = unnamed_3 > 0;
68+
} else {
69+
local_2 = false;
70+
}
71+
bool _e53 = local_2;
72+
if (_e53) {
3973
return;
4074
} else {
4175
return;

0 commit comments

Comments
 (0)