Skip to content

Commit 3ca6a1c

Browse files
andyleiserson and teoxoy
authored and committed
Defer some bind group processing until draw/dispatch
This allows us to save some work when a bind group is replaced, although WebGPU still requires us to fail a submission if any resource in any bind group that was ever referenced during encoding is destroyed. Fixes gfx-rs#8399
1 parent e8dfc72 commit 3ca6a1c

9 files changed

Lines changed: 244 additions & 117 deletions

File tree

cts_runner/test.lst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,13 @@ webgpu:api,validation,render_pass,render_pass_descriptor:color_attachments,depth
125125
// FAIL: webgpu:api,validation,render_pass,render_pass_descriptor:color_attachments,depthSlice,overlaps,diff_miplevel:*
126126
webgpu:api,validation,render_pass,render_pass_descriptor:resolveTarget,*
127127
webgpu:api,validation,render_pass,resolve:resolve_attachment:*
128+
webgpu:api,validation,resource_usages,buffer,in_pass_encoder:*
129+
// FAIL: 8 other cases in resource_usages,texture,in_pass_encoder. https://github.com/gfx-rs/wgpu/issues/3126
130+
webgpu:api,validation,resource_usages,texture,in_pass_encoder:scope,*
131+
webgpu:api,validation,resource_usages,texture,in_pass_encoder:shader_stages_and_visibility,*
132+
webgpu:api,validation,resource_usages,texture,in_pass_encoder:subresources_and_binding_types_combination_for_aspect:*
128133
webgpu:api,validation,resource_usages,texture,in_pass_encoder:subresources_and_binding_types_combination_for_color:compute=false;type0="render-target";type1="render-target"
134+
webgpu:api,validation,resource_usages,texture,in_pass_encoder:unused_bindings_in_pipeline:*
129135
webgpu:api,validation,texture,rg11b10ufloat_renderable:*
130136
webgpu:api,operation,render_pipeline,overrides:*
131137
webgpu:api,operation,rendering,basic:clear:*

wgpu-core/src/command/bind.rs

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use core::{iter::zip, ops::Range};
2+
13
use alloc::{boxed::Box, sync::Arc, vec::Vec};
24

35
use arrayvec::ArrayVec;
@@ -192,14 +194,16 @@ mod compat {
192194
}
193195

194196
#[derive(Debug, Default)]
195-
pub(crate) struct BoundBindGroupLayouts {
197+
pub(super) struct BoundBindGroupLayouts {
196198
entries: ArrayVec<Entry, { hal::MAX_BIND_GROUPS }>,
199+
rebind_start: usize,
197200
}
198201

199202
impl BoundBindGroupLayouts {
200203
pub fn new() -> Self {
201204
Self {
202205
entries: (0..hal::MAX_BIND_GROUPS).map(|_| Entry::empty()).collect(),
206+
rebind_start: 0,
203207
}
204208
}
205209

@@ -211,15 +215,19 @@ mod compat {
211215
.unwrap_or(self.entries.len())
212216
}
213217

214-
fn make_range(&self, start_index: usize) -> Range<usize> {
218+
/// Get the range of entries that needs to be rebound, and clears it.
219+
pub fn take_rebind_range(&mut self) -> Range<usize> {
215220
let end = self.num_valid_entries();
216-
start_index..end.max(start_index)
221+
let start = self.rebind_start;
222+
self.rebind_start = end;
223+
start..end.max(start)
224+
}
225+
226+
pub fn update_start_index(&mut self, start_index: usize) {
227+
self.rebind_start = self.rebind_start.min(start_index);
217228
}
218229

219-
pub fn update_expectations(
220-
&mut self,
221-
expectations: &[Arc<BindGroupLayout>],
222-
) -> Range<usize> {
230+
pub fn update_expectations(&mut self, expectations: &[Arc<BindGroupLayout>]) {
223231
let start_index = self
224232
.entries
225233
.iter()
@@ -237,12 +245,12 @@ mod compat {
237245
for e in self.entries[expectations.len()..].iter_mut() {
238246
e.expected = None;
239247
}
240-
self.make_range(start_index)
248+
self.update_start_index(start_index);
241249
}
242250

243-
pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout>) -> Range<usize> {
251+
pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout>) {
244252
self.entries[index].assigned = Some(value);
245-
self.make_range(index)
253+
self.update_start_index(index);
246254
}
247255

248256
pub fn list_active(&self) -> impl Iterator<Item = usize> + '_ {
@@ -333,10 +341,10 @@ impl Binder {
333341
&'a mut self,
334342
new: &Arc<PipelineLayout>,
335343
late_sized_buffer_groups: &[LateSizedBufferGroup],
336-
) -> (usize, &'a [EntryPayload]) {
344+
) {
337345
let old_id_opt = self.pipeline_layout.replace(new.clone());
338346

339-
let mut bind_range = self.manager.update_expectations(&new.bind_group_layouts);
347+
self.manager.update_expectations(&new.bind_group_layouts);
340348

341349
// Update the buffer binding sizes that are required by shaders.
342350
for (payload, late_group) in self.payloads.iter_mut().zip(late_sized_buffer_groups) {
@@ -363,19 +371,17 @@ impl Binder {
363371
if let Some(old) = old_id_opt {
364372
// root constants are the base compatibility property
365373
if old.push_constant_ranges != new.push_constant_ranges {
366-
bind_range.start = 0;
374+
self.manager.update_start_index(0);
367375
}
368376
}
369-
370-
(bind_range.start, &self.payloads[bind_range])
371377
}
372378

373379
pub(super) fn assign_group<'a>(
374380
&'a mut self,
375381
index: usize,
376382
bind_group: &Arc<BindGroup>,
377383
offsets: &[wgt::DynamicOffset],
378-
) -> &'a [EntryPayload] {
384+
) {
379385
let payload = &mut self.payloads[index];
380386
payload.group = Some(bind_group.clone());
381387
payload.dynamic_offsets.clear();
@@ -401,8 +407,20 @@ impl Binder {
401407
}
402408
}
403409

404-
let bind_range = self.manager.assign(index, bind_group.layout.clone());
405-
&self.payloads[bind_range]
410+
self.manager.assign(index, bind_group.layout.clone());
411+
}
412+
413+
/// Get the range of entries that needs to be rebound, and clears it.
414+
pub(super) fn take_rebind_range(&mut self) -> Range<usize> {
415+
self.manager.take_rebind_range()
416+
}
417+
418+
pub(super) fn entries(
419+
&self,
420+
range: Range<usize>,
421+
) -> impl ExactSizeIterator<Item = (usize, &'_ EntryPayload)> + '_ {
422+
let payloads = &self.payloads[range.clone()];
423+
zip(range, payloads)
406424
}
407425

408426
pub(super) fn list_active<'a>(&'a self) -> impl Iterator<Item = &'a Arc<BindGroup>> + 'a {

wgpu-core/src/command/compute.rs

Lines changed: 62 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ use wgt::{
77
use alloc::{borrow::Cow, boxed::Box, sync::Arc, vec::Vec};
88
use core::{convert::Infallible, fmt, str};
99

10-
use crate::{api_log, binding_model::BindError, resource::RawResourceAccess};
10+
use crate::{
11+
api_log, binding_model::BindError, command::pass::flush_bindings_helper,
12+
resource::RawResourceAccess,
13+
};
1114
use crate::{
1215
binding_model::{LateMinBufferBindingSizeMismatch, PushConstantUploadError},
1316
command::{
@@ -280,27 +283,68 @@ impl<'scope, 'snatch_guard, 'cmd_enc> State<'scope, 'snatch_guard, 'cmd_enc> {
280283
}
281284
}
282285

283-
// `extra_buffer` is there to represent the indirect buffer that is also
284-
// part of the usage scope.
285-
fn flush_states(
286+
/// Flush binding state in preparation for a dispatch.
287+
///
288+
/// # Differences between render and compute passes
289+
///
290+
/// There are differences between the `flush_bindings` implementations for
291+
/// render and compute passes, because render passes have a single usage
292+
/// scope for the entire pass, and compute passes have a separate usage
293+
/// scope for each dispatch.
294+
///
295+
/// For compute passes, bind groups are merged into a fresh usage scope
296+
/// here, not into the pass usage scope within calls to `set_bind_group`. As
297+
/// specified by WebGPU, for compute passes, we merge only the bind groups
298+
/// that are actually used by the pipeline, unlike render passes, which
299+
/// merge every bind group that is ever set, even if it is not ultimately
300+
/// used by the pipeline.
301+
///
302+
/// For compute passes, we call `drain_barriers` here, because barriers may
303+
/// be needed before each dispatch if a previous dispatch had a conflicting
304+
/// usage. For render passes, barriers are emitted once at the start of the
305+
/// render pass.
306+
///
307+
/// # Indirect buffer handling
308+
///
309+
/// For indirect dispatches without validation, pass both `indirect_buffer`
310+
/// and `indirect_buffer_index_if_not_validating`. The indirect buffer will
311+
/// be added to the usage scope and the tracker.
312+
///
313+
/// For indirect dispatches with validation, pass only `indirect_buffer`.
314+
/// The indirect buffer will be added to the usage scope to detect usage
315+
/// conflicts. The indirect buffer does not need to be added to the tracker;
316+
/// the indirect validation code handles transitions manually.
317+
fn flush_bindings(
286318
&mut self,
287-
indirect_buffer: Option<TrackerIndex>,
288-
) -> Result<(), ResourceUsageCompatibilityError> {
319+
indirect_buffer: Option<&Arc<Buffer>>,
320+
indirect_buffer_index_if_not_validating: Option<TrackerIndex>,
321+
) -> Result<(), ComputePassErrorInner> {
322+
let mut scope = self.pass.base.device.new_usage_scope();
323+
289324
for bind_group in self.pass.binder.list_active() {
290-
unsafe { self.pass.scope.merge_bind_group(&bind_group.used)? };
291-
// Note: stateless trackers are not merged: the lifetime reference
292-
// is held to the bind group itself.
325+
unsafe { scope.merge_bind_group(&bind_group.used)? };
293326
}
294327

295-
for bind_group in self.pass.binder.list_active() {
296-
self.intermediate_trackers
297-
.set_from_bind_group(&mut self.pass.scope, &bind_group.used);
328+
// When indirect validation is turned on, our actual use of the buffer
329+
// is `STORAGE_READ_ONLY`, but for usage scope validation, we still want
330+
// to treat it as indirect so we can detect the conflicts prescribed by
331+
// WebGPU. The usage scope we construct here never leaves this function
332+
// (and is not used to populate a tracker), so it's fine to do this.
333+
if let Some(buffer) = indirect_buffer {
334+
scope
335+
.buffers
336+
.merge_single(buffer, wgt::BufferUses::INDIRECT)?;
298337
}
299338

300-
// Add the state of the indirect buffer if it hasn't been hit before.
339+
// Add the state of the indirect buffer, if needed (see above).
301340
self.intermediate_trackers
302341
.buffers
303-
.set_multiple(&mut self.pass.scope.buffers, indirect_buffer);
342+
.set_multiple(&mut scope.buffers, indirect_buffer_index_if_not_validating);
343+
344+
flush_bindings_helper(&mut self.pass, |bind_group| {
345+
self.intermediate_trackers
346+
.set_from_bind_group(&mut scope, &bind_group.used)
347+
})?;
304348

305349
CommandEncoder::drain_barriers(
306350
self.pass.base.raw_encoder,
@@ -821,7 +865,7 @@ fn set_pipeline(
821865
}
822866

823867
// Rebind resources
824-
pass::rebind_resources::<ComputePassErrorInner, _>(
868+
pass::change_pipeline_layout::<ComputePassErrorInner, _>(
825869
&mut state.pass,
826870
&pipeline.layout,
827871
&pipeline.late_sized_buffer_groups,
@@ -850,7 +894,7 @@ fn dispatch(state: &mut State, groups: [u32; 3]) -> Result<(), ComputePassErrorI
850894

851895
state.is_ready()?;
852896

853-
state.flush_states(None)?;
897+
state.flush_bindings(None, None)?;
854898

855899
let groups_size_limit = state
856900
.pass
@@ -1051,7 +1095,7 @@ fn dispatch_indirect(
10511095
}]);
10521096
}
10531097

1054-
state.flush_states(None)?;
1098+
state.flush_bindings(Some(&buffer), None)?;
10551099
unsafe {
10561100
state
10571101
.pass
@@ -1060,14 +1104,8 @@ fn dispatch_indirect(
10601104
.dispatch_indirect(params.dst_buffer, 0);
10611105
}
10621106
} else {
1063-
state
1064-
.pass
1065-
.scope
1066-
.buffers
1067-
.merge_single(&buffer, wgt::BufferUses::INDIRECT)?;
1068-
10691107
use crate::resource::Trackable;
1070-
state.flush_states(Some(buffer.tracker_index()))?;
1108+
state.flush_bindings(Some(&buffer), Some(buffer.tracker_index()))?;
10711109

10721110
let buf_raw = buffer.try_raw(state.pass.base.snatch_guard)?;
10731111
unsafe {

0 commit comments

Comments
 (0)