mirror of
https://github.com/gfx-rs/wgpu.git
synced 2026-04-22 03:02:01 -04:00
Zero-initialize workgroup memory (#3174)
fixes https://github.com/gfx-rs/wgpu/issues/2430
This commit is contained in:
@@ -225,7 +225,8 @@ surface.configure(&device, &config);
|
||||
- Implemented correleation between user timestamps and platform specific presentation timestamps via [`Adapter::get_presentation_timestamp`]. By @cwfitzgerald in [#3240](https://github.com/gfx-rs/wgpu/pull/3240)
|
||||
- Added support for `Features::SHADER_PRIMITIVE_INDEX` on all backends. By @cwfitzgerald in [#3272](https://github.com/gfx-rs/wgpu/pull/3272)
|
||||
- Implemented `TextureFormat::Stencil8`, allowing for stencil testing without depth components. By @Dinnerbone in [#3343](https://github.com/gfx-rs/wgpu/pull/3343)
|
||||
- Implemented `add_srgb_suffix()` for `TextureFormat` for converting linear formats to sRGB. By @Elabajaba in [#3419](https://github.com/gfx-rs/wgpu/pull/3419)
|
||||
- Implemented `add_srgb_suffix()` for `TextureFormat` for converting linear formats to sRGB. By @Elabajaba in [#3419](https://github.com/gfx-rs/wgpu/pull/3419)
|
||||
- Zero-initialize workgroup memory. By @teoxoy in [#3174](https://github.com/gfx-rs/wgpu/pull/3174)
|
||||
|
||||
#### GLES
|
||||
|
||||
|
||||
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -1471,7 +1471,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "naga"
|
||||
version = "0.10.0"
|
||||
source = "git+https://github.com/gfx-rs/naga?rev=1be8024#1be8024bda3594987b417bead5024b98be9ab521"
|
||||
source = "git+https://github.com/gfx-rs/naga?rev=c7d02151f08d6285683795289b5725b827d836d1#c7d02151f08d6285683795289b5725b827d836d1"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"bitflags",
|
||||
|
||||
@@ -39,7 +39,7 @@ path = "./wgpu-hal"
|
||||
|
||||
[workspace.dependencies.naga]
|
||||
git = "https://github.com/gfx-rs/naga"
|
||||
rev = "1be8024"
|
||||
rev = "c7d02151f08d6285683795289b5725b827d836d1"
|
||||
version = "0.10"
|
||||
|
||||
[workspace.dependencies]
|
||||
|
||||
@@ -67,7 +67,7 @@ thiserror = "1"
|
||||
|
||||
[dependencies.naga]
|
||||
git = "https://github.com/gfx-rs/naga"
|
||||
rev = "1be8024"
|
||||
rev = "c7d02151f08d6285683795289b5725b827d836d1"
|
||||
version = "0.10"
|
||||
features = ["clone", "span", "validate"]
|
||||
|
||||
|
||||
@@ -113,14 +113,14 @@ android_system_properties = "0.1.1"
|
||||
|
||||
[dependencies.naga]
|
||||
git = "https://github.com/gfx-rs/naga"
|
||||
rev = "1be8024"
|
||||
rev = "c7d02151f08d6285683795289b5725b827d836d1"
|
||||
version = "0.10"
|
||||
features = ["clone"]
|
||||
|
||||
# DEV dependencies
|
||||
[dev-dependencies.naga]
|
||||
git = "https://github.com/gfx-rs/naga"
|
||||
rev = "1be8024"
|
||||
rev = "c7d02151f08d6285683795289b5725b827d836d1"
|
||||
version = "0.10"
|
||||
features = ["wgsl-in"]
|
||||
|
||||
|
||||
@@ -1070,6 +1070,7 @@ impl crate::Device<super::Api> for super::Device {
|
||||
fake_missing_bindings: false,
|
||||
special_constants_binding,
|
||||
push_constants_target,
|
||||
zero_initialize_workgroup_memory: true,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1032,6 +1032,7 @@ impl crate::Device<super::Api> for super::Device {
|
||||
version: self.shared.shading_language_version,
|
||||
writer_flags,
|
||||
binding_map,
|
||||
zero_initialize_workgroup_memory: true,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -699,6 +699,7 @@ impl crate::Device<super::Api> for super::Device {
|
||||
// TODO: support bounds checks on binding arrays
|
||||
binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
|
||||
},
|
||||
zero_initialize_workgroup_memory: true,
|
||||
},
|
||||
total_push_constants,
|
||||
})
|
||||
|
||||
@@ -31,6 +31,8 @@ pub struct PhysicalDeviceFeatures {
|
||||
vk::PhysicalDeviceShaderFloat16Int8Features,
|
||||
vk::PhysicalDevice16BitStorageFeatures,
|
||||
)>,
|
||||
zero_initialize_workgroup_memory:
|
||||
Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>,
|
||||
}
|
||||
|
||||
// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read.
|
||||
@@ -69,6 +71,9 @@ impl PhysicalDeviceFeatures {
|
||||
info = info.push_next(f16_i8_feature);
|
||||
info = info.push_next(_16bit_feature);
|
||||
}
|
||||
if let Some(ref mut feature) = self.zero_initialize_workgroup_memory {
|
||||
info = info.push_next(feature);
|
||||
}
|
||||
info
|
||||
}
|
||||
|
||||
@@ -286,6 +291,19 @@ impl PhysicalDeviceFeatures {
|
||||
} else {
|
||||
None
|
||||
},
|
||||
zero_initialize_workgroup_memory: if effective_api_version >= vk::API_VERSION_1_3
|
||||
|| enabled_extensions.contains(&vk::KhrZeroInitializeWorkgroupMemoryFn::name())
|
||||
{
|
||||
Some(
|
||||
vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::builder()
|
||||
.shader_zero_initialize_workgroup_memory(
|
||||
private_caps.zero_initialize_workgroup_memory,
|
||||
)
|
||||
.build(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -885,6 +903,16 @@ impl super::InstanceShared {
|
||||
builder = builder.push_next(&mut next.1);
|
||||
}
|
||||
|
||||
// `VK_KHR_zero_initialize_workgroup_memory` is promoted to 1.3
|
||||
if capabilities.effective_api_version >= vk::API_VERSION_1_3
|
||||
|| capabilities.supports_extension(vk::KhrZeroInitializeWorkgroupMemoryFn::name())
|
||||
{
|
||||
let next = features
|
||||
.zero_initialize_workgroup_memory
|
||||
.insert(vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::default());
|
||||
builder = builder.push_next(next);
|
||||
}
|
||||
|
||||
let mut features2 = builder.build();
|
||||
unsafe {
|
||||
get_device_properties.get_physical_device_features2(phd, &mut features2);
|
||||
@@ -1044,6 +1072,11 @@ impl super::Instance {
|
||||
.image_robustness
|
||||
.map_or(false, |ext| ext.robust_image_access != 0),
|
||||
},
|
||||
zero_initialize_workgroup_memory: phd_features
|
||||
.zero_initialize_workgroup_memory
|
||||
.map_or(false, |ext| {
|
||||
ext.shader_zero_initialize_workgroup_memory == vk::TRUE
|
||||
}),
|
||||
};
|
||||
let capabilities = crate::Capabilities {
|
||||
limits: phd_capabilities.to_wgpu_limits(),
|
||||
@@ -1246,6 +1279,14 @@ impl super::Adapter {
|
||||
// TODO: support bounds checks on binding arrays
|
||||
binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
|
||||
},
|
||||
zero_initialize_workgroup_memory: if self
|
||||
.private_caps
|
||||
.zero_initialize_workgroup_memory
|
||||
{
|
||||
spv::ZeroInitializeWorkgroupMemoryMode::Native
|
||||
} else {
|
||||
spv::ZeroInitializeWorkgroupMemoryMode::Polyfill
|
||||
},
|
||||
// We need to build this separately for each invocation, so just default it out here
|
||||
binding_map: BTreeMap::default(),
|
||||
}
|
||||
|
||||
@@ -166,6 +166,7 @@ struct PrivateCapabilities {
|
||||
non_coherent_map_mask: wgt::BufferAddress,
|
||||
robust_buffer_access: bool,
|
||||
robust_image_access: bool,
|
||||
zero_initialize_workgroup_memory: bool,
|
||||
}
|
||||
|
||||
bitflags::bitflags!(
|
||||
|
||||
@@ -17,6 +17,7 @@ use crate::common::TestingContext;
|
||||
|
||||
mod numeric_builtins;
|
||||
mod struct_layout;
|
||||
mod zero_init_workgroup_mem;
|
||||
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
enum InputStorageType {
|
||||
|
||||
183
wgpu/tests/shader/zero_init_workgroup_mem.rs
Normal file
183
wgpu/tests/shader/zero_init_workgroup_mem.rs
Normal file
@@ -0,0 +1,183 @@
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use wgpu::{
|
||||
include_wgsl, Backends, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor,
|
||||
BindGroupLayoutEntry, BindingResource, BindingType, BufferBinding, BufferBindingType,
|
||||
BufferDescriptor, BufferUsages, CommandEncoderDescriptor, ComputePassDescriptor,
|
||||
ComputePipelineDescriptor, DownlevelFlags, Limits, Maintain, MapMode, PipelineLayoutDescriptor,
|
||||
ShaderStages,
|
||||
};
|
||||
|
||||
use crate::common::{initialize_test, TestParameters, TestingContext};
|
||||
|
||||
#[test]
|
||||
fn zero_init_workgroup_mem() {
|
||||
initialize_test(
|
||||
TestParameters::default()
|
||||
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
|
||||
.limits(Limits::downlevel_defaults())
|
||||
// remove once we get to https://github.com/gfx-rs/wgpu/issues/3193 or
|
||||
// https://github.com/gfx-rs/wgpu/issues/3160
|
||||
.specific_failure(
|
||||
Some(Backends::DX12),
|
||||
Some(5140),
|
||||
Some("Microsoft Basic Render Driver"),
|
||||
true,
|
||||
)
|
||||
// this one is flakey
|
||||
.specific_failure(
|
||||
Some(Backends::VULKAN),
|
||||
Some(6880),
|
||||
Some("SwiftShader"),
|
||||
true,
|
||||
)
|
||||
// TODO: investigate why it fails
|
||||
.specific_failure(Some(Backends::GL), Some(65541), Some("llvmpipe"), false),
|
||||
zero_init_workgroup_mem_impl,
|
||||
);
|
||||
}
|
||||
|
||||
const DISPATCH_SIZE: (u32, u32, u32) = (64, 64, 64);
|
||||
const TOTAL_WORK_GROUPS: u32 = DISPATCH_SIZE.0 * DISPATCH_SIZE.1 * DISPATCH_SIZE.2;
|
||||
|
||||
/// nr of bytes we use in the shader
|
||||
const SHADER_WORKGROUP_MEMORY: u32 = 512 * 4 + 4;
|
||||
// assume we have this much workgroup memory (2GB)
|
||||
const MAX_DEVICE_WORKGROUP_MEMORY: u32 = i32::MAX as u32;
|
||||
const NR_OF_DISPATCHES: u32 =
|
||||
MAX_DEVICE_WORKGROUP_MEMORY / (SHADER_WORKGROUP_MEMORY * TOTAL_WORK_GROUPS) + 1; // TODO: use div_ceil once stabilized
|
||||
|
||||
const OUTPUT_ARRAY_SIZE: u32 = TOTAL_WORK_GROUPS * NR_OF_DISPATCHES;
|
||||
const BUFFER_SIZE: u64 = OUTPUT_ARRAY_SIZE as u64 * 4;
|
||||
const BUFFER_BINDING_SIZE: u32 = TOTAL_WORK_GROUPS * 4;
|
||||
|
||||
fn zero_init_workgroup_mem_impl(ctx: TestingContext) {
|
||||
let bgl = ctx
|
||||
.device
|
||||
.create_bind_group_layout(&BindGroupLayoutDescriptor {
|
||||
label: None,
|
||||
entries: &[BindGroupLayoutEntry {
|
||||
binding: 0,
|
||||
visibility: ShaderStages::COMPUTE,
|
||||
ty: BindingType::Buffer {
|
||||
ty: BufferBindingType::Storage { read_only: false },
|
||||
has_dynamic_offset: true,
|
||||
min_binding_size: None,
|
||||
},
|
||||
count: None,
|
||||
}],
|
||||
});
|
||||
|
||||
let output_buffer = ctx.device.create_buffer(&BufferDescriptor {
|
||||
label: Some("output buffer"),
|
||||
size: BUFFER_SIZE,
|
||||
usage: BufferUsages::COPY_DST | BufferUsages::COPY_SRC | BufferUsages::STORAGE,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
||||
let mapping_buffer = ctx.device.create_buffer(&BufferDescriptor {
|
||||
label: Some("mapping buffer"),
|
||||
size: BUFFER_SIZE,
|
||||
usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
||||
let bg = ctx.device.create_bind_group(&BindGroupDescriptor {
|
||||
label: None,
|
||||
layout: &bgl,
|
||||
entries: &[BindGroupEntry {
|
||||
binding: 0,
|
||||
resource: BindingResource::Buffer(BufferBinding {
|
||||
buffer: &output_buffer,
|
||||
offset: 0,
|
||||
size: Some(NonZeroU64::new(BUFFER_BINDING_SIZE as u64).unwrap()),
|
||||
}),
|
||||
}],
|
||||
});
|
||||
|
||||
let pll = ctx
|
||||
.device
|
||||
.create_pipeline_layout(&PipelineLayoutDescriptor {
|
||||
label: None,
|
||||
bind_group_layouts: &[&bgl],
|
||||
push_constant_ranges: &[],
|
||||
});
|
||||
|
||||
let sm = ctx
|
||||
.device
|
||||
.create_shader_module(include_wgsl!("zero_init_workgroup_mem.wgsl"));
|
||||
|
||||
let pipeline_read = ctx
|
||||
.device
|
||||
.create_compute_pipeline(&ComputePipelineDescriptor {
|
||||
label: Some("pipeline read"),
|
||||
layout: Some(&pll),
|
||||
module: &sm,
|
||||
entry_point: "read",
|
||||
});
|
||||
|
||||
let pipeline_write = ctx
|
||||
.device
|
||||
.create_compute_pipeline(&ComputePipelineDescriptor {
|
||||
label: Some("pipeline write"),
|
||||
layout: None,
|
||||
module: &sm,
|
||||
entry_point: "write",
|
||||
});
|
||||
|
||||
// -- Initializing data --
|
||||
|
||||
let output_pre_init_data = vec![1; OUTPUT_ARRAY_SIZE as usize];
|
||||
ctx.queue.write_buffer(
|
||||
&output_buffer,
|
||||
0,
|
||||
bytemuck::cast_slice(&output_pre_init_data),
|
||||
);
|
||||
|
||||
// -- Run test --
|
||||
|
||||
let mut encoder = ctx
|
||||
.device
|
||||
.create_command_encoder(&CommandEncoderDescriptor::default());
|
||||
|
||||
let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor::default());
|
||||
|
||||
cpass.set_pipeline(&pipeline_write);
|
||||
for _ in 0..NR_OF_DISPATCHES {
|
||||
cpass.dispatch_workgroups(DISPATCH_SIZE.0, DISPATCH_SIZE.1, DISPATCH_SIZE.2);
|
||||
}
|
||||
|
||||
cpass.set_pipeline(&pipeline_read);
|
||||
for i in 0..NR_OF_DISPATCHES {
|
||||
cpass.set_bind_group(0, &bg, &[i * BUFFER_BINDING_SIZE]);
|
||||
cpass.dispatch_workgroups(DISPATCH_SIZE.0, DISPATCH_SIZE.1, DISPATCH_SIZE.2);
|
||||
}
|
||||
drop(cpass);
|
||||
|
||||
// -- Pulldown data --
|
||||
|
||||
encoder.copy_buffer_to_buffer(&output_buffer, 0, &mapping_buffer, 0, BUFFER_SIZE);
|
||||
|
||||
ctx.queue.submit(Some(encoder.finish()));
|
||||
|
||||
mapping_buffer.slice(..).map_async(MapMode::Read, |_| ());
|
||||
ctx.device.poll(Maintain::Wait);
|
||||
|
||||
let mapped = mapping_buffer.slice(..).get_mapped_range();
|
||||
|
||||
let typed: &[u32] = bytemuck::cast_slice(&*mapped);
|
||||
|
||||
// -- Check results --
|
||||
|
||||
let num_disptaches_failed = typed.iter().filter(|&&res| res != 0).count();
|
||||
let ratio = (num_disptaches_failed as f32 / OUTPUT_ARRAY_SIZE as f32) * 100.;
|
||||
|
||||
assert!(
|
||||
num_disptaches_failed == 0,
|
||||
"Zero-initialization of workgroup memory failed ({:.0}% of disptaches failed).",
|
||||
ratio
|
||||
);
|
||||
|
||||
drop(mapped);
|
||||
mapping_buffer.unmap();
|
||||
}
|
||||
31
wgpu/tests/shader/zero_init_workgroup_mem.wgsl
Normal file
31
wgpu/tests/shader/zero_init_workgroup_mem.wgsl
Normal file
@@ -0,0 +1,31 @@
|
||||
const array_size = 512u;
|
||||
|
||||
struct WStruct {
|
||||
arr: array<u32, array_size>,
|
||||
atom: atomic<u32>
|
||||
}
|
||||
|
||||
var<workgroup> w_mem: WStruct;
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage, read_write> output: array<u32>;
|
||||
|
||||
@compute @workgroup_size(1)
|
||||
fn read(@builtin(workgroup_id) wgid: vec3<u32>, @builtin(num_workgroups) num_workgroups: vec3<u32>) {
|
||||
var is_zero = true;
|
||||
for(var i = 0u; i < array_size; i++) {
|
||||
is_zero &= w_mem.arr[i] == 0u;
|
||||
}
|
||||
is_zero &= atomicLoad(&w_mem.atom) == 0u;
|
||||
|
||||
let idx = wgid.x + (wgid.y * num_workgroups.x) + (wgid.z * num_workgroups.x * num_workgroups.y);
|
||||
output[idx] = u32(!is_zero);
|
||||
}
|
||||
|
||||
@compute @workgroup_size(1)
|
||||
fn write() {
|
||||
for(var i = 0u; i < array_size; i++) {
|
||||
w_mem.arr[i] = i;
|
||||
}
|
||||
atomicStore(&w_mem.atom, 3u);
|
||||
}
|
||||
Reference in New Issue
Block a user