diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 5258b41f4a..ae2521cacb 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -121,6 +121,10 @@ pub enum DispatchError { //expected: BindGroupLayoutId, //provided: Option<(BindGroupLayoutId, BindGroupId)>, }, + #[error( + "each current dispatch group size dimension ({current:?}) must be less or equal to {limit}" + )] + InvalidGroupSize { current: [u32; 3], limit: u32 }, } /// Error encountered when performing a compute pass. @@ -535,6 +539,22 @@ impl Global { &*texture_guard, ) .map_pass_err(scope)?; + + let groups_size_limit = cmd_buf.limits.max_compute_workgroups_per_dimension; + + if groups[0] > groups_size_limit + || groups[1] > groups_size_limit + || groups[2] > groups_size_limit + { + return Err(ComputePassErrorInner::Dispatch( + DispatchError::InvalidGroupSize { + current: groups, + limit: groups_size_limit, + }, + )) + .map_pass_err(scope); + } + unsafe { raw.dispatch(groups); } diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 8a2aa574c5..d5e195d511 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -245,7 +245,13 @@ impl super::Adapter { max_push_constant_size: 0, min_uniform_buffer_offset_alignment: d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, - min_storage_buffer_offset_alignment: 4, // TODO? + min_storage_buffer_offset_alignment: 4, + max_compute_workgroup_size_x: d3d12::D3D12_CS_THREAD_GROUP_MAX_X, + max_compute_workgroup_size_y: d3d12::D3D12_CS_THREAD_GROUP_MAX_Y, + max_compute_workgroup_size_z: d3d12::D3D12_CS_THREAD_GROUP_MAX_Z, + max_compute_workgroups_per_dimension: + d3d12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + // TODO? }, alignments: crate::Alignments { buffer_copy_offset: wgt::BufferSize::new( diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 5174f5d364..6354f7f215 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -328,6 +328,12 @@ impl super::Adapter { gl.get_parameter_i32(glow::MAX_VERTEX_UNIFORM_BLOCKS) .min(gl.get_parameter_i32(glow::MAX_FRAGMENT_UNIFORM_BLOCKS)) as u32; + let max_compute_workgroups_per_dimension = gl + .get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 0) + .min(gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 1)) + .min(gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 2)) + as u32; + let limits = wgt::Limits { max_texture_dimension_1d: max_texture_size, max_texture_dimension_2d: max_texture_size, @@ -367,6 +373,16 @@ impl super::Adapter { max_push_constant_size: 0, min_uniform_buffer_offset_alignment, min_storage_buffer_offset_alignment, + max_compute_workgroup_size_x: gl + .get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 0) + as u32, + max_compute_workgroup_size_y: gl + .get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 1) + as u32, + max_compute_workgroup_size_z: gl + .get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 2) + as u32, + max_compute_workgroups_per_dimension, }; let mut workarounds = super::Workarounds::empty(); diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 038e6dbd2f..77f4b9135b 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -946,6 +946,11 @@ impl super::PrivateCapabilities { max_push_constant_size: 0x1000, min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, min_storage_buffer_offset_alignment: self.buffer_alignment as u32, + //TODO: double-check how these match Metal feature set tables + max_compute_workgroup_size_x: 256, + max_compute_workgroup_size_y: 256, + max_compute_workgroup_size_z: 64, + max_compute_workgroups_per_dimension: 0xFFFF, }, alignments: crate::Alignments { buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(), diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index c37d00f05e..87001b6542 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -591,6 +591,11 @@ impl PhysicalDeviceCapabilities { limits.max_per_stage_descriptor_storage_buffers }; + let max_compute_workgroup_sizes = limits.max_compute_work_group_size; + let max_compute_workgroups_per_dimension = limits.max_compute_work_group_count[0] + .min(limits.max_compute_work_group_count[1]) + .min(limits.max_compute_work_group_count[2]); + wgt::Limits { max_texture_dimension_1d: limits.max_image_dimension1_d, max_texture_dimension_2d: limits.max_image_dimension2_d, @@ -618,6 +623,10 @@ impl PhysicalDeviceCapabilities { max_push_constant_size: limits.max_push_constants_size, min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32, + max_compute_workgroup_size_x: max_compute_workgroup_sizes[0], + max_compute_workgroup_size_y: max_compute_workgroup_sizes[1], + max_compute_workgroup_size_z: max_compute_workgroup_sizes[2], + max_compute_workgroups_per_dimension, } } diff --git a/wgpu-info/src/main.rs b/wgpu-info/src/main.rs index e780c0e412..37f8700fcb 100644 --- a/wgpu-info/src/main.rs +++ b/wgpu-info/src/main.rs @@ -50,6 +50,10 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) { max_push_constant_size, min_uniform_buffer_offset_alignment, min_storage_buffer_offset_alignment, + max_compute_workgroup_size_x, + max_compute_workgroup_size_y, + max_compute_workgroup_size_z, + max_compute_workgroups_per_dimension } = limits; println!("\t\tMax Texture Dimension 1d: {}", max_texture_dimension_1d); println!("\t\tMax Texture Dimension 2d: {}", max_texture_dimension_2d); @@ -71,6 +75,10 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) { println!("\t\tMax Push Constant Size: {}", max_push_constant_size); println!("\t\tMin Uniform Buffer Offset Alignment: {}", min_uniform_buffer_offset_alignment); println!("\t\tMin Storage Buffer Offset Alignment: {}", min_storage_buffer_offset_alignment); + println!("\t\tMax Compute Workgroup Size X: {}", max_compute_workgroup_size_x); + println!("\t\tMax Compute Workgroup Size Y: {}", max_compute_workgroup_size_y); + println!("\t\tMax Compute Workgroup Size Z: {}", max_compute_workgroup_size_z); + println!("\t\tMax Compute Workgroups Per Dimmension: {}", max_compute_workgroups_per_dimension); println!("\tDownlevel Properties:"); let wgpu::DownlevelCapabilities { shader_model, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 35466b7d06..2f8d4ea464 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -641,6 +641,19 @@ pub struct Limits { /// when creating a `BindGroup`, or for `set_bind_group` `dynamicOffsets`. /// Defaults to 256. Lower is "better". pub min_storage_buffer_offset_alignment: u32, + + /// The maximum value of the workgroup_size X dimension for a compute stage `ShaderModule` entry-point. + /// Defaults to 256. + pub max_compute_workgroup_size_x: u32, + /// The maximum value of the workgroup_size Y dimension for a compute stage `ShaderModule` entry-point. + /// Defaults to 256. + pub max_compute_workgroup_size_y: u32, + /// The maximum value of the workgroup_size Z dimension for a compute stage `ShaderModule` entry-point. + /// Defaults to 256. + pub max_compute_workgroup_size_z: u32, + /// The maximum value for each dimension of a `ComputePass::dispatch(x, y, z)` operation. + /// Defaults to 65535. + pub max_compute_workgroups_per_dimension: u32, } impl Default for Limits { @@ -666,6 +679,10 @@ impl Default for Limits { max_push_constant_size: 0, min_uniform_buffer_offset_alignment: 256, min_storage_buffer_offset_alignment: 256, + max_compute_workgroup_size_x: 256, + max_compute_workgroup_size_y: 256, + max_compute_workgroup_size_z: 64, + max_compute_workgroups_per_dimension: 65535, } } } @@ -694,6 +711,10 @@ impl Limits { max_push_constant_size: 0, min_uniform_buffer_offset_alignment: 256, min_storage_buffer_offset_alignment: 256, + max_compute_workgroup_size_x: 256, + max_compute_workgroup_size_y: 256, + max_compute_workgroup_size_z: 64, + max_compute_workgroups_per_dimension: 65535, } } diff --git a/wgpu/tests/common/mod.rs b/wgpu/tests/common/mod.rs index ee2ce39e71..9993d3bc92 100644 --- a/wgpu/tests/common/mod.rs +++ b/wgpu/tests/common/mod.rs @@ -38,33 +38,6 @@ pub struct TestingContext { pub queue: Queue, } -// A rather arbitrary set of limits which should be lower than all devices wgpu reasonably expects to run on and provides enough resources for most tests to run. -// Adjust as needed if they are too low/high. -pub fn lowest_reasonable_limits() -> Limits { - Limits { - max_texture_dimension_1d: 1024, - max_texture_dimension_2d: 1024, - max_texture_dimension_3d: 32, - max_texture_array_layers: 32, - max_bind_groups: 2, - max_dynamic_uniform_buffers_per_pipeline_layout: 2, - max_dynamic_storage_buffers_per_pipeline_layout: 2, - max_sampled_textures_per_shader_stage: 2, - max_samplers_per_shader_stage: 2, - max_storage_buffers_per_shader_stage: 2, - max_storage_textures_per_shader_stage: 2, - max_uniform_buffers_per_shader_stage: 2, - max_uniform_buffer_binding_size: 256, - max_storage_buffer_binding_size: 1 << 16, - max_vertex_buffers: 4, - max_vertex_attributes: 4, - max_vertex_buffer_array_stride: 32, - max_push_constant_size: 0, - min_uniform_buffer_offset_alignment: 256, - min_storage_buffer_offset_alignment: 256, - } -} - fn lowest_downlevel_properties() -> DownlevelCapabilities { DownlevelCapabilities { flags: wgt::DownlevelFlags::empty(),