use std::mem::size_of; use std::num::NonZeroU64; use wgpu::util::RenderEncoder; use wgpu::*; use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; /// We want to test that partial updates to push constants work as expected. /// /// As such, we dispatch two compute passes, one which writes the values /// before a partial update, and one which writes the values after the partial update. /// /// If the update code is working correctly, the values not written to by the second update /// will remain unchanged. #[gpu_test] static PARTIAL_UPDATE: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() .features(wgpu::Features::PUSH_CONSTANTS) .limits(wgpu::Limits { max_push_constant_size: 32, ..Default::default() }), ) .run_async(partial_update_test); const SHADER: &str = r#" struct Pc { offset: u32, vector: vec4f, } var pc: Pc; @group(0) @binding(0) var output: array; @compute @workgroup_size(1) fn main() { output[pc.offset] = pc.vector; } "#; async fn partial_update_test(ctx: TestingContext) { let sm = ctx .device .create_shader_module(wgpu::ShaderModuleDescriptor { label: Some("shader"), source: wgpu::ShaderSource::Wgsl(SHADER.into()), }); let bgl = ctx .device .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: Some("bind_group_layout"), entries: &[wgpu::BindGroupLayoutEntry { binding: 0, visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, min_binding_size: NonZeroU64::new(16), }, count: None, }], }); let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: Some("gpu_buffer"), size: 32, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, mapped_at_creation: false, }); let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { label: Some("cpu_buffer"), size: 32, usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, mapped_at_creation: false, }); let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { label: Some("bind_group"), layout: &bgl, entries: &[wgpu::BindGroupEntry { binding: 0, resource: gpu_buffer.as_entire_binding(), }], }); let pipeline_layout = ctx .device .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: Some("pipeline_layout"), bind_group_layouts: &[&bgl], push_constant_ranges: &[wgpu::PushConstantRange { stages: wgpu::ShaderStages::COMPUTE, range: 0..32, }], }); let pipeline = ctx .device .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: Some("pipeline"), layout: Some(&pipeline_layout), module: &sm, entry_point: Some("main"), compilation_options: Default::default(), cache: None, }); let mut encoder = ctx .device .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("encoder"), }); { let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: Some("compute_pass"), timestamp_writes: None, }); cpass.set_pipeline(&pipeline); cpass.set_bind_group(0, &bind_group, &[]); // -- Dispatch 0 -- // Dispatch number cpass.set_push_constants(0, bytemuck::bytes_of(&[0_u32])); // Update the whole vector. cpass.set_push_constants(16, bytemuck::bytes_of(&[1.0_f32, 2.0, 3.0, 4.0])); cpass.dispatch_workgroups(1, 1, 1); // -- Dispatch 1 -- // Dispatch number cpass.set_push_constants(0, bytemuck::bytes_of(&[1_u32])); // Update just the y component of the vector. cpass.set_push_constants(20, bytemuck::bytes_of(&[5.0_f32])); cpass.dispatch_workgroups(1, 1, 1); } encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, 32); ctx.queue.submit([encoder.finish()]); cpu_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ()); ctx.async_poll(wgpu::PollType::wait()).await.unwrap(); let data = cpu_buffer.slice(..).get_mapped_range(); let floats: &[f32] = bytemuck::cast_slice(&data); // first 4 floats the initial value // second 4 floats the first update assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 1.0, 5.0, 3.0, 4.0]); } #[gpu_test] static RENDER_PASS_TEST: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() .features(Features::PUSH_CONSTANTS | Features::VERTEX_WRITABLE_STORAGE) .limits(wgpu::Limits { max_push_constant_size: 64, ..Default::default() }), ) .run_async(move |ctx| async move { for use_render_bundle in [false, true] { render_pass_test(&ctx, use_render_bundle).await; } }); // This shader simply moves the values from vector_constants and push_constants into the // result buffer. It expects to be called 4 times (with vector_index in 0..4) with its // topology being PointList, so that each vertex shader call leads to exactly one fragment // call. const SHADER2: &str = " const POSITION: vec4f = vec4f(0, 0, 0, 1); struct PushConstants { vertex_constants: vec4i, fragment_constants: vec4i, } var push_constants: PushConstants; @group(0) @binding(0) var result: array; struct VertexOutput { @builtin(position) position: vec4f, @location(0) index: u32, } @vertex fn vertex( @builtin(vertex_index) ix: u32, ) -> VertexOutput { result[ix] = push_constants.vertex_constants[ix]; return VertexOutput(POSITION, ix); } @fragment fn fragment( @location(0) ix: u32, ) -> @location(0) vec4f { result[ix + 4u] = push_constants.fragment_constants[ix]; return vec4f(); } "; async fn render_pass_test(ctx: &TestingContext, use_render_bundle: bool) { let output_buffer = ctx.device.create_buffer(&BufferDescriptor { label: Some("output buffer"), size: 8 * size_of::() as BufferAddress, usage: BufferUsages::STORAGE | BufferUsages::COPY_SRC, mapped_at_creation: false, }); let cpu_buffer = ctx.device.create_buffer(&BufferDescriptor { label: Some("cpu buffer"), size: output_buffer.size(), usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, mapped_at_creation: false, }); // We need an output texture, even though we're not ever going to look at it. let output_texture = ctx.device.create_texture(&TextureDescriptor { size: Extent3d { width: 2, height: 2, depth_or_array_layers: 1, }, mip_level_count: 1, sample_count: 1, dimension: TextureDimension::D2, format: TextureFormat::Rgba8UnormSrgb, usage: TextureUsages::RENDER_ATTACHMENT, label: Some("Output Texture"), view_formats: &[], }); let output_texture_view = output_texture.create_view(&Default::default()); let shader = ctx.device.create_shader_module(ShaderModuleDescriptor { label: Some("Shader"), source: ShaderSource::Wgsl(SHADER2.into()), }); let bind_group_layout = ctx .device .create_bind_group_layout(&BindGroupLayoutDescriptor { label: None, entries: &[BindGroupLayoutEntry { binding: 0, visibility: ShaderStages::VERTEX_FRAGMENT, ty: BindingType::Buffer { ty: BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, min_binding_size: None, }, count: None, }], }); let render_pipeline_layout = ctx .device .create_pipeline_layout(&PipelineLayoutDescriptor { bind_group_layouts: &[&bind_group_layout], push_constant_ranges: &[PushConstantRange { stages: ShaderStages::VERTEX_FRAGMENT, range: 0..8 * size_of::() as u32, }], ..Default::default() }); let pipeline = ctx .device .create_render_pipeline(&RenderPipelineDescriptor { label: Some("Render Pipeline"), layout: Some(&render_pipeline_layout), vertex: VertexState { module: &shader, entry_point: None, buffers: &[], compilation_options: Default::default(), }, fragment: Some(FragmentState { module: &shader, entry_point: None, targets: &[Some(output_texture.format().into())], compilation_options: Default::default(), }), primitive: PrimitiveState { topology: PrimitiveTopology::PointList, ..Default::default() }, depth_stencil: None, multisample: MultisampleState::default(), multiview: None, cache: None, }); let render_pass_desc = RenderPassDescriptor { label: Some("Render Pass"), color_attachments: &[Some(RenderPassColorAttachment { view: &output_texture_view, resolve_target: None, ops: Operations { load: LoadOp::Clear(Color::default()), store: StoreOp::Store, }, })], ..Default::default() }; let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor { label: Some("bind group"), layout: &pipeline.get_bind_group_layout(0), entries: &[BindGroupEntry { binding: 0, resource: output_buffer.as_entire_binding(), }], }); let data: Vec = (0..8).map(|i| (i * i) - 1).collect(); fn do_encoding<'a>( encoder: &mut dyn RenderEncoder<'a>, pipeline: &'a RenderPipeline, bind_group: &'a BindGroup, data: &'a Vec, ) { let data_as_u8: &[u8] = bytemuck::cast_slice(data.as_slice()); encoder.set_pipeline(pipeline); encoder.set_push_constants(ShaderStages::VERTEX_FRAGMENT, 0, data_as_u8); encoder.set_bind_group(0, Some(bind_group), &[]); encoder.draw(0..4, 0..1); } let mut command_encoder = ctx .device .create_command_encoder(&CommandEncoderDescriptor::default()); { let mut render_pass = command_encoder.begin_render_pass(&render_pass_desc); if use_render_bundle { // Execute the commands in a render_bundle_encoder. let mut render_bundle_encoder = ctx.device .create_render_bundle_encoder(&RenderBundleEncoderDescriptor { color_formats: &[Some(output_texture.format())], sample_count: 1, ..RenderBundleEncoderDescriptor::default() }); do_encoding(&mut render_bundle_encoder, &pipeline, &bind_group, &data); let render_bundle = render_bundle_encoder.finish(&RenderBundleDescriptor::default()); render_pass.execute_bundles([&render_bundle]); } else { // Execute the commands directly. do_encoding(&mut render_pass, &pipeline, &bind_group, &data); } } // Move the result to the cpu buffer, so that we can read them. command_encoder.copy_buffer_to_buffer(&output_buffer, 0, &cpu_buffer, 0, output_buffer.size()); let command_buffer = command_encoder.finish(); ctx.queue.submit([command_buffer]); cpu_buffer.slice(..).map_async(MapMode::Read, |_| ()); ctx.async_poll(wgpu::PollType::wait()).await.unwrap(); let mapped_data = cpu_buffer.slice(..).get_mapped_range(); let result = bytemuck::cast_slice::(&mapped_data).to_vec(); drop(mapped_data); cpu_buffer.unmap(); assert_eq!(&result, &data); }