Better validation

This commit is contained in:
rickwebiii
2023-02-24 13:32:16 -08:00
parent 4a4f959f26
commit 9003580da4
9 changed files with 219 additions and 64 deletions

1
Cargo.lock generated
View File

@@ -2450,6 +2450,7 @@ dependencies = [
"sunscreen_curve25519",
"tokio",
"wgpu",
"wgpu-core",
]
[[package]]

View File

@@ -18,11 +18,12 @@ wgpu = { version = "0.15.1", optional = true }
[build-dependencies]
naga = { version = "0.11.0", optional = true, features = ["wgsl-in"]}
wgpu-core = { version = "0.15.1", optional = true, features = ["metal", "wgsl"] }
[features]
default = ["webgpu"]
nightly-features = []
metal = ["dep:metal", "gpu"]
webgpu = ["dep:wgpu", "dep:tokio", "dep:futures", "dep:naga", "dep:bytemuck", "gpu"]
webgpu = ["dep:wgpu", "dep:tokio", "dep:futures", "dep:naga", "dep:wgpu-core", "dep:bytemuck", "gpu"]
gpu = []
pina = []

View File

@@ -1,11 +1,12 @@
#[cfg(feature = "webgpu")]
// This simply concatenates all the wgsl shaders, which get compiled at runtime.
fn compile_wgsl_shaders() {
use std::{path::PathBuf, process::Command};
use std::fs::{DirEntry, File, read_to_string};
use std::fs::{read_to_string, DirEntry, File};
use std::io::Write;
use std::{path::PathBuf};
use naga::valid::{ValidationFlags, Capabilities};
use naga::valid::{Capabilities, ValidationFlags};
use wgpu_core::pipeline::{CreateShaderModuleError, ShaderError};
let outdir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
let shader_dir = PathBuf::from(".")
@@ -27,9 +28,7 @@ fn compile_wgsl_shaders() {
let include_file: Box<dyn Fn(&DirEntry) -> bool> = if config == "test" {
Box::new(is_wgsl_file)
} else {
Box::new(|file: &DirEntry| {
is_wgsl_file(file) && !is_test_wgsl_file(file)
})
Box::new(|file: &DirEntry| is_wgsl_file(file) && !is_test_wgsl_file(file))
};
let shaders = std::fs::read_dir(&shader_dir)
@@ -38,7 +37,7 @@ fn compile_wgsl_shaders() {
.filter(include_file);
let out_file_path = outdir.join(format!("shaders-{config}.wgsl"));
{
let mut out_file = File::create(&out_file_path).unwrap();
@@ -50,15 +49,23 @@ fn compile_wgsl_shaders() {
};
// Validate the shader
let shader_contents = read_to_string(out_file_path).unwrap();
let shader_contents = read_to_string(&out_file_path).unwrap();
let parse_result = naga::front::wgsl::parse_str(&shader_contents);
if let Err(e) = parse_result {
panic!("{}", e.message());
let e = ShaderError {
source: shader_contents,
label: None,
inner: Box::new(e)
};
let e = CreateShaderModuleError::Parsing(e);
panic!("{}", e);
}
let mut validator = naga::valid::Validator::new(ValidationFlags::all(), Capabilities::empty());
let mut validator =
naga::valid::Validator::new(ValidationFlags::all(), Capabilities::empty());
let validation_results = validator.validate(&parse_result.unwrap());

View File

@@ -1,11 +1,21 @@
use core::slice;
use std::{borrow::Cow, ops::Deref, mem::{align_of, MaybeUninit, size_of}};
use std::{
borrow::Cow,
mem::{align_of, size_of, MaybeUninit},
ops::Deref,
};
use bytemuck::{cast, cast_slice, Pod};
use futures::channel::oneshot;
use lazy_static::lazy_static;
use tokio::runtime::{Builder as TokioRuntimeBuilder, Runtime as TokioRuntime};
use wgpu::{Instance, RequestAdapterOptions, Device, Queue, ShaderModuleDescriptor, ShaderModule, BufferDescriptor, COPY_BUFFER_ALIGNMENT, BufferUsages, Buffer, ComputePipelineDescriptor, BindGroupDescriptor, BindGroupEntry, CommandEncoderDescriptor, ComputePassDescriptor, util::{BufferInitDescriptor, DeviceExt}, Maintain};
use wgpu::{
util::{BufferInitDescriptor, DeviceExt},
BindGroupDescriptor, BindGroupEntry, Buffer, BufferDescriptor, BufferUsages,
CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, Device, Instance,
Maintain, Queue, RequestAdapterOptions, ShaderModule, ShaderModuleDescriptor,
COPY_BUFFER_ALIGNMENT,
};
mod ristrettovec;
mod scalarvec;
@@ -23,14 +33,10 @@ pub struct Runtime {
// test and release. In test, we #define the TEST macro, which exposes test kernels.
// The release library does not feature these kernels.
#[cfg(not(test))]
const SHADERS: &str = include_str!(concat!(
env!("OUT_DIR"),
"/shaders-release.wgsl"
));
const SHADERS: &str = include_str!(concat!(env!("OUT_DIR"), "/shaders-release.wgsl"));
#[cfg(test)]
const SHADERS: &str =
include_str!(concat!(env!("OUT_DIR"), "/shaders-test.wgsl"));
const SHADERS: &str = include_str!(concat!(env!("OUT_DIR"), "/shaders-test.wgsl"));
fn assert_aligned<T>(ptr: *const T) {
assert!(ptr.cast::<()>().align_offset(align_of::<T>()) == 0);
@@ -52,6 +58,33 @@ trait BufferExt {
fn get_data<T: Pod + Copy>(&self) -> Vec<T>;
}
pub trait GpuVec {
type IterType: Sized;
fn get_buffer(&self) -> &Buffer;
fn len(&self) -> usize;
fn byte_len(&self) -> usize {
self.len() * size_of::<Self::IterType>()
}
fn u32_len(&self) -> usize {
self.byte_len() / size_of::<u32>()
}
fn run_unary<Rhs: GpuVec>(&self, output: &Buffer, kernel_name: &'static str) {
let runtime = Runtime::get();
let len = GpuU32::new(self.len() as u32);
runtime.run(
kernel_name,
&[self.get_buffer(), &DUMMY_BUFFER, output, &len.data],
&Grid::new(self.len() as u32 / 128, 1, 1),
);
}
}
impl BufferExt for Buffer {
fn clone(&self) -> Buffer {
let runtime = Runtime::get();
@@ -65,7 +98,9 @@ impl BufferExt for Buffer {
fn copy_into(&self, dst: &Buffer) {
let runtime = Runtime::get();
let mut encoder = runtime.device.create_command_encoder(&CommandEncoderDescriptor { label: None });
let mut encoder = runtime
.device
.create_command_encoder(&CommandEncoderDescriptor { label: None });
encoder.copy_buffer_to_buffer(self, 0, &dst, 0, self.size());
runtime.queue.submit(Some(encoder.finish()));
@@ -76,7 +111,7 @@ impl BufferExt for Buffer {
let (s, r) = oneshot::channel();
// In vanilla WebGPU, if you use the MAP_READ flag, you must also set COPY_DST
// and *only* COPY_DST. This means you can't use such buffers in compute
// and *only* COPY_DST. This means you can't use such buffers in compute
// shaders. As such, we create a temporary buffer with these properties so we
// can copy data out of the shader-capable buffer and return the results.
let runtime = Runtime::get();
@@ -85,13 +120,15 @@ impl BufferExt for Buffer {
label: None,
size: self.size(),
usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
mapped_at_creation: false
mapped_at_creation: false,
});
self.copy_into(&copy_buf);
let buffer_slice = copy_buf.slice(..);
buffer_slice.map_async(wgpu::MapMode::Read, move |v| { s.send(v).unwrap(); });
buffer_slice.map_async(wgpu::MapMode::Read, move |v| {
s.send(v).unwrap();
});
runtime.device.poll(Maintain::Wait);
@@ -113,15 +150,19 @@ impl Runtime {
pub fn alloc<T>(&self, len: usize) -> Buffer {
let len = size_of::<T>() * len;
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
// mapped_at_creation=true
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 { len } else { (len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize};
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 {
len
} else {
(len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize
};
let buffer = self.device.create_buffer(&BufferDescriptor {
label: None,
size: len as u64,
usage: BufferUsages::STORAGE | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
mapped_at_creation: false
mapped_at_creation: false,
});
buffer
@@ -130,9 +171,13 @@ impl Runtime {
pub fn alloc_from_slice<T: Pod>(&self, data: &[T]) -> Buffer {
let len = size_of::<T>() * data.len();
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
// mapped_at_creation=true
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 { len } else { (len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize};
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 {
len
} else {
(len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize
};
let buffer = self.device.create_buffer_init(&BufferInitDescriptor {
label: None,
@@ -144,19 +189,23 @@ impl Runtime {
}
pub fn run(&self, kernel_name: &'static str, args: &[&Buffer], threadgroups: &Grid) {
let pipeline = self.device.create_compute_pipeline(&ComputePipelineDescriptor {
label: None,
layout: None,
module: &self.shaders,
entry_point: kernel_name
});
let pipeline = self
.device
.create_compute_pipeline(&ComputePipelineDescriptor {
label: None,
layout: None,
module: &self.shaders,
entry_point: kernel_name,
});
let bindings = args.iter().enumerate().map(|(i, b)| {
BindGroupEntry {
let bindings = args
.iter()
.enumerate()
.map(|(i, b)| BindGroupEntry {
binding: i as u32,
resource: b.as_entire_binding()
}
}).collect::<Vec<_>>();
resource: b.as_entire_binding(),
})
.collect::<Vec<_>>();
let layout = pipeline.get_bind_group_layout(0);
@@ -166,7 +215,9 @@ impl Runtime {
entries: &bindings,
});
let mut encoder = self.device.create_command_encoder(&CommandEncoderDescriptor { label: None });
let mut encoder = self
.device
.create_command_encoder(&CommandEncoderDescriptor { label: None });
{
let (x, y, z) = threadgroups.0;
@@ -183,7 +234,6 @@ impl Runtime {
}
}
lazy_static! {
static ref TOKIO_RUNTIME: TokioRuntime = {
TokioRuntimeBuilder::new_current_thread()
@@ -228,10 +278,16 @@ lazy_static! {
shaders
}
};
/// A 4-byte buffer that exists so you can bind a buffer to `g_b`
/// in unary shaders.
static ref DUMMY_BUFFER: Buffer = {
Runtime::get().alloc::<u32>(1)
};
}
pub struct GpuU32 {
data: Buffer
data: Buffer,
}
impl GpuU32 {
@@ -239,9 +295,7 @@ impl GpuU32 {
let runtime = Runtime::get();
let data = runtime.alloc_from_slice(&[val]);
Self {
data
}
Self { data }
}
}
@@ -269,10 +323,14 @@ mod tests {
let n = GpuU32::new(a.len() as u32);
runtime.run("add", &[&a_gpu, &b_gpu, &c_gpu, &n.data], &Grid::new(1, 1, 1));
runtime.run(
"add",
&[&a_gpu, &b_gpu, &c_gpu, &n.data],
&Grid::new(1, 1, 1),
);
for (c, (a, b)) in c_gpu.get_data::<u32>().iter().zip(a.iter().zip(b.iter())) {
assert_eq!(*c, a + b);
}
}
}
}

View File

@@ -1,3 +1 @@
pub struct GpuRistrettoPointVec {
}
pub struct GpuRistrettoPointVec {}

View File

@@ -3,7 +3,7 @@ use std::mem::size_of;
use curve25519_dalek::scalar::Scalar;
use wgpu::Buffer;
use super::{Runtime, BufferExt};
use super::{BufferExt, Runtime};
pub struct GpuScalarVec {
data: Buffer,
@@ -12,14 +12,14 @@ pub struct GpuScalarVec {
impl Clone for GpuScalarVec {
fn clone(&self) -> Self {
Self {
data: self.data.clone()
data: self.data.clone(),
}
}
}
pub struct Scalars {
data: Vec<u32>,
i: usize
i: usize,
}
impl Scalars {
@@ -109,7 +109,7 @@ impl GpuScalarVec {
}
Self {
data: runtime.alloc_from_slice(&packed_data)
data: runtime.alloc_from_slice(&packed_data),
}
}
@@ -124,13 +124,16 @@ impl GpuScalarVec {
mod tests {
use rand::thread_rng;
use crate::webgpu_impl::{Grid, GpuU32};
use crate::webgpu_impl::{GpuU32, Grid};
use super::*;
#[test]
fn can_unpack_scalarvec() {
let a = (0..238).into_iter().map(|_| Scalar::random(&mut thread_rng())).collect::<Vec<_>>();
let a = (0..238)
.into_iter()
.map(|_| Scalar::random(&mut thread_rng()))
.collect::<Vec<_>>();
let a_v = GpuScalarVec::new(&a);
@@ -146,7 +149,10 @@ mod tests {
#[test]
fn can_clone_scalarvec() {
let a = (0..238).into_iter().map(|_| Scalar::random(&mut thread_rng())).collect::<Vec<_>>();
let a = (0..238)
.into_iter()
.map(|_| Scalar::random(&mut thread_rng()))
.collect::<Vec<_>>();
let a_v = GpuScalarVec::new(&a);
let a_v_clone = a_v.clone();
@@ -161,11 +167,13 @@ mod tests {
assert_eq!(count, a.len());
}
#[test]
fn can_pack_unpack_shader_operand_a() {
// Use 238 because it's a weird number not a multiple of the threadgroup size.
let a = (0..238).into_iter().map(|_| Scalar::random(&mut thread_rng())).collect::<Vec<_>>();
let a = (0..238)
.into_iter()
.map(|_| Scalar::random(&mut thread_rng()))
.collect::<Vec<_>>();
let a_v = GpuScalarVec::new(&a);
let c_v = a_v.clone();
@@ -174,6 +182,32 @@ mod tests {
let len = GpuU32::new(a.len() as u32);
runtime.run("test_scalar_can_pack_unpack_a", &[&a_v.data, &a_v.data, &c_v.data, &len.data], &Grid::new(2, 1, 1));
runtime.run(
"test_scalar_can_pack_unpack_a",
&[&a_v.data, &a_v.data, &c_v.data, &len.data],
&Grid::new(2, 1, 1),
);
}
}
#[test]
fn can_pack_unpack_shader_operand_b() {
// Use 238 because it's a weird number not a multiple of the threadgroup size.
let a = (0..238)
.into_iter()
.map(|_| Scalar::random(&mut thread_rng()))
.collect::<Vec<_>>();
let a_v = GpuScalarVec::new(&a);
let c_v = a_v.clone();
let runtime = Runtime::get();
let len = GpuU32::new(a.len() as u32);
runtime.run(
"test_scalar_can_pack_unpack_b",
&[&a_v.data, &a_v.data, &c_v.data, &len.data],
&Grid::new(2, 1, 1),
);
}
}

View File

@@ -0,0 +1,6 @@
const Scalar29_Zero: Scalar29 = Scalar29(array<u32, 9>(0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
const Scalar29_L: Scalar29 = Scalar29(array<u32, 9>(
0x1cf5d3edu, 0x009318d2u, 0x1de73596u, 0x1df3bd45u, 0x0000014du, 0x00000000u, 0x00000000u, 0x00000000u,
0x00100000u,
));

View File

@@ -10,4 +10,18 @@ fn test_scalar_can_pack_unpack_a(
let a = scalar29_unpack_a(gid.x, g_len);
scalar29_pack_c(a, gid.x, g_len);
}
@compute
@workgroup_size(128, 1, 1)
fn test_scalar_can_pack_unpack_b(
@builtin(global_invocation_id) gid: vec3<u32>,
) {
if gid.x >= g_len {
unused_b();
return;
}
let b = scalar29_unpack_b(gid.x, g_len);
scalar29_pack_c(b, gid.x, g_len);
}

View File

@@ -10,8 +10,6 @@ struct Foo {
v: array<u32>
}
const Scalar29_Zero: Scalar29 = Scalar29(array<u32, 9>(0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
// WGSL is terrible in that you can't pass arrays of unknown length to functions. So, we create
// functions for unpacking bindings a, b respectively.
fn scalar29_unpack_a(grid_tid: u32, stride: u32) -> Scalar29 {
@@ -69,3 +67,41 @@ fn scalar29_pack_c(val: Scalar29, grid_tid: u32, stride: u32) {
word = val.v[7] >> 21u | val.v[8u] << 8u;
g_c[7u * stride + grid_tid] = word;
}
fn scalar29_add(a: ptr<function, Scalar29>, b: ptr<function, Scalar29>) -> Scalar29 {
var sum = Scalar29_Zero;
let mask = (0x1u << 29u) - 1u;
// a + b
var carry = 0u;
for (var i = 0u; i < 9u; i++) {
carry = a.v[i] + b.v[i] + (carry >> 29u);
sum.v[i] = carry & mask;
}
// subtract l if the sum is >= l
return scalar29_sub(&sum, &Scalar29_L);
}
fn scalar29_sub(a: ptr<function, Scalar29>, b: ptr<function, Scalar29>) -> Scalar29 {
var difference = Scalar29_Zero;
let mask = (1u << 29u) - 1u;
// a - b
var borrow = 0u;
for (var i = 0u; i < 9u; i++) {
borrow = a.v[i] - (b.v[i] + (borrow >> 31u));
difference.v[i] = borrow & mask;
}
// conditionally add l if the difference is negative
let underflow_mask = ((borrow >> 31u) ^ 1u) - 1u;
var carry = 0u;
for (var i = 0u; i < 9u; i++) {
carry = (carry >> 29) + difference[i] + (Scalar29_L.v[i] & underflow_mask);
difference[i] = carry & mask;
}
return difference;
}