mirror of
https://github.com/Sunscreen-tech/Sunscreen.git
synced 2026-05-11 03:00:37 -04:00
Better validation
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -2450,6 +2450,7 @@ dependencies = [
|
||||
"sunscreen_curve25519",
|
||||
"tokio",
|
||||
"wgpu",
|
||||
"wgpu-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -18,11 +18,12 @@ wgpu = { version = "0.15.1", optional = true }
|
||||
|
||||
[build-dependencies]
|
||||
naga = { version = "0.11.0", optional = true, features = ["wgsl-in"]}
|
||||
wgpu-core = { version = "0.15.1", optional = true, features = ["metal", "wgsl"] }
|
||||
|
||||
[features]
|
||||
default = ["webgpu"]
|
||||
nightly-features = []
|
||||
metal = ["dep:metal", "gpu"]
|
||||
webgpu = ["dep:wgpu", "dep:tokio", "dep:futures", "dep:naga", "dep:bytemuck", "gpu"]
|
||||
webgpu = ["dep:wgpu", "dep:tokio", "dep:futures", "dep:naga", "dep:wgpu-core", "dep:bytemuck", "gpu"]
|
||||
gpu = []
|
||||
pina = []
|
||||
@@ -1,11 +1,12 @@
|
||||
#[cfg(feature = "webgpu")]
|
||||
// This simply concatenates all the wgsl shaders, which get compiled at runtime.
|
||||
fn compile_wgsl_shaders() {
|
||||
use std::{path::PathBuf, process::Command};
|
||||
use std::fs::{DirEntry, File, read_to_string};
|
||||
use std::fs::{read_to_string, DirEntry, File};
|
||||
use std::io::Write;
|
||||
use std::{path::PathBuf};
|
||||
|
||||
use naga::valid::{ValidationFlags, Capabilities};
|
||||
use naga::valid::{Capabilities, ValidationFlags};
|
||||
use wgpu_core::pipeline::{CreateShaderModuleError, ShaderError};
|
||||
|
||||
let outdir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
|
||||
let shader_dir = PathBuf::from(".")
|
||||
@@ -27,9 +28,7 @@ fn compile_wgsl_shaders() {
|
||||
let include_file: Box<dyn Fn(&DirEntry) -> bool> = if config == "test" {
|
||||
Box::new(is_wgsl_file)
|
||||
} else {
|
||||
Box::new(|file: &DirEntry| {
|
||||
is_wgsl_file(file) && !is_test_wgsl_file(file)
|
||||
})
|
||||
Box::new(|file: &DirEntry| is_wgsl_file(file) && !is_test_wgsl_file(file))
|
||||
};
|
||||
|
||||
let shaders = std::fs::read_dir(&shader_dir)
|
||||
@@ -38,7 +37,7 @@ fn compile_wgsl_shaders() {
|
||||
.filter(include_file);
|
||||
|
||||
let out_file_path = outdir.join(format!("shaders-{config}.wgsl"));
|
||||
|
||||
|
||||
{
|
||||
let mut out_file = File::create(&out_file_path).unwrap();
|
||||
|
||||
@@ -50,15 +49,23 @@ fn compile_wgsl_shaders() {
|
||||
};
|
||||
|
||||
// Validate the shader
|
||||
let shader_contents = read_to_string(out_file_path).unwrap();
|
||||
let shader_contents = read_to_string(&out_file_path).unwrap();
|
||||
|
||||
let parse_result = naga::front::wgsl::parse_str(&shader_contents);
|
||||
|
||||
if let Err(e) = parse_result {
|
||||
panic!("{}", e.message());
|
||||
let e = ShaderError {
|
||||
source: shader_contents,
|
||||
label: None,
|
||||
inner: Box::new(e)
|
||||
};
|
||||
|
||||
let e = CreateShaderModuleError::Parsing(e);
|
||||
panic!("{}", e);
|
||||
}
|
||||
|
||||
let mut validator = naga::valid::Validator::new(ValidationFlags::all(), Capabilities::empty());
|
||||
let mut validator =
|
||||
naga::valid::Validator::new(ValidationFlags::all(), Capabilities::empty());
|
||||
|
||||
let validation_results = validator.validate(&parse_result.unwrap());
|
||||
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
use core::slice;
|
||||
use std::{borrow::Cow, ops::Deref, mem::{align_of, MaybeUninit, size_of}};
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
mem::{align_of, size_of, MaybeUninit},
|
||||
ops::Deref,
|
||||
};
|
||||
|
||||
use bytemuck::{cast, cast_slice, Pod};
|
||||
use futures::channel::oneshot;
|
||||
use lazy_static::lazy_static;
|
||||
use tokio::runtime::{Builder as TokioRuntimeBuilder, Runtime as TokioRuntime};
|
||||
use wgpu::{Instance, RequestAdapterOptions, Device, Queue, ShaderModuleDescriptor, ShaderModule, BufferDescriptor, COPY_BUFFER_ALIGNMENT, BufferUsages, Buffer, ComputePipelineDescriptor, BindGroupDescriptor, BindGroupEntry, CommandEncoderDescriptor, ComputePassDescriptor, util::{BufferInitDescriptor, DeviceExt}, Maintain};
|
||||
use wgpu::{
|
||||
util::{BufferInitDescriptor, DeviceExt},
|
||||
BindGroupDescriptor, BindGroupEntry, Buffer, BufferDescriptor, BufferUsages,
|
||||
CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, Device, Instance,
|
||||
Maintain, Queue, RequestAdapterOptions, ShaderModule, ShaderModuleDescriptor,
|
||||
COPY_BUFFER_ALIGNMENT,
|
||||
};
|
||||
|
||||
mod ristrettovec;
|
||||
mod scalarvec;
|
||||
@@ -23,14 +33,10 @@ pub struct Runtime {
|
||||
// test and release. In test, we #define the TEST macro, which exposes test kernels.
|
||||
// The release library does not feature these kernels.
|
||||
#[cfg(not(test))]
|
||||
const SHADERS: &str = include_str!(concat!(
|
||||
env!("OUT_DIR"),
|
||||
"/shaders-release.wgsl"
|
||||
));
|
||||
const SHADERS: &str = include_str!(concat!(env!("OUT_DIR"), "/shaders-release.wgsl"));
|
||||
|
||||
#[cfg(test)]
|
||||
const SHADERS: &str =
|
||||
include_str!(concat!(env!("OUT_DIR"), "/shaders-test.wgsl"));
|
||||
const SHADERS: &str = include_str!(concat!(env!("OUT_DIR"), "/shaders-test.wgsl"));
|
||||
|
||||
fn assert_aligned<T>(ptr: *const T) {
|
||||
assert!(ptr.cast::<()>().align_offset(align_of::<T>()) == 0);
|
||||
@@ -52,6 +58,33 @@ trait BufferExt {
|
||||
fn get_data<T: Pod + Copy>(&self) -> Vec<T>;
|
||||
}
|
||||
|
||||
pub trait GpuVec {
|
||||
type IterType: Sized;
|
||||
|
||||
fn get_buffer(&self) -> &Buffer;
|
||||
|
||||
fn len(&self) -> usize;
|
||||
|
||||
fn byte_len(&self) -> usize {
|
||||
self.len() * size_of::<Self::IterType>()
|
||||
}
|
||||
|
||||
fn u32_len(&self) -> usize {
|
||||
self.byte_len() / size_of::<u32>()
|
||||
}
|
||||
|
||||
fn run_unary<Rhs: GpuVec>(&self, output: &Buffer, kernel_name: &'static str) {
|
||||
let runtime = Runtime::get();
|
||||
let len = GpuU32::new(self.len() as u32);
|
||||
|
||||
runtime.run(
|
||||
kernel_name,
|
||||
&[self.get_buffer(), &DUMMY_BUFFER, output, &len.data],
|
||||
&Grid::new(self.len() as u32 / 128, 1, 1),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl BufferExt for Buffer {
|
||||
fn clone(&self) -> Buffer {
|
||||
let runtime = Runtime::get();
|
||||
@@ -65,7 +98,9 @@ impl BufferExt for Buffer {
|
||||
fn copy_into(&self, dst: &Buffer) {
|
||||
let runtime = Runtime::get();
|
||||
|
||||
let mut encoder = runtime.device.create_command_encoder(&CommandEncoderDescriptor { label: None });
|
||||
let mut encoder = runtime
|
||||
.device
|
||||
.create_command_encoder(&CommandEncoderDescriptor { label: None });
|
||||
encoder.copy_buffer_to_buffer(self, 0, &dst, 0, self.size());
|
||||
|
||||
runtime.queue.submit(Some(encoder.finish()));
|
||||
@@ -76,7 +111,7 @@ impl BufferExt for Buffer {
|
||||
let (s, r) = oneshot::channel();
|
||||
|
||||
// In vanilla WebGPU, if you use the MAP_READ flag, you must also set COPY_DST
|
||||
// and *only* COPY_DST. This means you can't use such buffers in compute
|
||||
// and *only* COPY_DST. This means you can't use such buffers in compute
|
||||
// shaders. As such, we create a temporary buffer with these properties so we
|
||||
// can copy data out of the shader-capable buffer and return the results.
|
||||
let runtime = Runtime::get();
|
||||
@@ -85,13 +120,15 @@ impl BufferExt for Buffer {
|
||||
label: None,
|
||||
size: self.size(),
|
||||
usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
||||
self.copy_into(©_buf);
|
||||
|
||||
let buffer_slice = copy_buf.slice(..);
|
||||
buffer_slice.map_async(wgpu::MapMode::Read, move |v| { s.send(v).unwrap(); });
|
||||
buffer_slice.map_async(wgpu::MapMode::Read, move |v| {
|
||||
s.send(v).unwrap();
|
||||
});
|
||||
|
||||
runtime.device.poll(Maintain::Wait);
|
||||
|
||||
@@ -113,15 +150,19 @@ impl Runtime {
|
||||
pub fn alloc<T>(&self, len: usize) -> Buffer {
|
||||
let len = size_of::<T>() * len;
|
||||
|
||||
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
|
||||
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
|
||||
// mapped_at_creation=true
|
||||
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 { len } else { (len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize};
|
||||
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 {
|
||||
len
|
||||
} else {
|
||||
(len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize
|
||||
};
|
||||
|
||||
let buffer = self.device.create_buffer(&BufferDescriptor {
|
||||
label: None,
|
||||
size: len as u64,
|
||||
usage: BufferUsages::STORAGE | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
||||
buffer
|
||||
@@ -130,9 +171,13 @@ impl Runtime {
|
||||
pub fn alloc_from_slice<T: Pod>(&self, data: &[T]) -> Buffer {
|
||||
let len = size_of::<T>() * data.len();
|
||||
|
||||
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
|
||||
// Round up len to a multiple of COPY_BUFFER_ALIGNMENT, as required to use
|
||||
// mapped_at_creation=true
|
||||
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 { len } else { (len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize};
|
||||
let len = if len % COPY_BUFFER_ALIGNMENT as usize == 0 {
|
||||
len
|
||||
} else {
|
||||
(len / COPY_BUFFER_ALIGNMENT as usize + 1) * COPY_BUFFER_ALIGNMENT as usize
|
||||
};
|
||||
|
||||
let buffer = self.device.create_buffer_init(&BufferInitDescriptor {
|
||||
label: None,
|
||||
@@ -144,19 +189,23 @@ impl Runtime {
|
||||
}
|
||||
|
||||
pub fn run(&self, kernel_name: &'static str, args: &[&Buffer], threadgroups: &Grid) {
|
||||
let pipeline = self.device.create_compute_pipeline(&ComputePipelineDescriptor {
|
||||
label: None,
|
||||
layout: None,
|
||||
module: &self.shaders,
|
||||
entry_point: kernel_name
|
||||
});
|
||||
let pipeline = self
|
||||
.device
|
||||
.create_compute_pipeline(&ComputePipelineDescriptor {
|
||||
label: None,
|
||||
layout: None,
|
||||
module: &self.shaders,
|
||||
entry_point: kernel_name,
|
||||
});
|
||||
|
||||
let bindings = args.iter().enumerate().map(|(i, b)| {
|
||||
BindGroupEntry {
|
||||
let bindings = args
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, b)| BindGroupEntry {
|
||||
binding: i as u32,
|
||||
resource: b.as_entire_binding()
|
||||
}
|
||||
}).collect::<Vec<_>>();
|
||||
resource: b.as_entire_binding(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let layout = pipeline.get_bind_group_layout(0);
|
||||
|
||||
@@ -166,7 +215,9 @@ impl Runtime {
|
||||
entries: &bindings,
|
||||
});
|
||||
|
||||
let mut encoder = self.device.create_command_encoder(&CommandEncoderDescriptor { label: None });
|
||||
let mut encoder = self
|
||||
.device
|
||||
.create_command_encoder(&CommandEncoderDescriptor { label: None });
|
||||
|
||||
{
|
||||
let (x, y, z) = threadgroups.0;
|
||||
@@ -183,7 +234,6 @@ impl Runtime {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
lazy_static! {
|
||||
static ref TOKIO_RUNTIME: TokioRuntime = {
|
||||
TokioRuntimeBuilder::new_current_thread()
|
||||
@@ -228,10 +278,16 @@ lazy_static! {
|
||||
shaders
|
||||
}
|
||||
};
|
||||
|
||||
/// A 4-byte buffer that exists so you can bind a buffer to `g_b`
|
||||
/// in unary shaders.
|
||||
static ref DUMMY_BUFFER: Buffer = {
|
||||
Runtime::get().alloc::<u32>(1)
|
||||
};
|
||||
}
|
||||
|
||||
pub struct GpuU32 {
|
||||
data: Buffer
|
||||
data: Buffer,
|
||||
}
|
||||
|
||||
impl GpuU32 {
|
||||
@@ -239,9 +295,7 @@ impl GpuU32 {
|
||||
let runtime = Runtime::get();
|
||||
let data = runtime.alloc_from_slice(&[val]);
|
||||
|
||||
Self {
|
||||
data
|
||||
}
|
||||
Self { data }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,10 +323,14 @@ mod tests {
|
||||
|
||||
let n = GpuU32::new(a.len() as u32);
|
||||
|
||||
runtime.run("add", &[&a_gpu, &b_gpu, &c_gpu, &n.data], &Grid::new(1, 1, 1));
|
||||
runtime.run(
|
||||
"add",
|
||||
&[&a_gpu, &b_gpu, &c_gpu, &n.data],
|
||||
&Grid::new(1, 1, 1),
|
||||
);
|
||||
|
||||
for (c, (a, b)) in c_gpu.get_data::<u32>().iter().zip(a.iter().zip(b.iter())) {
|
||||
assert_eq!(*c, a + b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1 @@
|
||||
pub struct GpuRistrettoPointVec {
|
||||
|
||||
}
|
||||
pub struct GpuRistrettoPointVec {}
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::mem::size_of;
|
||||
use curve25519_dalek::scalar::Scalar;
|
||||
use wgpu::Buffer;
|
||||
|
||||
use super::{Runtime, BufferExt};
|
||||
use super::{BufferExt, Runtime};
|
||||
|
||||
pub struct GpuScalarVec {
|
||||
data: Buffer,
|
||||
@@ -12,14 +12,14 @@ pub struct GpuScalarVec {
|
||||
impl Clone for GpuScalarVec {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
data: self.data.clone()
|
||||
data: self.data.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scalars {
|
||||
data: Vec<u32>,
|
||||
i: usize
|
||||
i: usize,
|
||||
}
|
||||
|
||||
impl Scalars {
|
||||
@@ -109,7 +109,7 @@ impl GpuScalarVec {
|
||||
}
|
||||
|
||||
Self {
|
||||
data: runtime.alloc_from_slice(&packed_data)
|
||||
data: runtime.alloc_from_slice(&packed_data),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,13 +124,16 @@ impl GpuScalarVec {
|
||||
mod tests {
|
||||
use rand::thread_rng;
|
||||
|
||||
use crate::webgpu_impl::{Grid, GpuU32};
|
||||
use crate::webgpu_impl::{GpuU32, Grid};
|
||||
|
||||
use super::*;
|
||||
|
||||
|
||||
#[test]
|
||||
fn can_unpack_scalarvec() {
|
||||
let a = (0..238).into_iter().map(|_| Scalar::random(&mut thread_rng())).collect::<Vec<_>>();
|
||||
let a = (0..238)
|
||||
.into_iter()
|
||||
.map(|_| Scalar::random(&mut thread_rng()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let a_v = GpuScalarVec::new(&a);
|
||||
|
||||
@@ -146,7 +149,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn can_clone_scalarvec() {
|
||||
let a = (0..238).into_iter().map(|_| Scalar::random(&mut thread_rng())).collect::<Vec<_>>();
|
||||
let a = (0..238)
|
||||
.into_iter()
|
||||
.map(|_| Scalar::random(&mut thread_rng()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let a_v = GpuScalarVec::new(&a);
|
||||
let a_v_clone = a_v.clone();
|
||||
@@ -161,11 +167,13 @@ mod tests {
|
||||
assert_eq!(count, a.len());
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn can_pack_unpack_shader_operand_a() {
|
||||
// Use 238 because it's a weird number not a multiple of the threadgroup size.
|
||||
let a = (0..238).into_iter().map(|_| Scalar::random(&mut thread_rng())).collect::<Vec<_>>();
|
||||
let a = (0..238)
|
||||
.into_iter()
|
||||
.map(|_| Scalar::random(&mut thread_rng()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let a_v = GpuScalarVec::new(&a);
|
||||
let c_v = a_v.clone();
|
||||
@@ -174,6 +182,32 @@ mod tests {
|
||||
|
||||
let len = GpuU32::new(a.len() as u32);
|
||||
|
||||
runtime.run("test_scalar_can_pack_unpack_a", &[&a_v.data, &a_v.data, &c_v.data, &len.data], &Grid::new(2, 1, 1));
|
||||
runtime.run(
|
||||
"test_scalar_can_pack_unpack_a",
|
||||
&[&a_v.data, &a_v.data, &c_v.data, &len.data],
|
||||
&Grid::new(2, 1, 1),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_pack_unpack_shader_operand_b() {
|
||||
// Use 238 because it's a weird number not a multiple of the threadgroup size.
|
||||
let a = (0..238)
|
||||
.into_iter()
|
||||
.map(|_| Scalar::random(&mut thread_rng()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let a_v = GpuScalarVec::new(&a);
|
||||
let c_v = a_v.clone();
|
||||
|
||||
let runtime = Runtime::get();
|
||||
|
||||
let len = GpuU32::new(a.len() as u32);
|
||||
|
||||
runtime.run(
|
||||
"test_scalar_can_pack_unpack_b",
|
||||
&[&a_v.data, &a_v.data, &c_v.data, &len.data],
|
||||
&Grid::new(2, 1, 1),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
6
sunscreen_math/src/webgpu_impl/shaders/constants.wgsl
Normal file
6
sunscreen_math/src/webgpu_impl/shaders/constants.wgsl
Normal file
@@ -0,0 +1,6 @@
|
||||
const Scalar29_Zero: Scalar29 = Scalar29(array<u32, 9>(0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
|
||||
|
||||
const Scalar29_L: Scalar29 = Scalar29(array<u32, 9>(
|
||||
0x1cf5d3edu, 0x009318d2u, 0x1de73596u, 0x1df3bd45u, 0x0000014du, 0x00000000u, 0x00000000u, 0x00000000u,
|
||||
0x00100000u,
|
||||
));
|
||||
@@ -10,4 +10,18 @@ fn test_scalar_can_pack_unpack_a(
|
||||
|
||||
let a = scalar29_unpack_a(gid.x, g_len);
|
||||
scalar29_pack_c(a, gid.x, g_len);
|
||||
}
|
||||
|
||||
@compute
|
||||
@workgroup_size(128, 1, 1)
|
||||
fn test_scalar_can_pack_unpack_b(
|
||||
@builtin(global_invocation_id) gid: vec3<u32>,
|
||||
) {
|
||||
if gid.x >= g_len {
|
||||
unused_b();
|
||||
return;
|
||||
}
|
||||
|
||||
let b = scalar29_unpack_b(gid.x, g_len);
|
||||
scalar29_pack_c(b, gid.x, g_len);
|
||||
}
|
||||
@@ -10,8 +10,6 @@ struct Foo {
|
||||
v: array<u32>
|
||||
}
|
||||
|
||||
const Scalar29_Zero: Scalar29 = Scalar29(array<u32, 9>(0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
|
||||
|
||||
// WGSL is terrible in that you can't pass arrays of unknown length to functions. So, we create
|
||||
// functions for unpacking bindings a, b respectively.
|
||||
fn scalar29_unpack_a(grid_tid: u32, stride: u32) -> Scalar29 {
|
||||
@@ -69,3 +67,41 @@ fn scalar29_pack_c(val: Scalar29, grid_tid: u32, stride: u32) {
|
||||
word = val.v[7] >> 21u | val.v[8u] << 8u;
|
||||
g_c[7u * stride + grid_tid] = word;
|
||||
}
|
||||
|
||||
fn scalar29_add(a: ptr<function, Scalar29>, b: ptr<function, Scalar29>) -> Scalar29 {
|
||||
var sum = Scalar29_Zero;
|
||||
let mask = (0x1u << 29u) - 1u;
|
||||
|
||||
// a + b
|
||||
var carry = 0u;
|
||||
for (var i = 0u; i < 9u; i++) {
|
||||
carry = a.v[i] + b.v[i] + (carry >> 29u);
|
||||
sum.v[i] = carry & mask;
|
||||
}
|
||||
|
||||
// subtract l if the sum is >= l
|
||||
return scalar29_sub(&sum, &Scalar29_L);
|
||||
}
|
||||
|
||||
fn scalar29_sub(a: ptr<function, Scalar29>, b: ptr<function, Scalar29>) -> Scalar29 {
|
||||
var difference = Scalar29_Zero;
|
||||
let mask = (1u << 29u) - 1u;
|
||||
|
||||
// a - b
|
||||
var borrow = 0u;
|
||||
for (var i = 0u; i < 9u; i++) {
|
||||
borrow = a.v[i] - (b.v[i] + (borrow >> 31u));
|
||||
difference.v[i] = borrow & mask;
|
||||
}
|
||||
|
||||
// conditionally add l if the difference is negative
|
||||
let underflow_mask = ((borrow >> 31u) ^ 1u) - 1u;
|
||||
|
||||
var carry = 0u;
|
||||
for (var i = 0u; i < 9u; i++) {
|
||||
carry = (carry >> 29) + difference[i] + (Scalar29_L.v[i] & underflow_mask);
|
||||
difference[i] = carry & mask;
|
||||
}
|
||||
|
||||
return difference;
|
||||
}
|
||||
Reference in New Issue
Block a user