Add countLeadingZeros (#2226)

* Add countLeadingZeros

* [glsl-out] Bake countLeadingZeros

* [hlsl-out] Bake countLeadingZeros

* [hlsl-out] Update Baked expressions

* Remove unnecessary bake for sints

* [glsl-out] CountLeadingZeros without findMSB

* Don't check negatives when uint

* Perform the type conv after mix

* use log2

* fix clippy lints

---------

Co-authored-by: teoxoy <28601907+teoxoy@users.noreply.github.com>
This commit is contained in:
Evan Mark Hopkins
2023-01-31 15:31:05 -05:00
committed by GitHub
parent a2b39e45bf
commit 6be394dac3
17 changed files with 310 additions and 65 deletions

View File

@@ -1114,33 +1114,33 @@ impl<'a, W: Write> Writer<'a, W> {
fn update_expressions_to_bake(&mut self, func: &crate::Function, info: &valid::FunctionInfo) {
use crate::Expression;
self.need_bake_expressions.clear();
for expr in func.expressions.iter() {
let expr_info = &info[expr.0];
let min_ref_count = func.expressions[expr.0].bake_ref_count();
for (fun_handle, expr) in func.expressions.iter() {
let expr_info = &info[fun_handle];
let min_ref_count = func.expressions[fun_handle].bake_ref_count();
if min_ref_count <= expr_info.ref_count {
self.need_bake_expressions.insert(expr.0);
self.need_bake_expressions.insert(fun_handle);
}
// if the expression is a Dot product with integer arguments,
// then the args needs baking as well
if let (
fun_handle,
&Expression::Math {
fun: crate::MathFunction::Dot,
arg,
arg1,
..
},
) = expr
{
let inner = info[fun_handle].ty.inner_with(&self.module.types);
if let TypeInner::Scalar { kind, .. } = *inner {
match kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
self.need_bake_expressions.insert(arg);
self.need_bake_expressions.insert(arg1.unwrap());
if let Expression::Math { fun, arg, arg1, .. } = *expr {
match fun {
crate::MathFunction::Dot => {
// if the expression is a Dot product with integer arguments,
// then the args needs baking as well
let inner = info[fun_handle].ty.inner_with(&self.module.types);
if let TypeInner::Scalar { kind, .. } = *inner {
match kind {
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
self.need_bake_expressions.insert(arg);
self.need_bake_expressions.insert(arg1.unwrap());
}
_ => {}
}
}
_ => {}
}
crate::MathFunction::CountLeadingZeros => {
self.need_bake_expressions.insert(arg);
}
_ => {}
}
}
}
@@ -2928,6 +2928,54 @@ impl<'a, W: Write> Writer<'a, W> {
Mf::Transpose => "transpose",
Mf::Determinant => "determinant",
// bits
Mf::CountLeadingZeros => {
match *ctx.info[arg].ty.inner_with(&self.module.types) {
crate::TypeInner::Vector { size, kind, .. } => {
let s = back::vector_size_str(size);
if let crate::ScalarKind::Uint = kind {
write!(self.out, "uvec{s}(")?;
} else {
write!(self.out, "ivec{s}(")?;
}
write!(self.out, "mix(vec{s}(31.0) - floor(log2(vec{s}(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ") + 0.5)), ")?;
if let crate::ScalarKind::Uint = kind {
write!(self.out, "vec{s}(32.0), lessThanEqual(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ", uvec{s}(0u))))")?;
} else {
write!(self.out, "mix(vec{s}(0.0), vec{s}(32.0), equal(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ", ivec{s}(0))), lessThanEqual(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ", ivec{s}(0))))")?;
}
}
crate::TypeInner::Scalar { kind, .. } => {
write!(self.out, "(")?;
self.write_expr(arg, ctx)?;
if let crate::ScalarKind::Uint = kind {
write!(self.out, " == 0u ? 32u : uint(")?;
} else {
write!(self.out, " <= 0 ? (")?;
self.write_expr(arg, ctx)?;
write!(self.out, " == 0 ? 32 : 0) : int(")?;
}
write!(self.out, "31.0 - floor(log2(float(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ") + 0.5))))")?;
}
_ => unreachable!(),
};
return Ok(());
}
Mf::CountOneBits => "bitCount",
Mf::ReverseBits => "bitfieldReverse",
Mf::ExtractBits => "bitfieldExtract",

View File

@@ -107,7 +107,7 @@ mod writer;
use std::fmt::Error as FmtError;
use thiserror::Error;
use crate::proc;
use crate::{back, proc};
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
@@ -280,4 +280,5 @@ pub struct Writer<'a, W> {
named_expressions: crate::NamedExpressions,
wrapped: Wrapped,
temp_access_chain: Vec<storage::SubAccess>,
need_bake_expressions: back::NeedBakeExpressions,
}

View File

@@ -83,6 +83,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
named_expressions: crate::NamedExpressions::default(),
wrapped: super::Wrapped::default(),
temp_access_chain: Vec::new(),
need_bake_expressions: Default::default(),
}
}
@@ -93,6 +94,46 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
self.entry_point_io.clear();
self.named_expressions.clear();
self.wrapped.clear();
self.need_bake_expressions.clear();
}
/// Helper method used to find which expressions of a given function require baking
///
/// # Notes
/// Clears `need_bake_expressions` set before adding to it
fn update_expressions_to_bake(
&mut self,
module: &Module,
func: &crate::Function,
info: &valid::FunctionInfo,
) {
use crate::Expression;
self.need_bake_expressions.clear();
for (fun_handle, expr) in func.expressions.iter() {
let expr_info = &info[fun_handle];
let min_ref_count = func.expressions[fun_handle].bake_ref_count();
if min_ref_count <= expr_info.ref_count {
self.need_bake_expressions.insert(fun_handle);
}
if let Expression::Math { fun, arg, .. } = *expr {
match fun {
crate::MathFunction::Asinh
| crate::MathFunction::Acosh
| crate::MathFunction::Atanh
| crate::MathFunction::Unpack2x16float => {
self.need_bake_expressions.insert(arg);
}
crate::MathFunction::CountLeadingZeros => {
let inner = info[fun_handle].ty.inner_with(&module.types);
if let Some(crate::ScalarKind::Sint) = inner.scalar_kind() {
self.need_bake_expressions.insert(arg);
}
}
_ => {}
}
}
}
}
pub fn write(
@@ -244,7 +285,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
// before writing all statements and expressions.
self.write_wrapped_functions(module, &ctx)?;
self.write_function(module, name.as_str(), function, &ctx)?;
self.write_function(module, name.as_str(), function, &ctx, info)?;
writeln!(self.out)?;
}
@@ -296,7 +337,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
}
let name = self.names[&NameKey::EntryPoint(index as u16)].clone();
self.write_function(module, &name, &ep.function, &ctx)?;
self.write_function(module, &name, &ep.function, &ctx, info)?;
if index < module.entry_points.len() - 1 {
writeln!(self.out)?;
@@ -1034,9 +1075,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
name: &str,
func: &crate::Function,
func_ctx: &back::FunctionCtx<'_>,
info: &valid::FunctionInfo,
) -> BackendResult {
// Function Declaration Syntax - https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-function-syntax
self.update_expressions_to_bake(module, func, info);
// Write modifier
if let Some(crate::FunctionResult {
binding:
@@ -1284,15 +1328,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
// Otherwise, we could accidentally write variable name instead of full expression.
// Also, we use sanitized names! It defense backend from generating variable with name from reserved keywords.
Some(self.namer.call(name))
} else if self.need_bake_expressions.contains(&handle) {
Some(format!("_expr{}", handle.index()))
} else if info.ref_count == 0 {
Some(self.namer.call(""))
} else {
let min_ref_count = func_ctx.expressions[handle].bake_ref_count();
if min_ref_count <= info.ref_count {
Some(format!("_expr{}", handle.index()))
} else {
None
}
None
};
if let Some(name) = expr_name {
@@ -2510,6 +2551,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
Unpack2x16float,
Regular(&'static str),
MissingIntOverload(&'static str),
CountLeadingZeros,
}
let fun = match fun {
@@ -2572,6 +2614,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
Mf::Transpose => Function::Regular("transpose"),
Mf::Determinant => Function::Regular("determinant"),
// bits
Mf::CountLeadingZeros => Function::CountLeadingZeros,
Mf::CountOneBits => Function::MissingIntOverload("countbits"),
Mf::ReverseBits => Function::MissingIntOverload("reversebits"),
Mf::FindLsb => Function::Regular("firstbitlow"),
@@ -2639,6 +2682,43 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
write!(self.out, ")")?;
}
}
Function::CountLeadingZeros => {
match *func_ctx.info[arg].ty.inner_with(&module.types) {
TypeInner::Vector { size, kind, .. } => {
let s = match size {
crate::VectorSize::Bi => ".xx",
crate::VectorSize::Tri => ".xxx",
crate::VectorSize::Quad => ".xxxx",
};
if let ScalarKind::Uint = kind {
write!(self.out, "asuint((31){s} - firstbithigh(")?;
} else {
write!(self.out, "(")?;
self.write_expr(module, arg, func_ctx)?;
write!(
self.out,
" < (0){s} ? (0){s} : (31){s} - firstbithigh("
)?;
}
}
TypeInner::Scalar { kind, .. } => {
if let ScalarKind::Uint = kind {
write!(self.out, "asuint(31 - firstbithigh(")?;
} else {
write!(self.out, "(")?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, " < 0 ? 0 : 31 - firstbithigh(")?;
}
}
_ => unreachable!(),
}
self.write_expr(module, arg, func_ctx)?;
write!(self.out, "))")?;
return Ok(());
}
}
}
Expression::Swizzle {

View File

@@ -1689,6 +1689,7 @@ impl<W: Write> Writer<W> {
Mf::Transpose => "transpose",
Mf::Determinant => "determinant",
// bits
Mf::CountLeadingZeros => "clz",
Mf::CountOneBits => "popcount",
Mf::ReverseBits => "reverse_bits",
Mf::ExtractBits => "extract_bits",

View File

@@ -724,7 +724,7 @@ impl<'w> BlockContext<'w> {
self.temp_list.resize(size as _, arg1_id);
let id = self.gen_id();
block.body.push(Instruction::composite_construct(
block.body.push(Instruction::constant_composite(
result_type_id,
id,
&self.temp_list,
@@ -735,7 +735,7 @@ impl<'w> BlockContext<'w> {
self.temp_list.resize(size as _, arg2_id);
let id = self.gen_id();
block.body.push(Instruction::composite_construct(
block.body.push(Instruction::constant_composite(
result_type_id,
id,
&self.temp_list,
@@ -888,6 +888,71 @@ impl<'w> BlockContext<'w> {
id,
arg0_id,
)),
Mf::CountLeadingZeros => {
let int = crate::ScalarValue::Sint(31);
let (int_type_id, int_id) = match *arg_ty {
crate::TypeInner::Vector { size, width, .. } => {
let ty = self.get_type_id(LookupType::Local(LocalType::Value {
vector_size: Some(size),
kind: crate::ScalarKind::Sint,
width,
pointer_space: None,
}));
self.temp_list.clear();
self.temp_list
.resize(size as _, self.writer.get_constant_scalar(int, width));
let id = self.gen_id();
block.body.push(Instruction::constant_composite(
ty,
id,
&self.temp_list,
));
(ty, id)
}
crate::TypeInner::Scalar { width, .. } => (
self.get_type_id(LookupType::Local(LocalType::Value {
vector_size: None,
kind: crate::ScalarKind::Sint,
width,
pointer_space: None,
})),
self.writer.get_constant_scalar(int, width),
),
_ => unreachable!(),
};
block.body.push(Instruction::ext_inst(
self.writer.gl450_ext_inst_id,
spirv::GLOp::FindUMsb,
int_type_id,
id,
&[arg0_id],
));
let sub_id = self.gen_id();
block.body.push(Instruction::binary(
spirv::Op::ISub,
int_type_id,
sub_id,
int_id,
id,
));
if let Some(crate::ScalarKind::Uint) = arg_scalar_kind {
block.body.push(Instruction::unary(
spirv::Op::Bitcast,
result_type_id,
self.gen_id(),
sub_id,
));
}
return Ok(());
}
Mf::CountOneBits => MathOp::Custom(Instruction::unary(
spirv::Op::BitCount,
result_type_id,

View File

@@ -1578,6 +1578,7 @@ impl<W: Write> Writer<W> {
Mf::Transpose => Function::Regular("transpose"),
Mf::Determinant => Function::Regular("determinant"),
// bits
Mf::CountLeadingZeros => Function::Regular("countLeadingZeros"),
Mf::CountOneBits => Function::Regular("countOneBits"),
Mf::ReverseBits => Function::Regular("reverseBits"),
Mf::ExtractBits => Function::Regular("extractBits"),

View File

@@ -191,6 +191,7 @@ pub fn map_standard_fun(word: &str) -> Option<crate::MathFunction> {
"transpose" => Mf::Transpose,
"determinant" => Mf::Determinant,
// bits
"countLeadingZeros" => Mf::CountLeadingZeros,
"countOneBits" => Mf::CountOneBits,
"reverseBits" => Mf::ReverseBits,
"extractBits" => Mf::ExtractBits,

View File

@@ -1066,6 +1066,7 @@ pub enum MathFunction {
Transpose,
Determinant,
// bits
CountLeadingZeros,
CountOneBits,
ReverseBits,
ExtractBits,

View File

@@ -279,6 +279,7 @@ impl super::MathFunction {
Self::Transpose => 1,
Self::Determinant => 1,
// bits
Self::CountLeadingZeros => 1,
Self::CountOneBits => 1,
Self::ReverseBits => 1,
Self::ExtractBits => 3,

View File

@@ -793,6 +793,7 @@ impl<'a> ResolveContext<'a> {
)),
},
// bits
Mf::CountLeadingZeros |
Mf::CountOneBits |
Mf::ReverseBits |
Mf::ExtractBits |

View File

@@ -1223,7 +1223,11 @@ impl super::Validator {
_ => return Err(ExpressionError::InvalidArgumentType(fun, 0, arg)),
}
}
Mf::CountOneBits | Mf::ReverseBits | Mf::FindLsb | Mf::FindMsb => {
Mf::CountLeadingZeros
| Mf::CountOneBits
| Mf::ReverseBits
| Mf::FindLsb
| Mf::FindMsb => {
if arg1_ty.is_some() | arg2_ty.is_some() | arg3_ty.is_some() {
return Err(ExpressionError::WrongArgumentCount(fun));
}

View File

@@ -10,4 +10,8 @@ fn main() {
let g = refract(v, v, f);
let const_dot = dot(vec2<i32>(), vec2<i32>());
let first_leading_bit_abs = firstLeadingBit(abs(0u));
let clz_a = countLeadingZeros(-1);
let clz_b = countLeadingZeros(1u);
let clz_c = countLeadingZeros(vec2(-1));
let clz_d = countLeadingZeros(vec2(1u));
}

View File

@@ -14,5 +14,11 @@ void main() {
vec4 g = refract(v, v, 1.0);
int const_dot = ( + ivec2(0, 0).x * ivec2(0, 0).x + ivec2(0, 0).y * ivec2(0, 0).y);
uint first_leading_bit_abs = uint(findMSB(uint(abs(int(0u)))));
int clz_a = (-1 <= 0 ? (-1 == 0 ? 32 : 0) : int(31.0 - floor(log2(float(-1) + 0.5))));
uint clz_b = (1u == 0u ? 32u : uint(31.0 - floor(log2(float(1u) + 0.5))));
ivec2 _e20 = ivec2(-1);
ivec2 clz_c = ivec2(mix(vec2(31.0) - floor(log2(vec2(_e20) + 0.5)), mix(vec2(0.0), vec2(32.0), equal(_e20, ivec2(0))), lessThanEqual(_e20, ivec2(0))));
uvec2 _e23 = uvec2(1u);
uvec2 clz_d = uvec2(mix(vec2(31.0) - floor(log2(vec2(_e23) + 0.5)), vec2(32.0), lessThanEqual(_e23, uvec2(0u))));
}

View File

@@ -10,4 +10,9 @@ void main()
float4 g = refract(v, v, 1.0);
int const_dot = dot(int2(0, 0), int2(0, 0));
uint first_leading_bit_abs = firstbithigh(abs(0u));
int clz_a = (-1 < 0 ? 0 : 31 - firstbithigh(-1));
uint clz_b = asuint(31 - firstbithigh(1u));
int2 _expr20 = (-1).xx;
int2 clz_c = (_expr20 < (0).xx ? (0).xx : (31).xx - firstbithigh(_expr20));
uint2 clz_d = asuint((31).xx - firstbithigh((1u).xx));
}

View File

@@ -17,4 +17,8 @@ vertex void main_(
metal::float4 g = metal::refract(v, v, 1.0);
int const_dot = ( + const_type_1_.x * const_type_1_.x + const_type_1_.y * const_type_1_.y);
uint first_leading_bit_abs = (((metal::clz(metal::abs(0u)) + 1) % 33) - 1);
int clz_a = metal::clz(-1);
uint clz_b = metal::clz(1u);
metal::int2 clz_c = metal::clz(metal::int2(-1));
metal::uint2 clz_d = metal::clz(metal::uint2(1u));
}

View File

@@ -1,11 +1,11 @@
; SPIR-V
; Version: 1.1
; Generator: rspirv
; Bound: 37
; Bound: 55
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %14 "main"
OpEntryPoint Vertex %16 "main"
%2 = OpTypeVoid
%4 = OpTypeFloat 32
%3 = OpConstant %4 1.0
@@ -14,33 +14,51 @@ OpEntryPoint Vertex %14 "main"
%6 = OpConstant %7 0
%9 = OpTypeInt 32 0
%8 = OpConstant %9 0
%10 = OpTypeVector %4 4
%11 = OpTypeVector %7 2
%12 = OpConstantComposite %11 %6 %6
%15 = OpTypeFunction %2
%27 = OpConstantNull %7
%14 = OpFunction %2 None %15
%13 = OpLabel
OpBranch %16
%16 = OpLabel
%17 = OpCompositeConstruct %10 %5 %5 %5 %5
%18 = OpExtInst %4 %1 Degrees %3
%19 = OpExtInst %4 %1 Radians %3
%20 = OpExtInst %10 %1 Degrees %17
%21 = OpExtInst %10 %1 Radians %17
%23 = OpCompositeConstruct %10 %5 %5 %5 %5
%24 = OpCompositeConstruct %10 %3 %3 %3 %3
%22 = OpExtInst %10 %1 FClamp %17 %23 %24
%25 = OpExtInst %10 %1 Refract %17 %17 %3
%28 = OpCompositeExtract %7 %12 0
%29 = OpCompositeExtract %7 %12 0
%30 = OpIMul %7 %28 %29
%31 = OpIAdd %7 %27 %30
%32 = OpCompositeExtract %7 %12 1
%33 = OpCompositeExtract %7 %12 1
%34 = OpIMul %7 %32 %33
%26 = OpIAdd %7 %31 %34
%35 = OpCopyObject %9 %8
%36 = OpExtInst %9 %1 FindUMsb %35
%10 = OpConstant %7 -1
%11 = OpConstant %9 1
%12 = OpTypeVector %4 4
%13 = OpTypeVector %7 2
%14 = OpConstantComposite %13 %6 %6
%17 = OpTypeFunction %2
%40 = OpConstant %7 31
%49 = OpTypeVector %9 2
%25 = OpConstantComposite %12 %5 %5 %5 %5
%26 = OpConstantComposite %12 %3 %3 %3 %3
%29 = OpConstantNull %7
%47 = OpConstantComposite %13 %40 %40
%52 = OpConstantComposite %13 %40 %40
%16 = OpFunction %2 None %17
%15 = OpLabel
OpBranch %18
%18 = OpLabel
%19 = OpCompositeConstruct %12 %5 %5 %5 %5
%20 = OpExtInst %4 %1 Degrees %3
%21 = OpExtInst %4 %1 Radians %3
%22 = OpExtInst %12 %1 Degrees %19
%23 = OpExtInst %12 %1 Radians %19
%24 = OpExtInst %12 %1 FClamp %19 %25 %26
%27 = OpExtInst %12 %1 Refract %19 %19 %3
%30 = OpCompositeExtract %7 %14 0
%31 = OpCompositeExtract %7 %14 0
%32 = OpIMul %7 %30 %31
%33 = OpIAdd %7 %29 %32
%34 = OpCompositeExtract %7 %14 1
%35 = OpCompositeExtract %7 %14 1
%36 = OpIMul %7 %34 %35
%28 = OpIAdd %7 %33 %36
%37 = OpCopyObject %9 %8
%38 = OpExtInst %9 %1 FindUMsb %37
%39 = OpExtInst %7 %1 FindUMsb %10
%41 = OpISub %7 %40 %39
%42 = OpExtInst %7 %1 FindUMsb %11
%43 = OpISub %7 %40 %42
%44 = OpBitcast %9 %43
%45 = OpCompositeConstruct %13 %10 %10
%46 = OpExtInst %13 %1 FindUMsb %45
%48 = OpISub %13 %47 %46
%50 = OpCompositeConstruct %49 %11 %11
%51 = OpExtInst %13 %1 FindUMsb %50
%53 = OpISub %13 %52 %51
%54 = OpBitcast %49 %53
OpReturn
OpFunctionEnd

View File

@@ -9,4 +9,8 @@ fn main() {
let g = refract(v, v, 1.0);
let const_dot = dot(vec2<i32>(0, 0), vec2<i32>(0, 0));
let first_leading_bit_abs = firstLeadingBit(abs(0u));
let clz_a = countLeadingZeros(-1);
let clz_b = countLeadingZeros(1u);
let clz_c = countLeadingZeros(vec2<i32>(-1));
let clz_d = countLeadingZeros(vec2<u32>(1u));
}