reuse polyN in trig_poly float64 (#7385)

Similar speed, fewer ALU ops (151 vs. 154 per sine), and simpler. The power-of-2 term grouping should probably be done in polyN if needed.
This commit is contained in:
chenyu
2024-10-29 20:45:56 -04:00
committed by GitHub
parent 6bf38c35e5
commit 33acbaeb24

View File

@@ -158,13 +158,7 @@ def cody_waite_reduction(d:UOp) -> Tuple[UOp, UOp]:
# *** approximate sine on small angle. ***
def trig_poly(d:UOp, coeff32, coeff64):
  """Return `s * (u * d) + d`, where `s = d * d` and `u = polyN(s, coeffs)`.

  The coefficient table is chosen from the dtype of `d`: `coeff64` when
  `d.dtype` is `dtypes.float64`, `coeff32` for every other dtype.

  Args:
    d: input UOp (the comment preceding this function calls it a small angle).
    coeff32: coefficients handed to `polyN` for non-float64 inputs.
    coeff64: coefficients handed to `polyN` for float64 inputs.

  Returns:
    The UOp `s * (u * d) + d`.
  """
  s = d * d
  # One polyN call serves both precisions; only the coefficient table differs.
  # No separate hand-unrolled float64 path is needed, since its result would be
  # overwritten by this assignment anyway.
  u = polyN(s, coeff64 if d.dtype == dtypes.float64 else coeff32)
  return s * (u * d) + d
# approximate sine on [-pi/2, pi/2]
def sin_poly(d:UOp) -> UOp: