Remove POW llop and add SQRT llop (#1104)

* fixed division by zero for fast operations

* made et closer to 0

* replaced POW llop with SQRT

* updated mlops to swap SQRT and POW llops

* updated hlops to swap POW and SQRT

* added sqrt llop to cpu runtime

* added sqrt llop to cstyle codegen (see the codegen sketch after this list)

* added SQRT llop to llvm ir codegen

* added SQRT llop to torch runtime

* moved pow from mlops to hlops

* found a better way to do reverse pow

* fixed indentation

* added SQRT llop to triton

* updated docs to match new llops

* removed POW operator from assembly codegen

* added sqrt and rsqrt to pow hlop

* rewrote pow function in tensor.py (see the pow sketch after this list)

* adjusted tolerance

* adjusted tolerance for AdamW

* reduced tolerance for Adam too

* removed accidental leftover code

* removed the rest of the accidental code

* added rsqrt test

* removed pow from mlops again (it was added back when resolving merge conflicts)
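With POW gone from the llops, the high-level pow has to be assembled from what remains. A minimal sketch of the idea in plain NumPy (illustrative only, not tinygrad's actual tensor.py): special-case the exponents the new SQRT llop serves directly, and fall back to exp/log for everything else. The rsqrt test mentioned above would exercise the e == -0.5 branch.

import numpy as np

def pow_via_sqrt(x: np.ndarray, e: float) -> np.ndarray:
    # fast paths: exponents that SQRT (and plain multiplication) cover directly
    if e == 1.0: return x
    if e == 2.0: return x * x
    if e == 0.5: return np.sqrt(x)            # the new SQRT llop
    if e == -0.5: return 1.0 / np.sqrt(x)     # rsqrt, i.e. reciprocal sqrt
    # general case, valid for x > 0: x**e == exp(e * log(x))
    return np.exp(e * np.log(x))

def rpow_via_exp(base: float, x: np.ndarray) -> np.ndarray:
    # reverse pow (scalar ** tensor), valid for base > 0:
    # base**x == exp(x * log(base))
    return np.exp(x * np.log(base))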
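On the backend side, the new llop maps to a single intrinsic. A sketch of what a C-style codegen table entry could look like (hypothetical op names and dict, not tinygrad's exact code_for_op):

code_for_op = {
    "EXP": lambda x: f"exp({x})",
    "LOG": lambda x: f"log({x})",
    "SQRT": lambda x: f"sqrt({x})",  # one sqrt() call instead of pow(x, 0.5)
}

print(code_for_op["SQRT"]("val0"))  # emits: sqrt(val0)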

---------

Co-authored-by: Jacky Lee <jla524@sfu.ca>
Author: Eli Frigo
Date: 2023-07-05 20:07:58 -05:00
Committed by: GitHub
Parent: b7369ffcff
Commit: 801564f31b
12 changed files with 34 additions and 36 deletions

test/test_optim.py

@@ -71,10 +71,10 @@ class TestOptim(unittest.TestCase):
   def test_adamw_high_lr(self): self._test_adamw(1, {'lr': 10}, 1e-5, 1e-5)
   def test_multistep_adam(self): self._test_adam(10, {'lr': 0.001}, 1e-5, 0)
-  def test_multistep_adam_high_lr(self): self._test_adam(10, {'lr': 10}, 1e-5, 3e-4)
+  def test_multistep_adam_high_lr(self): self._test_adam(10, {'lr': 10}, 2e-4, 5e-4)
   def test_multistep_adamw(self): self._test_adamw(10, {'lr': 0.001}, 1e-5, 0)
-  def test_multistep_adamw_high_lr(self): self._test_adamw(10, {'lr': 10}, 1e-5, 3e-4)
+  def test_multistep_adamw_high_lr(self): self._test_adamw(10, {'lr': 10}, 5e-4, 2e-3)
   def test_duped_weights(self):
     for Opt in [Adam, AdamW, SGD]:
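The two trailing numbers in each _test_adam/_test_adamw call read, by position, as absolute and relative tolerances (an assumption from the call shape; the helper itself is not shown in this hunk). Since pow built from exp/log drifts slightly from a native pow, the multistep comparisons need more slack. A quick runnable illustration of how such tolerances combine under the usual np.testing.assert_allclose rule:

import numpy as np

# assert_allclose passes when |actual - desired| <= atol + rtol * |desired|
desired = np.array([1.0, 100.0])
actual = desired + np.array([1.5e-4, 4e-2])  # both errors fit within 2e-4 + 5e-4*|desired|
np.testing.assert_allclose(actual, desired, atol=2e-4, rtol=5e-4)
print("within tolerance")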