diff --git a/accel/llvm/ops_llvm.py b/accel/llvm/ops_llvm.py
index 6807ff0e10..e60da16bfa 100644
--- a/accel/llvm/ops_llvm.py
+++ b/accel/llvm/ops_llvm.py
@@ -84,6 +84,9 @@ class LLVM:
   target_machine = None
   engine = None
   optimizer = None
+  # NOTE(review): debugging aid for when the generated code does not get vectorized; presumably this reproduces it:
+  # OPT=2 DEBUG=3 LLVM=1 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_sum_full
+  # TODO: confirm whether anything vectorizes at all with the passes configured in __init__
 
   def __init__(self):
     if LLVM.engine is not None: return
@@ -91,14 +94,19 @@ class LLVM:
     llvm.initialize_native_target()
     llvm.initialize_native_asmprinter() # yes, even this one
     target = llvm.Target.from_default_triple()
-    LLVM.target_machine = target.create_target_machine()
-    LLVM.engine = llvm.create_mcjit_compiler(llvm.parse_assembly(""), LLVM.target_machine)
     LLVM.optimizer = llvm.ModulePassManager()
+
+    # TODO: verify that these PassManagerBuilder settings actually change the optimized output
     builder = llvm.PassManagerBuilder()
     builder.opt_level = 3
     builder.loop_vectorize = True
     builder.populate(LLVM.optimizer)
 
+    LLVM.target_machine = target.create_target_machine(opt=3) # this opt actually can change things
+    LLVM.target_machine.add_analysis_passes(LLVM.optimizer) # register the target machine's analysis passes with the optimizer
+    LLVM.target_machine.set_asm_verbosity(True) # ask for commented assembly output
+    LLVM.engine = llvm.create_mcjit_compiler(llvm.parse_assembly(""), LLVM.target_machine)
+
     # cache
     def notify_func(module, buffer):
       #print("notify", module.name)
diff --git a/test/test_ops.py b/test/test_ops.py
index 96f3e63df1..bb1f56165c 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -111,6 +111,8 @@ class TestOps(unittest.TestCase):
     helper_test_op([(3,3,45,65), (3,3,65,45)], lambda x,y: x @ y, Tensor.dot, atol=1e-4)
   def test_sum_simple(self):
     helper_test_op(None, lambda x: x.sum(), Tensor.sum, vals=[[1.,1.]])
+  def test_sum_full(self):
+    helper_test_op([(10000,)], lambda x: x.sum(), lambda x: x.sum())
   def test_sum_relu(self):
     helper_test_op([(3,4,5)], lambda x: x.relu().sum().relu(), lambda x: x.relu().sum().relu())
   def test_sum(self):