From 0894fcd0bcc0aedf52ac4f41aeafc85b5e018560 Mon Sep 17 00:00:00 2001
From: Nick Koskelo <koskelo2@illinois.edu>
Date: Thu, 9 Jan 2025 23:40:44 +0000
Subject: [PATCH 1/6] Preliminary example for adding evaluation to unrolled
 expressions.

---
 examples/python/loop_unroll_codegen.py | 52 ++++++++++++++++++++++++++
 loopy/codegen/loop.py                  | 24 ++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 examples/python/loop_unroll_codegen.py

diff --git a/examples/python/loop_unroll_codegen.py b/examples/python/loop_unroll_codegen.py
new file mode 100644
index 000000000..cdb2170fd
--- /dev/null
+++ b/examples/python/loop_unroll_codegen.py
@@ -0,0 +1,52 @@
+import numpy as np
+
+import pyopencl as cl
+import pyopencl.array
+
+import loopy as lp
+from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2  # noqa: F401
+
+
+# setup
+# -----
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+n = 15 * 10**6
+a = cl.array.arange(queue, n, dtype=np.float32)
+
+# create
+# ------
+knl = lp.make_kernel(
+        "{ [i]: 0<= i <8}",
+        "out[i] = a if i == 0 else (b if i == 1 else c)")
+
+knl = lp.tag_inames(knl, {"i": "vec"})
+from loopy.kernel.array import VectorArrayDimTag
+
+try:
+    orig_knl = knl
+    knl = lp.tag_array_axes(knl, "out", [VectorArrayDimTag()])
+    knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32})
+
+    dev_code = lp.generate_code_v2(knl).device_code()
+    print(dev_code)
+
+except Exception as err:
+    print(err)
+breakpoint()
+
+print("No Vector Array Tag.")
+knl = orig_knl
+knl = lp.make_kernel(
+        "{ [i]: 0<= i <8}",
+        "out[i] = a if i == 0 else (b if i == 1 else c)")
+
+knl = lp.tag_inames(knl, {"i": "ilp.unr"})
+knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32})
+dev_code = lp.generate_code_v2(knl).device_code()
+print(dev_code)
+breakpoint()
+
+print("Hello")
+
diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 44bfa07cc..4b5f76d16 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -151,9 +151,33 @@ def generate_unroll_loop(codegen_state, sched_index):
 
     result = []
 
+    from pymbolic.mapper.evaluator import evaluate
+    from pymbolic.primitives import Variable
+
+    from loopy.kernel.instruction import Assignment
+
     for i in range(length):
         idx_aff = lower_bound_aff + i
         new_codegen_state = codegen_state.fix(iname, idx_aff)
+        original_knl_ = new_codegen_state.kernel.copy()
+        context = new_codegen_state.var_subst_map
+        # Add in the other variables as variables.
+        for key in original_knl_.arg_dict:
+            if key not in context.keys():
+                context = context.update({key: Variable(key)})
+
+        new_insns = []
+        for insn in new_codegen_state.kernel.instructions:
+            if isinstance(insn, Assignment):
+                # We can update the evaluation of this potentially.
+                new_insns.append(insn.copy(expression=evaluate(insn.expression,
+                                                               context)))
+            else:
+                new_insns.append(insn)
+
+        new_knl = original_knl_.copy(instructions=new_insns)
+        new_codegen_state = new_codegen_state.copy(kernel=new_knl)
+
         result.append(
                 build_loop_nest(new_codegen_state, sched_index+1))
 

From 2123f7e3a9000dac861ad776d0c313e5f99a219c Mon Sep 17 00:00:00 2001
From: Nick Koskelo <koskelo2@illinois.edu>
Date: Fri, 10 Jan 2025 18:15:16 +0000
Subject: [PATCH 2/6] Use all read variables in context and not just those
 which are arguments.

---
 loopy/codegen/loop.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 4b5f76d16..35722d2de 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -162,7 +162,7 @@ def generate_unroll_loop(codegen_state, sched_index):
         original_knl_ = new_codegen_state.kernel.copy()
         context = new_codegen_state.var_subst_map
         # Add in the other variables as variables.
-        for key in original_knl_.arg_dict:
+        for key in original_knl_.get_read_variables():
             if key not in context.keys():
                 context = context.update({key: Variable(key)})
 

From a5bd5c1250e39e60ce4b4354ff56a2c3d65c6ccb Mon Sep 17 00:00:00 2001
From: Nick Koskelo <koskelo2@illinois.edu>
Date: Fri, 10 Jan 2025 18:18:55 +0000
Subject: [PATCH 3/6] Update example.

---
 examples/python/loop_unroll_codegen.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/python/loop_unroll_codegen.py b/examples/python/loop_unroll_codegen.py
index cdb2170fd..ae09a5130 100644
--- a/examples/python/loop_unroll_codegen.py
+++ b/examples/python/loop_unroll_codegen.py
@@ -24,17 +24,19 @@
 knl = lp.tag_inames(knl, {"i": "vec"})
 from loopy.kernel.array import VectorArrayDimTag
 
+
 try:
     orig_knl = knl
     knl = lp.tag_array_axes(knl, "out", [VectorArrayDimTag()])
-    knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32})
+    knl = lp.add_and_infer_dtypes(knl, {"a": np.float32,
+                                        "b": np.float32,
+                                        "c": np.float32})
 
     dev_code = lp.generate_code_v2(knl).device_code()
     print(dev_code)
 
 except Exception as err:
     print(err)
-breakpoint()
 
 print("No Vector Array Tag.")
 knl = orig_knl
@@ -46,7 +48,3 @@
 knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32})
 dev_code = lp.generate_code_v2(knl).device_code()
 print(dev_code)
-breakpoint()
-
-print("Hello")
-

From 7d6cd0af82332a596410c034daa1d13d475a58d9 Mon Sep 17 00:00:00 2001
From: Nick Koskelo <koskelo2@illinois.edu>
Date: Mon, 13 Jan 2025 16:05:57 +0000
Subject: [PATCH 4/6] Use PartialEvaluationMapper instead of pymbolic's evaluate.

---
 loopy/codegen/loop.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 35722d2de..ebb3ee684 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -31,7 +31,7 @@
 from loopy.codegen.control import build_loop_nest
 from loopy.codegen.result import merge_codegen_results
 from loopy.diagnostic import LoopyError, warn
-from loopy.symbolic import flatten
+from loopy.symbolic import PartialEvaluationMapper, flatten
 
 
 # {{{ conditional-reducing slab decomposition
@@ -151,9 +151,6 @@ def generate_unroll_loop(codegen_state, sched_index):
 
     result = []
 
-    from pymbolic.mapper.evaluator import evaluate
-    from pymbolic.primitives import Variable
-
     from loopy.kernel.instruction import Assignment
 
     for i in range(length):
@@ -162,16 +159,13 @@ def generate_unroll_loop(codegen_state, sched_index):
         original_knl_ = new_codegen_state.kernel.copy()
         context = new_codegen_state.var_subst_map
         # Add in the other variables as variables.
-        for key in original_knl_.get_read_variables():
-            if key not in context.keys():
-                context = context.update({key: Variable(key)})
+        mymapper = PartialEvaluationMapper(context)
 
         new_insns = []
         for insn in new_codegen_state.kernel.instructions:
             if isinstance(insn, Assignment):
                 # We can update the evaluation of this potentially.
-                new_insns.append(insn.copy(expression=evaluate(insn.expression,
-                                                               context)))
+                new_insns.append(insn.copy(expression=mymapper(insn.expression)))
             else:
                 new_insns.append(insn)
 

From 5f1d791b0909aeb374beb9774c5faf0e89eb8a7b Mon Sep 17 00:00:00 2001
From: Nick Koskelo <koskelo2@illinois.edu>
Date: Mon, 13 Jan 2025 17:03:49 +0000
Subject: [PATCH 5/6] Use EvaluatorWithDeficientContext instead of PartialEvaluationMapper.

---
 loopy/codegen/loop.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index ebb3ee684..c90f9b003 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -31,7 +31,7 @@
 from loopy.codegen.control import build_loop_nest
 from loopy.codegen.result import merge_codegen_results
 from loopy.diagnostic import LoopyError, warn
-from loopy.symbolic import PartialEvaluationMapper, flatten
+from loopy.symbolic import EvaluatorWithDeficientContext, flatten
 
 
 # {{{ conditional-reducing slab decomposition
@@ -159,13 +159,14 @@ def generate_unroll_loop(codegen_state, sched_index):
         original_knl_ = new_codegen_state.kernel.copy()
         context = new_codegen_state.var_subst_map
         # Add in the other variables as variables.
-        mymapper = PartialEvaluationMapper(context)
+        mymapper = EvaluatorWithDeficientContext(context)
 
         new_insns = []
         for insn in new_codegen_state.kernel.instructions:
             if isinstance(insn, Assignment):
                 # We can update the evaluation of this potentially.
-                new_insns.append(insn.copy(expression=mymapper(insn.expression)))
+                new_expr = mymapper(insn.expression)
+                new_insns.append(insn.copy(expression=new_expr))
             else:
                 new_insns.append(insn)
 

From 71c1a58764bfc5e3fc579f76c47fc7fb16bc71e5 Mon Sep 17 00:00:00 2001
From: Nick Koskelo <koskelo2@illinois.edu>
Date: Mon, 13 Jan 2025 18:42:53 +0000
Subject: [PATCH 6/6] Try with constant folding instead.

---
 loopy/codegen/loop.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index c90f9b003..ff56f1e53 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -31,7 +31,9 @@
 from loopy.codegen.control import build_loop_nest
 from loopy.codegen.result import merge_codegen_results
 from loopy.diagnostic import LoopyError, warn
-from loopy.symbolic import EvaluatorWithDeficientContext, flatten
+from loopy.symbolic import ConstantFoldingMapper, SubstitutionMapper, flatten
+from pymbolic.mapper.substitutor import make_subst_func
+from loopy.transform.parameter import fix_parameters
 
 
 # {{{ conditional-reducing slab decomposition
@@ -151,7 +153,7 @@ def generate_unroll_loop(codegen_state, sched_index):
 
     result = []
 
-    from loopy.kernel.instruction import Assignment
+    fold_consts = ConstantFoldingMapper()
 
     for i in range(length):
         idx_aff = lower_bound_aff + i
@@ -159,19 +161,32 @@ def generate_unroll_loop(codegen_state, sched_index):
         original_knl_ = new_codegen_state.kernel.copy()
         context = new_codegen_state.var_subst_map
         # Add in the other variables as variables.
-        mymapper = EvaluatorWithDeficientContext(context)
+
+        from loopy.kernel.instruction import Assignment
+        #new_knl = fix_parameters(original_knl_, **context)
+
+        subst_func = make_subst_func(context)
+        mymapper = SubstitutionMapper(subst_func)
 
         new_insns = []
         for insn in new_codegen_state.kernel.instructions:
+            """
+            new_insn = mymapper(insn)
+            new_insns.append(fold_consts(new_insn))
+            
+            """
             if isinstance(insn, Assignment):
                 # We can update the evaluation of this potentially.
                 new_expr = mymapper(insn.expression)
+                new_expr = fold_consts(new_expr)
                 new_insns.append(insn.copy(expression=new_expr))
             else:
                 new_insns.append(insn)
 
         new_knl = original_knl_.copy(instructions=new_insns)
         new_codegen_state = new_codegen_state.copy(kernel=new_knl)
+        
+        #new_codegen_state = new_codegen_state.copy(kernel=new_knl)
 
         result.append(
                 build_loop_nest(new_codegen_state, sched_index+1))