From 0894fcd0bcc0aedf52ac4f41aeafc85b5e018560 Mon Sep 17 00:00:00 2001 From: Nick Koskelo Date: Thu, 9 Jan 2025 23:40:44 +0000 Subject: [PATCH 1/6] Preliminary example for adding evaluation to unrolled expressions. --- examples/python/loop_unroll_codegen.py | 52 ++++++++++++++++++++++++++ loopy/codegen/loop.py | 24 ++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 examples/python/loop_unroll_codegen.py diff --git a/examples/python/loop_unroll_codegen.py b/examples/python/loop_unroll_codegen.py new file mode 100644 index 000000000..cdb2170fd --- /dev/null +++ b/examples/python/loop_unroll_codegen.py @@ -0,0 +1,52 @@ +import numpy as np + +import pyopencl as cl +import pyopencl.array + +import loopy as lp +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa: F401 + + +# setup +# ----- +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +n = 15 * 10**6 +a = cl.array.arange(queue, n, dtype=np.float32) + +# create +# ------ +knl = lp.make_kernel( + "{ [i]: 0<= i <8}", + "out[i] = a if i == 0 else (b if i == 1 else c)") + +knl = lp.tag_inames(knl, {"i": "vec"}) +from loopy.kernel.array import VectorArrayDimTag + +try: + orig_knl = knl + knl = lp.tag_array_axes(knl, "out", [VectorArrayDimTag()]) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32}) + + dev_code = lp.generate_code_v2(knl).device_code() + print(dev_code) + +except Exception as err: + print(err) +breakpoint() + +print("No Vector Array Tag.") +knl = orig_knl +knl = lp.make_kernel( + "{ [i]: 0<= i <8}", + "out[i] = a if i == 0 else (b if i == 1 else c)") + +knl = lp.tag_inames(knl, {"i": "ilp.unr"}) +knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32}) +dev_code = lp.generate_code_v2(knl).device_code() +print(dev_code) +breakpoint() + +print("Hello") + diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 44bfa07cc..4b5f76d16 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -151,9 +151,33 @@ def generate_unroll_loop(codegen_state, sched_index): result = [] + from pymbolic.mapper.evaluator import evaluate + from pymbolic.primitives import Variable + + from loopy.kernel.instruction import Assignment + for i in range(length): idx_aff = lower_bound_aff + i new_codegen_state = codegen_state.fix(iname, idx_aff) + original_knl_ = new_codegen_state.kernel.copy() + context = new_codegen_state.var_subst_map + # Add in the other variables as variables. + for key in original_knl_.arg_dict: + if key not in context.keys(): + context = context.update({key: Variable(key)}) + + new_insns = [] + for insn in new_codegen_state.kernel.instructions: + if isinstance(insn, Assignment): + # We can update the evaluation of this potentially. + new_insns.append(insn.copy(expression=evaluate(insn.expression, + context))) + else: + new_insns.append(insn) + + new_knl = original_knl_.copy(instructions=new_insns) + new_codegen_state = new_codegen_state.copy(kernel=new_knl) + result.append( build_loop_nest(new_codegen_state, sched_index+1)) From 2123f7e3a9000dac861ad776d0c313e5f99a219c Mon Sep 17 00:00:00 2001 From: Nick Koskelo Date: Fri, 10 Jan 2025 18:15:16 +0000 Subject: [PATCH 2/6] Use all read variables in context and not just those which are arguments. --- loopy/codegen/loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 4b5f76d16..35722d2de 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -162,7 +162,7 @@ def generate_unroll_loop(codegen_state, sched_index): original_knl_ = new_codegen_state.kernel.copy() context = new_codegen_state.var_subst_map # Add in the other variables as variables. - for key in original_knl_.arg_dict: + for key in original_knl_.get_read_variables(): if key not in context.keys(): context = context.update({key: Variable(key)}) From a5bd5c1250e39e60ce4b4354ff56a2c3d65c6ccb Mon Sep 17 00:00:00 2001 From: Nick Koskelo Date: Fri, 10 Jan 2025 18:18:55 +0000 Subject: [PATCH 3/6] Update example. --- examples/python/loop_unroll_codegen.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/python/loop_unroll_codegen.py b/examples/python/loop_unroll_codegen.py index cdb2170fd..ae09a5130 100644 --- a/examples/python/loop_unroll_codegen.py +++ b/examples/python/loop_unroll_codegen.py @@ -24,17 +24,19 @@ knl = lp.tag_inames(knl, {"i": "vec"}) from loopy.kernel.array import VectorArrayDimTag + try: orig_knl = knl knl = lp.tag_array_axes(knl, "out", [VectorArrayDimTag()]) - knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32}) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, + "b": np.float32, + "c": np.float32}) dev_code = lp.generate_code_v2(knl).device_code() print(dev_code) except Exception as err: print(err) -breakpoint() print("No Vector Array Tag.") knl = orig_knl @@ -46,7 +48,3 @@ knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32, "c": np.float32}) dev_code = lp.generate_code_v2(knl).device_code() print(dev_code) -breakpoint() - -print("Hello") - From 7d6cd0af82332a596410c034daa1d13d475a58d9 Mon Sep 17 00:00:00 2001 From: Nick Koskelo Date: Mon, 13 Jan 2025 16:05:57 +0000 Subject: [PATCH 4/6] Use the partial evaluator. --- loopy/codegen/loop.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 35722d2de..ebb3ee684 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -31,7 +31,7 @@ from loopy.codegen.control import build_loop_nest from loopy.codegen.result import merge_codegen_results from loopy.diagnostic import LoopyError, warn -from loopy.symbolic import flatten +from loopy.symbolic import PartialEvaluationMapper, flatten # {{{ conditional-reducing slab decomposition @@ -151,9 +151,6 @@ def generate_unroll_loop(codegen_state, sched_index): result = [] - from pymbolic.mapper.evaluator import evaluate - from pymbolic.primitives import Variable - from loopy.kernel.instruction import Assignment for i in range(length): @@ -162,16 +159,13 @@ def generate_unroll_loop(codegen_state, sched_index): original_knl_ = new_codegen_state.kernel.copy() context = new_codegen_state.var_subst_map # Add in the other variables as variables. - for key in original_knl_.get_read_variables(): - if key not in context.keys(): - context = context.update({key: Variable(key)}) + mymapper = PartialEvaluationMapper(context) new_insns = [] for insn in new_codegen_state.kernel.instructions: if isinstance(insn, Assignment): # We can update the evaluation of this potentially. - new_insns.append(insn.copy(expression=evaluate(insn.expression, - context))) + new_insns.append(insn.copy(expression=mymapper(insn.expression))) else: new_insns.append(insn) From 5f1d791b0909aeb374beb9774c5faf0e89eb8a7b Mon Sep 17 00:00:00 2001 From: Nick Koskelo Date: Mon, 13 Jan 2025 17:03:49 +0000 Subject: [PATCH 5/6] Use a partial evaluator. --- loopy/codegen/loop.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index ebb3ee684..c90f9b003 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -31,7 +31,7 @@ from loopy.codegen.control import build_loop_nest from loopy.codegen.result import merge_codegen_results from loopy.diagnostic import LoopyError, warn -from loopy.symbolic import PartialEvaluationMapper, flatten +from loopy.symbolic import EvaluatorWithDeficientContext, flatten # {{{ conditional-reducing slab decomposition @@ -159,13 +159,14 @@ def generate_unroll_loop(codegen_state, sched_index): original_knl_ = new_codegen_state.kernel.copy() context = new_codegen_state.var_subst_map # Add in the other variables as variables. - mymapper = PartialEvaluationMapper(context) + mymapper = EvaluatorWithDeficientContext(context) new_insns = [] for insn in new_codegen_state.kernel.instructions: if isinstance(insn, Assignment): # We can update the evaluation of this potentially. - new_insns.append(insn.copy(expression=mymapper(insn.expression))) + new_expr = mymapper(insn.expression) + new_insns.append(insn.copy(expression=new_expr)) else: new_insns.append(insn) From 71c1a58764bfc5e3fc579f76c47fc7fb16bc71e5 Mon Sep 17 00:00:00 2001 From: Nick Koskelo Date: Mon, 13 Jan 2025 18:42:53 +0000 Subject: [PATCH 6/6] Try with constant folding instead. --- loopy/codegen/loop.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index c90f9b003..ff56f1e53 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -31,7 +31,9 @@ from loopy.codegen.control import build_loop_nest from loopy.codegen.result import merge_codegen_results from loopy.diagnostic import LoopyError, warn -from loopy.symbolic import EvaluatorWithDeficientContext, flatten +from loopy.symbolic import ConstantFoldingMapper, SubstitutionMapper, flatten +from pymbolic.mapper.substitutor import make_subst_func +from loopy.transform.parameter import fix_parameters # {{{ conditional-reducing slab decomposition @@ -151,7 +153,7 @@ def generate_unroll_loop(codegen_state, sched_index): result = [] - from loopy.kernel.instruction import Assignment + fold_consts = ConstantFoldingMapper() for i in range(length): idx_aff = lower_bound_aff + i @@ -159,19 +161,32 @@ def generate_unroll_loop(codegen_state, sched_index): original_knl_ = new_codegen_state.kernel.copy() context = new_codegen_state.var_subst_map # Add in the other variables as variables. - mymapper = EvaluatorWithDeficientContext(context) + + from loopy.kernel.instruction import Assignment + #new_knl = fix_parameters(original_knl_, **context) + + subst_func = make_subst_func(context) + mymapper = SubstitutionMapper(subst_func) new_insns = [] for insn in new_codegen_state.kernel.instructions: + """ + new_insn = mymapper(insn) + new_insns.append(fold_consts(new_insn)) + + """ if isinstance(insn, Assignment): # We can update the evaluation of this potentially. new_expr = mymapper(insn.expression) + new_expr = fold_consts(new_expr) new_insns.append(insn.copy(expression=new_expr)) else: new_insns.append(insn) new_knl = original_knl_.copy(instructions=new_insns) new_codegen_state = new_codegen_state.copy(kernel=new_knl) + + #new_codegen_state = new_codegen_state.copy(kernel=new_knl) result.append( build_loop_nest(new_codegen_state, sched_index+1))