-
Notifications
You must be signed in to change notification settings - Fork 13.7k
CodeGenPrepare, X86: Support sinking phi nodes. #141716
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CodeGenPrepare, X86: Support sinking phi nodes. #141716
Conversation
Created using spr 1.3.6-beta.1
Depends on #141326 |
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-x86 Author: Peter Collingbourne (pcc) ChangesisProfitableToSinkOperands() may now return a phi node. If it does, This could be improved a bit by changing the isProfitableToSinkOperands() Full diff: https://github.com/llvm/llvm-project/pull/141716.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index c164f76eb335b..e8229036b51db 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2822,6 +2822,11 @@ class PHINode : public Instruction {
/// non-undef value.
bool hasConstantOrUndefValue() const;
+ /// If the specified PHI node (possibly via other PHI nodes) merges together
+ /// the same or identical (i.e. Instruction::isIdenticalTo() returns true)
+ /// values, return one of the values, otherwise return null.
+ Value *hasIdenticalValue();
+
/// If the PHI node is complete which means all of its parent's predecessors
/// have incoming value in this PHI, return true, otherwise return false.
bool isComplete() const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 52263026d6cea..422916a28669f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (isa<PHINode>(UI))
- continue;
- if (UI->getParent() == TargetBB) {
+ if (auto *PN = dyn_cast<PHINode>(UI)) {
+ auto *I0 = dyn_cast<Instruction>(PN->hasIdenticalValue());
+ if (!I0)
+ continue;
+ if (I0->getParent() == TargetBB &&
+ InstOrdering[I0] < InstOrdering[InsertPoint])
+ InsertPoint = I0;
+ } else if (UI->getParent() == TargetBB) {
if (InstOrdering[UI] < InstOrdering[InsertPoint])
InsertPoint = UI;
continue;
@@ -7753,7 +7758,11 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
DenseMap<Instruction *, Instruction *> NewInstructions;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
- Instruction *NI = UI->clone();
+ Instruction *NI;
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ NI = cast<Instruction>(PN->hasIdenticalValue())->clone();
+ else
+ NI = UI->clone();
if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index f404e11b9c0f0..9c59378213991 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -48,6 +48,7 @@
#include <cassert>
#include <cstdint>
#include <optional>
+#include <set>
#include <vector>
using namespace llvm;
@@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const {
return true;
}
+/// If the specified PHI node (possibly via other PHI nodes) merges together the
+/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values,
+/// return one of the values, otherwise return null.
+Value *PHINode::hasIdenticalValue() {
+ std::vector<PHINode *> Worklist;
+ std::set<PHINode *> Seen;
+ Value *Result = nullptr;
+ Worklist.push_back(this);
+ while (!Worklist.empty()) {
+ PHINode *PN = Worklist.back();
+ Worklist.pop_back();
+ if (!Seen.insert(PN).second)
+ continue;
+ for (Value *V : PN->incoming_values()) {
+ if (auto *PN = dyn_cast<PHINode>(V)) {
+ Worklist.push_back(PN);
+ continue;
+ }
+ if (!Result) {
+ Result = V;
+ continue;
+ }
+ if (V == Result)
+ continue;
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *ResultI = dyn_cast<Instruction>(Result))
+ if (I->isIdenticalTo(ResultI))
+ continue;
+ return nullptr;
+ }
+ }
+ return Result;
+}
+
//===----------------------------------------------------------------------===//
// LandingPadInst Implementation
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 072705bd31b1a..5baf5e3001baa 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7167,7 +7167,14 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
}
if (ShiftAmountOpNum == -1)
return false;
- auto *ShiftAmount = &I->getOperandUse(ShiftAmountOpNum);
+ auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum);
+
+ Value *ShiftAmount = ShiftAmountUse->get();
+ if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
+ ShiftAmount = PN->hasIdenticalValue();
+ if (!ShiftAmount)
+ return false;
+ }
// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
@@ -7175,7 +7182,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
isVectorShiftByScalarCheap(I->getType())) {
- Ops.push_back(ShiftAmount);
+ Ops.push_back(ShiftAmountUse);
return true;
}
@@ -7186,7 +7193,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
// instruction's immediate operand.
if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
if (isa<ConstantExpr>(CI->getOperand(0))) {
- Ops.push_back(ShiftAmount);
+ Ops.push_back(ShiftAmountUse);
return true;
}
}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
new file mode 100644
index 0000000000000..98f71cdce913d
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
@@ -0,0 +1,92 @@
+; Make sure that if a phi with identical inputs gets created it gets undone by CodeGenPrepare.
+
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we recover the third pair of zexts from the phi.
+
+; CHECK: define void @f4
+define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) #1 {
+ br i1 %0, label %5, label %18
+
+5:
+ %6 = load ptr, ptr %1, align 8
+ %7 = ptrtoint ptr %6 to i64
+ %8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %10 = lshr i64 %8, %9
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %12 = shl i64 %8, %11
+ %13 = or i64 %10, %12
+ %14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %14, label %15, label %16
+
+15:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+16:
+ %17 = load ptr, ptr %6, align 8
+ tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1)
+ br label %31
+
+18:
+ %19 = load ptr, ptr %2, align 8
+ %20 = ptrtoint ptr %19 to i64
+ %21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %23 = lshr i64 %21, %22
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %25 = shl i64 %21, %24
+ %26 = or i64 %23, %25
+ %27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %27, label %28, label %29
+
+28:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+29:
+ %30 = load ptr, ptr %19, align 8
+ tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2)
+ br label %31
+
+31:
+ %32 = phi i64 [ %24, %29 ], [ %11, %16 ]
+ %33 = phi i64 [ %22, %29 ], [ %9, %16 ]
+ %34 = load ptr, ptr %3, align 8
+ %35 = ptrtoint ptr %34 to i64
+ %36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %37 = lshr i64 %36, %33
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %38 = shl i64 %36, %32
+ %39 = or i64 %37, %38
+ %40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %40, label %41, label %42
+
+41:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+42:
+ %43 = load ptr, ptr %34, align 8
+ tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3)
+ ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}
|
@llvm/pr-subscribers-llvm-transforms Author: Peter Collingbourne (pcc) ChangesisProfitableToSinkOperands() may now return a phi node. If it does, This could be improved a bit by changing the isProfitableToSinkOperands() Full diff: https://github.com/llvm/llvm-project/pull/141716.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index c164f76eb335b..e8229036b51db 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2822,6 +2822,11 @@ class PHINode : public Instruction {
/// non-undef value.
bool hasConstantOrUndefValue() const;
+ /// If the specified PHI node (possibly via other PHI nodes) merges together
+ /// the same or identical (i.e. Instruction::isIdenticalTo() returns true)
+ /// values, return one of the values, otherwise return null.
+ Value *hasIdenticalValue();
+
/// If the PHI node is complete which means all of its parent's predecessors
/// have incoming value in this PHI, return true, otherwise return false.
bool isComplete() const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 52263026d6cea..422916a28669f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (isa<PHINode>(UI))
- continue;
- if (UI->getParent() == TargetBB) {
+ if (auto *PN = dyn_cast<PHINode>(UI)) {
+ auto *I0 = dyn_cast<Instruction>(PN->hasIdenticalValue());
+ if (!I0)
+ continue;
+ if (I0->getParent() == TargetBB &&
+ InstOrdering[I0] < InstOrdering[InsertPoint])
+ InsertPoint = I0;
+ } else if (UI->getParent() == TargetBB) {
if (InstOrdering[UI] < InstOrdering[InsertPoint])
InsertPoint = UI;
continue;
@@ -7753,7 +7758,11 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
DenseMap<Instruction *, Instruction *> NewInstructions;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
-
8000
Instruction *NI = UI->clone();
+ Instruction *NI;
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ NI = cast<Instruction>(PN->hasIdenticalValue())->clone();
+ else
+ NI = UI->clone();
if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index f404e11b9c0f0..9c59378213991 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -48,6 +48,7 @@
#include <cassert>
#include <cstdint>
#include <optional>
+#include <set>
#include <vector>
using namespace llvm;
@@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const {
return true;
}
+/// If the specified PHI node (possibly via other PHI nodes) merges together the
+/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values,
+/// return one of the values, otherwise return null.
+Value *PHINode::hasIdenticalValue() {
+ std::vector<PHINode *> Worklist;
+ std::set<PHINode *> Seen;
+ Value *Result = nullptr;
+ Worklist.push_back(this);
+ while (!Worklist.empty()) {
+ PHINode *PN = Worklist.back();
+ Worklist.pop_back();
+ if (!Seen.insert(PN).second)
+ continue;
+ for (Value *V : PN->incoming_values()) {
+ if (auto *PN = dyn_cast<PHINode>(V)) {
+ Worklist.push_back(PN);
+ continue;
+ }
+ if (!Result) {
+ Result = V;
+ continue;
+ }
+ if (V == Result)
+ continue;
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *ResultI = dyn_cast<Instruction>(Result))
+ if (I->isIdenticalTo(ResultI))
+ continue;
+ return nullptr;
+ }
+ }
+ return Result;
+}
+
//===----------------------------------------------------------------------===//
// LandingPadInst Implementation
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 072705bd31b1a..5baf5e3001baa 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7167,7 +7167,14 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
}
if (ShiftAmountOpNum == -1)
return false;
- auto *ShiftAmount = &I->getOperandUse(ShiftAmountOpNum);
+ auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum);
+
+ Value *ShiftAmount = ShiftAmountUse->get();
+ if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
+ ShiftAmount = PN->hasIdenticalValue();
+ if (!ShiftAmount)
+ return false;
+ }
// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
@@ -7175,7 +7182,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
isVectorShiftByScalarCheap(I->getType())) {
- Ops.push_back(ShiftAmount);
+ Ops.push_back(ShiftAmountUse);
return true;
}
@@ -7186,7 +7193,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
// instruction's immediate operand.
if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
if (isa<ConstantExpr>(CI->getOperand(0))) {
- Ops.push_back(ShiftAmount);
+ Ops.push_back(ShiftAmountUse);
return true;
}
}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
new file mode 100644
index 0000000000000..98f71cdce913d
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
@@ -0,0 +1,92 @@
+; Make sure that if a phi with identical inputs gets created it gets undone by CodeGenPrepare.
+
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we recover the third pair of zexts from the phi.
+
+; CHECK: define void @f4
+define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) #1 {
+ br i1 %0, label %5, label %18
+
+5:
+ %6 = load ptr, ptr %1, align 8
+ %7 = ptrtoint ptr %6 to i64
+ %8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %10 = lshr i64 %8, %9
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %12 = shl i64 %8, %11
+ %13 = or i64 %10, %12
+ %14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %14, label %15, label %16
+
+15:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+16:
+ %17 = load ptr, ptr %6, align 8
+ tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1)
+ br label %31
+
+18:
+ %19 = load ptr, ptr %2, align 8
+ %20 = ptrtoint ptr %19 to i64
+ %21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %23 = lshr i64 %21, %22
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %25 = shl i64 %21, %24
+ %26 = or i64 %23, %25
+ %27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %27, label %28, label %29
+
+28:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+29:
+ %30 = load ptr, ptr %19, align 8
+ tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2)
+ br label %31
+
+31:
+ %32 = phi i64 [ %24, %29 ], [ %11, %16 ]
+ %33 = phi i64 [ %22, %29 ], [ %9, %16 ]
+ %34 = load ptr, ptr %3, align 8
+ %35 = ptrtoint ptr %34 to i64
+ %36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %37 = lshr i64 %36, %33
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %38 = shl i64 %36, %32
+ %39 = or i64 %37, %38
+ %40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %40, label %41, label %42
+
+41:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+42:
+ %43 = load ptr, ptr %34, align 8
+ tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3)
+ ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}
|
Obsoleted by #142886 |
isProfitableToSinkOperands() may now return a phi node. If it does,
check that all incoming values are identical and if so, replace the phi
use with a clone of the incoming value. Use this mechanism to sink phi
node operands of shifts whose incoming values are identical casts of
a constant.
This could be improved a bit by changing the isProfitableToSinkOperands()
API so that we don't need to call hasIdenticalValue() again in the caller
but hopefully the case of a PHI operand to a shift is rare enough that
it doesn't matter that we need to do it multiple times.