[SelectionDAG] Split vector types for atomic load #120640

jofrn · 2024-12-19T21:28:36Z

Vector types that aren't widened are split
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with element types bfloat and half.

Stack:

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.

llvmbot · 2024-12-19T21:29:12Z

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: None (jofrn)

Changes

`load atomic <n x T>` is not valid. This change splits
vector types of atomic load in SelectionDAG
so that it can translate vectors of more than one element with element types bfloat and half.

Stack:

#120640 ⬅
#120598
#120387
#120386
#120385
#120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.

Full diff: https://github.com/llvm/llvm-project/pull/120640.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1)
(modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+35)
(modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+56)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3b3dddc44e3682..e0cd7319ac034b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -946,6 +946,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7c4caa96244b8b..44adc3fdb4a5a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1146,6 +1146,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SplitVecRes_STEP_VECTOR(N, Lo, Hi);
     break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::ATOMIC_LOAD:
+    SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
+    break;
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
     break;
@@ -2079,6 +2082,38 @@ void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
   Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
+                                               SDValue &Hi) {
+  EVT LoVT, HiVT;
+  SDLoc dl(LD);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  EVT MemoryVT = LD->getMemoryVT();
+
+  EVT LoMemVT, HiMemVT;
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+  Lo = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, LoMemVT, LoMemVT, Ch, Ptr,
+                     LD->getMemOperand());
+
+  MachinePointerInfo MPI;
+  IncrementPointer(LD, LoMemVT, MPI, Ptr);
+
+  Hi = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, HiMemVT, HiMemVT, Ch, Ptr,
+                     LD->getMemOperand());
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                   Hi.getValue(1));
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
 void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                                         SDValue &Hi) {
   assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index ba1bc4d98537d1..302a94aa9c1f60 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -176,6 +176,62 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) nounwind {
   ret <2 x float> %ret
 }
 
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    movzwl (%rdi), %eax
+; CHECK3-NEXT:    movzwl 2(%rdi), %ecx
+; CHECK3-NEXT:    pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    movw (%rdi), %dx
+; CHECK0-NEXT:    movw 2(%rdi), %cx
+; CHECK0-NEXT:    ## implicit-def: $eax
+; CHECK0-NEXT:    movw %dx, %ax
+; CHECK0-NEXT:    ## implicit-def: $xmm0
+; CHECK0-NEXT:    pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT:    ## implicit-def: $eax
+; CHECK0-NEXT:    movw %cx, %ax
+; CHECK0-NEXT:    ## implicit-def: $xmm1
+; CHECK0-NEXT:    pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT:    retq
+  %ret = load atomic <2 x half>, ptr %x acquire, align 4
+  ret <2 x half> %ret
+}
+
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_bfloat:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    movzwl (%rdi), %eax
+; CHECK3-NEXT:    movzwl 2(%rdi), %ecx
+; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT:    pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec2_bfloat:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    movw (%rdi), %cx
+; CHECK0-NEXT:    movw 2(%rdi), %dx
+; CHECK0-NEXT:    ## implicit-def: $eax
+; CHECK0-NEXT:    movw %dx, %ax
+; CHECK0-NEXT:    ## implicit-def: $xmm1
+; CHECK0-NEXT:    pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT:    ## implicit-def: $eax
+; CHECK0-NEXT:    movw %cx, %ax
+; CHECK0-NEXT:    ## implicit-def: $xmm0
+; CHECK0-NEXT:    pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT:    retq
+  %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
+  ret <2 x bfloat> %ret
+}
+
 define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
 ; CHECK3-LABEL: atomic_vec1_ptr:
 ; CHECK3:       ## %bb.0:

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

arsenm · 2025-05-06T09:29:34Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+  SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD,
+                                  DAG.getIntPtrConstant(0, dl));


This probably should be a target constant but this appears to be what existing uses do

arsenm · 2025-05-06T17:31:36Z

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+  EVT MemIntVT =
+      EVT::getIntegerVT(*DAG.getContext(), 2 * LoMemVT.getSizeInBits());


MemVT should not be a derived quantity; directly bitcast the original memory type instead.

Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357

jofrn · 2025-06-01T20:46:17Z

Closing pull request: commit has gone away

llvmbot added the backend:X86 and llvm:SelectionDAG (SelectionDAGISel as well) labels Dec 19, 2024

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 8ed9199 to c9bdb95 Compare December 19, 2024 21:30

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from b2f0b33 to 6737dda Compare December 19, 2024 21:33

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from c9bdb95 to 94a71a3 Compare December 19, 2024 21:33

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 6737dda to 2949391 Compare December 19, 2024 21:59

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 94a71a3 to 34df4f7 Compare December 19, 2024 21:59

RKSimon requested review from arsenm, RKSimon and jyknight December 20, 2024 10:02

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 34df4f7 to 761d4d9 Compare December 20, 2024 11:25

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 2949391 to 78adf01 Compare December 20, 2024 11:25

jofrn mentioned this pull request Dec 20, 2024

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

Open

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch 2 times, most recently from 6506acb to db674f8 Compare December 20, 2024 11:52

arsenm reviewed Dec 20, 2024

View reviewed changes

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Show resolved Hide resolved

jofrn changed the title ~~[SelectionDAG] Split <n x T> vector types for atomic load~~ [X86] load atomic vector tests for combine Jan 2, 2025

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from db674f8 to 13ea377 Compare January 2, 2025 19:21

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 23c9ff2 to 2c51f72 Compare January 2, 2025 19:21

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 13ea377 to e11194d Compare January 2, 2025 20:45

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 2c51f72 to 3a82883 Compare January 2, 2025 20:45

jofrn changed the title ~~[X86] load atomic vector tests for combine~~ [SelectionDAG][X86] Split <2 x T> vector types for atomic load Jan 6, 2025

jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 3a82883 to 36161df Compare January 6, 2025 19:25

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from e11194d to 3be4fa0 Compare January 6, 2025 19:25

jofrn changed the base branch from users/jofrn/spr/main/2894ccd1 to main May 6, 2025 06:03

jofrn mentioned this pull request May 6, 2025

[X86] Remove extra MOV after widening atomic load #138635

Open

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 28f6bf3 to 309d817 Compare May 6, 2025 06:03

jofrn changed the base branch from main to users/jofrn/spr/main/45989503 May 6, 2025 06:04

arsenm reviewed May 6, 2025

View reviewed changes

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 309d817 to cd4402a Compare May 6, 2025 15:04

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from 4383732 to 0fcd430 Compare May 6, 2025 15:04

arsenm reviewed May 6, 2025

View reviewed changes

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from 0fcd430 to 5b5d948 Compare May 7, 2025 12:53

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from cd4402a to 0e4399d Compare May 7, 2025 12:53

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from 5b5d948 to 939a68f Compare May 8, 2025 01:53

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch 2 times, most recently from 4783f04 to 13a5e87 Compare May 8, 2025 23:38

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from 939a68f to b6c4b48 Compare May 8, 2025 23:38

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 13a5e87 to 3e8de67 Compare May 9, 2025 12:53

jofrn force-pushed the users/jofrn/spr/main/45989503 branch 2 times, most recently from 6a946a3 to 7e560d9 Compare May 9, 2025 19:43

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch 2 times, most recently from b51658d to 507069a Compare May 9, 2025 20:03

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from 7e560d9 to e8dc4c2 Compare May 9, 2025 20:03

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 507069a to 40b0a4e Compare May 10, 2025 08:27

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from e8dc4c2 to 539584c Compare May 10, 2025 08:27

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 40b0a4e to eda6b72 Compare May 11, 2025 00:05

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from e5413e4 to 6312f8c Compare May 12, 2025 05:34

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from eda6b72 to 4fccbd6 Compare May 12, 2025 05:34

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 4fccbd6 to f916347 Compare May 27, 2025 17:34

jofrn force-pushed the users/jofrn/spr/main/45989503 branch from 6312f8c to 109bc60 Compare May 27, 2025 17:34

jofrn closed this Jun 1, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[SelectionDAG] Split vector types for atomic load #120640

[SelectionDAG] Split vector types for atomic load #120640

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

		EVT MemIntVT =
		    EVT::getIntegerVT(*DAG.getContext(), 2 * LoMemVT.getSizeInBits());

[SelectionDAG] Split vector types for atomic load #120640

[SelectionDAG] Split vector types for atomic load #120640

Uh oh!

Conversation

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!