[arch-commits] Commit in llvm/trunk (PKGBUILD stack-clash-fixes.patch)

Evangelos Foutras foutrelis at archlinux.org
Sat Jan 9 23:06:21 UTC 2021


    Date: Saturday, January 9, 2021 @ 23:06:21
  Author: foutrelis
Revision: 405898

upgpkg: llvm 11.0.1-1: new upstream release

Modified:
  llvm/trunk/PKGBUILD
Deleted:
  llvm/trunk/stack-clash-fixes.patch

-------------------------+
 PKGBUILD                |   11 
 stack-clash-fixes.patch |  870 ----------------------------------------------
 2 files changed, 3 insertions(+), 878 deletions(-)

Modified: PKGBUILD
===================================================================
--- PKGBUILD	2021-01-09 22:15:03 UTC (rev 405897)
+++ PKGBUILD	2021-01-09 23:06:21 UTC (rev 405898)
@@ -2,8 +2,8 @@
 # Contributor: Jan "heftig" Steffens <jan.steffens at gmail.com>
 
 pkgname=('llvm' 'llvm-libs' 'llvm-ocaml')
-pkgver=11.0.0
-pkgrel=4
+pkgver=11.0.1
+pkgrel=1
 _ocaml_ver=4.11.1
 arch=('x86_64')
 url="https://llvm.org/"
@@ -15,11 +15,9 @@
 options=('staticlibs')
 _source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver
 source=($_source_base/$pkgname-$pkgver.src.tar.xz{,.sig}
-        stack-clash-fixes.patch
         llvm-config.h)
-sha256sums=('913f68c898dfb4a03b397c5e11c6a2f39d0f22ed7665c9cefa87a34423a72469'
+sha256sums=('ccd87c254b6aebc5077e4e6977d08d4be888e7eb672c6630a26a15d58b59b528'
             'SKIP'
-            'bdcaa7559223bd42a381086f7cc23fc73f88ebb1966a7c235f897db0f73b7d20'
             '597dc5968c695bbdbb0eac9e8eb5117fcd2773bc91edf5ec103ecffffab8bc48')
 validpgpkeys+=('B6C8F98282B944E3B0D5C2530FC3042E345AD05D') # Hans Wennborg <hans at chromium.org>
 validpgpkeys+=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard <tstellar at redhat.com>
@@ -27,9 +25,6 @@
 prepare() {
   cd "$srcdir/llvm-$pkgver.src"
   mkdir build
-
-  # https://bugs.llvm.org/show_bug.cgi?id=48007
-  patch -Np2 -i ../stack-clash-fixes.patch
 }
 
 build() {

Deleted: stack-clash-fixes.patch
===================================================================
--- stack-clash-fixes.patch	2021-01-09 22:15:03 UTC (rev 405897)
+++ stack-clash-fixes.patch	2021-01-09 23:06:21 UTC (rev 405898)
@@ -1,870 +0,0 @@
-From a1e0363c7402f7aa58e24e0e6dfa447ebabc1910 Mon Sep 17 00:00:00 2001
-From: serge-sans-paille <sguelton at redhat.com>
-Date: Wed, 30 Sep 2020 11:35:00 +0200
-Subject: [PATCH 1/3] Fix limit behavior of dynamic alloca
-
-When the allocation size is 0, we shouldn't probe. Within [1, PAGE_SIZE], we
-should probe once, etc.
-
-This fixes https://bugs.llvm.org/show_bug.cgi?id=47657
-
-Differential Revision: https://reviews.llvm.org/D88548
-
-(cherry picked from commit 9573c9f2a363da71b2c07a3add4e52721e6028a0)
----
- llvm/lib/Target/X86/X86ISelLowering.cpp             | 2 +-
- llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 8 ++++----
- 2 files changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
-index fd1e6517dfac..f68ae4461fe3 100644
---- a/llvm/lib/Target/X86/X86ISelLowering.cpp
-+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
-@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
- 
-   BuildMI(testMBB, DL, TII->get(X86::JCC_1))
-       .addMBB(tailMBB)
--      .addImm(X86::COND_L);
-+      .addImm(X86::COND_LE);
-   testMBB->addSuccessor(blockMBB);
-   testMBB->addSuccessor(tailMBB);
- 
-diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-index bc4678564083..82fd67842c8a 100644
---- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-@@ -24,12 +24,12 @@ attributes #0 =  {"probe-stack"="inline-asm"}
- ; CHECK-X86-64-NEXT:  	andq	$-16, %rcx
- ; CHECK-X86-64-NEXT:  	subq	%rcx, %rax
- ; CHECK-X86-64-NEXT:  	cmpq	%rsp, %rax
--; CHECK-X86-64-NEXT:  	jl	.LBB0_3
-+; CHECK-X86-64-NEXT:  	jle	.LBB0_3
- ; CHECK-X86-64-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
- ; CHECK-X86-64-NEXT:  	movq	$0, (%rsp)
- ; CHECK-X86-64-NEXT:  	subq	$4096, %rsp # imm = 0x1000
- ; CHECK-X86-64-NEXT:  	cmpq	%rsp, %rax
--; CHECK-X86-64-NEXT:  	jge	.LBB0_2
-+; CHECK-X86-64-NEXT:  	jg	.LBB0_2
- ; CHECK-X86-64-NEXT:  .LBB0_3:
- ; CHECK-X86-64-NEXT:  	movq	%rax, %rsp
- ; CHECK-X86-64-NEXT:  	movl	$1, 4792(%rax)
-@@ -54,12 +54,12 @@ attributes #0 =  {"probe-stack"="inline-asm"}
- ; CHECK-X86-32-NEXT:    andl    $-16, %ecx
- ; CHECK-X86-32-NEXT:    subl    %ecx, %eax
- ; CHECK-X86-32-NEXT:    cmpl    %esp, %eax
--; CHECK-X86-32-NEXT:    jl  .LBB0_3
-+; CHECK-X86-32-NEXT:    jle  .LBB0_3
- ; CHECK-X86-32-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
- ; CHECK-X86-32-NEXT:    movl    $0, (%esp)
- ; CHECK-X86-32-NEXT:    subl    $4096, %esp # imm = 0x1000
- ; CHECK-X86-32-NEXT:    cmpl    %esp, %eax
--; CHECK-X86-32-NEXT:    jge .LBB0_2
-+; CHECK-X86-32-NEXT:    jg .LBB0_2
- ; CHECK-X86-32-NEXT:  .LBB0_3:
- ; CHECK-X86-32-NEXT:    movl    %eax, %esp
- ; CHECK-X86-32-NEXT:    movl    $1, 4792(%eax)
-
-From aac36687f7978f33751daf2870b5c812124ebfaf Mon Sep 17 00:00:00 2001
-From: serge-sans-paille <sguelton at redhat.com>
-Date: Thu, 23 Jul 2020 16:22:48 +0200
-Subject: [PATCH 2/3] Fix interaction between stack alignment and inline-asm
- stack clash protection
-
-As reported in https://github.com/rust-lang/rust/issues/70143 alignment is not
-taken into account when doing the probing. Fix that by adjusting the first probe
-if the stack align is small, or by extending the dynamic probing if the
-alignment is large.
-
-Differential Revision: https://reviews.llvm.org/D84419
-
-(cherry picked from commit f2c6bfa350de142e4d63808d03335f69bd136d6a)
----
- llvm/lib/Target/X86/X86FrameLowering.cpp      | 222 ++++++++++++++++--
- llvm/lib/Target/X86/X86FrameLowering.h        |   8 +-
- .../X86/stack-clash-large-large-align.ll      |  88 +++++++
- .../CodeGen/X86/stack-clash-no-free-probe.ll  |  27 ---
- .../stack-clash-small-alloc-medium-align.ll   | 135 +++++++++++
- .../X86/stack-clash-small-large-align.ll      |  83 +++++++
- 6 files changed, 512 insertions(+), 51 deletions(-)
- create mode 100644 llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
- delete mode 100644 llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
- create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
- create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
-
-diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
-index c7ca6fb2a4fc..db6b68659493 100644
---- a/llvm/lib/Target/X86/X86FrameLowering.cpp
-+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
-@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
-   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
-   uint64_t ProbeChunk = StackProbeSize * 8;
- 
-+  uint64_t MaxAlign =
-+      TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
-+
-   // Synthesize a loop or unroll it, depending on the number of iterations.
-+  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits are
-+  // left between the unaligned rsp and current rsp.
-   if (Offset > ProbeChunk) {
--    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
-+    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
-+                                    MaxAlign % StackProbeSize);
-   } else {
--    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
-+    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
-+                                     MaxAlign % StackProbeSize);
-   }
- }
- 
- void X86FrameLowering::emitStackProbeInlineGenericBlock(
-     MachineFunction &MF, MachineBasicBlock &MBB,
--    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
--    uint64_t Offset) const {
-+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
-+    uint64_t AlignOffset) const {
- 
-   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-   const X86TargetLowering &TLI = *STI.getTargetLowering();
-   const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
-   const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
-   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
-+
-   uint64_t CurrentOffset = 0;
--  // 0 Thanks to return address being saved on the stack
--  uint64_t CurrentProbeOffset = 0;
- 
--  // For the first N - 1 pages, just probe. I tried to take advantage of
-+  assert(AlignOffset < StackProbeSize);
-+
-+  // If the offset is so small it fits within a page, there's nothing to do.
-+  if (StackProbeSize < Offset + AlignOffset) {
-+
-+    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-+                           .addReg(StackPtr)
-+                           .addImm(StackProbeSize - AlignOffset)
-+                           .setMIFlag(MachineInstr::FrameSetup);
-+    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-+
-+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
-+                     .setMIFlag(MachineInstr::FrameSetup),
-+                 StackPtr, false, 0)
-+        .addImm(0)
-+        .setMIFlag(MachineInstr::FrameSetup);
-+    NumFrameExtraProbe++;
-+    CurrentOffset = StackProbeSize - AlignOffset;
-+  }
-+
-+  // For the next N - 1 pages, just probe. I tried to take advantage of
-   // natural probes but it implies much more logic and there was very few
-   // interesting natural probes to interleave.
-   while (CurrentOffset + StackProbeSize < Offset) {
-@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
-         .setMIFlag(MachineInstr::FrameSetup);
-     NumFrameExtraProbe++;
-     CurrentOffset += StackProbeSize;
--    CurrentProbeOffset += StackProbeSize;
-   }
- 
-+  // No need to probe the tail, it is smaller than a Page.
-   uint64_t ChunkSize = Offset - CurrentOffset;
-   MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-                          .addReg(StackPtr)
-@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
- 
- void X86FrameLowering::emitStackProbeInlineGenericLoop(
-     MachineFunction &MF, MachineBasicBlock &MBB,
--    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
--    uint64_t Offset) const {
-+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
-+    uint64_t AlignOffset) const {
-   assert(Offset && "null offset");
- 
-   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
-   const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
-   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
- 
-+  if (AlignOffset) {
-+    if (AlignOffset < StackProbeSize) {
-+      // Perform a first smaller allocation followed by a probe.
-+      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
-+      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
-+                             .addReg(StackPtr)
-+                             .addImm(AlignOffset)
-+                             .setMIFlag(MachineInstr::FrameSetup);
-+      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-+
-+      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
-+                       .setMIFlag(MachineInstr::FrameSetup),
-+                   StackPtr, false, 0)
-+          .addImm(0)
-+          .setMIFlag(MachineInstr::FrameSetup);
-+      NumFrameExtraProbe++;
-+      Offset -= AlignOffset;
-+    }
-+  }
-+
-   // Synthesize a loop
-   NumFrameLoopProbe++;
-   const BasicBlock *LLVM_BB = MBB.getBasicBlock();
-@@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
- 
-   // save loop bound
-   {
--    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
--    BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
-+    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
-+    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
-         .addReg(FinalStackProbed)
-         .addImm(Offset / StackProbeSize * StackProbeSize)
-         .setMIFlag(MachineInstr::FrameSetup);
-@@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
- 
-   // allocate a page
-   {
--    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
--    BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
-+    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-+    BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
-         .addReg(StackPtr)
-         .addImm(StackProbeSize)
-         .setMIFlag(MachineInstr::FrameSetup);
-@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
-                                           uint64_t MaxAlign) const {
-   uint64_t Val = -MaxAlign;
-   unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
--  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
--                         .addReg(Reg)
--                         .addImm(Val)
--                         .setMIFlag(MachineInstr::FrameSetup);
- 
--  // The EFLAGS implicit def is dead.
--  MI->getOperand(3).setIsDead();
-+  MachineFunction &MF = *MBB.getParent();
-+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-+  const X86TargetLowering &TLI = *STI.getTargetLowering();
-+  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
-+  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
-+
-+  // We want to make sure that (in the worst case) fewer than StackProbeSize
-+  // bytes are not probed after the AND. This assumption is used in
-+  // emitStackProbeInlineGeneric.
-+  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
-+    {
-+      NumFrameLoopProbe++;
-+      MachineBasicBlock *entryMBB =
-+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+      MachineBasicBlock *headMBB =
-+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+      MachineBasicBlock *bodyMBB =
-+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+      MachineBasicBlock *footMBB =
-+          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
-+
-+      MachineFunction::iterator MBBIter = MBB.getIterator();
-+      MF.insert(MBBIter, entryMBB);
-+      MF.insert(MBBIter, headMBB);
-+      MF.insert(MBBIter, bodyMBB);
-+      MF.insert(MBBIter, footMBB);
-+      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
-+      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
-+
-+      // Setup entry block
-+      {
-+
-+        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
-+        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
-+            .addReg(StackPtr)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+        MachineInstr *MI =
-+            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
-+                .addReg(FinalStackProbed)
-+                .addImm(Val)
-+                .setMIFlag(MachineInstr::FrameSetup);
-+
-+        // The EFLAGS implicit def is dead.
-+        MI->getOperand(3).setIsDead();
-+
-+        BuildMI(entryMBB, DL,
-+                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
-+            .addReg(FinalStackProbed)
-+            .addReg(StackPtr)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
-+            .addMBB(&MBB)
-+            .addImm(X86::COND_E)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+        entryMBB->addSuccessor(headMBB);
-+        entryMBB->addSuccessor(&MBB);
-+      }
-+
-+      // Loop entry block
-+
-+      {
-+        const unsigned SUBOpc =
-+            getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-+        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
-+            .addReg(StackPtr)
-+            .addImm(StackProbeSize)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+
-+        BuildMI(headMBB, DL,
-+                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
-+            .addReg(FinalStackProbed)
-+            .addReg(StackPtr)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+
-+        // jump
-+        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
-+            .addMBB(footMBB)
-+            .addImm(X86::COND_B)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+
-+        headMBB->addSuccessor(bodyMBB);
-+        headMBB->addSuccessor(footMBB);
-+      }
-+
-+      // setup loop body
-+      {
-+        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
-+                         .setMIFlag(MachineInstr::FrameSetup),
-+                     StackPtr, false, 0)
-+            .addImm(0)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+
-+        const unsigned SUBOpc =
-+            getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-+        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
-+            .addReg(StackPtr)
-+            .addImm(StackProbeSize)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+
-+        // cmp with stack pointer bound
-+        BuildMI(bodyMBB, DL,
-+                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
-+            .addReg(FinalStackProbed)
-+            .addReg(StackPtr)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+
-+        // jump
-+        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
-+            .addMBB(bodyMBB)
-+            .addImm(X86::COND_B)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+        bodyMBB->addSuccessor(bodyMBB);
-+        bodyMBB->addSuccessor(footMBB);
-+      }
-+
-+      // setup loop footer
-+      {
-+        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
-+            .addReg(FinalStackProbed)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
-+                         .setMIFlag(MachineInstr::FrameSetup),
-+                     StackPtr, false, 0)
-+            .addImm(0)
-+            .setMIFlag(MachineInstr::FrameSetup);
-+        footMBB->addSuccessor(&MBB);
-+      }
-+
-+      recomputeLiveIns(*headMBB);
-+      recomputeLiveIns(*bodyMBB);
-+      recomputeLiveIns(*footMBB);
-+      recomputeLiveIns(MBB);
-+    }
-+  } else {
-+    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
-+                           .addReg(Reg)
-+                           .addImm(Val)
-+                           .setMIFlag(MachineInstr::FrameSetup);
-+
-+    // The EFLAGS implicit def is dead.
-+    MI->getOperand(3).setIsDead();
-+  }
- }
- 
- bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
-diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
-index c0b4be95f88d..bb2e83205e71 100644
---- a/llvm/lib/Target/X86/X86FrameLowering.h
-+++ b/llvm/lib/Target/X86/X86FrameLowering.h
-@@ -213,14 +213,14 @@ private:
-   void emitStackProbeInlineGenericBlock(MachineFunction &MF,
-                                         MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MBBI,
--                                        const DebugLoc &DL,
--                                        uint64_t Offset) const;
-+                                        const DebugLoc &DL, uint64_t Offset,
-+                                        uint64_t Align) const;
- 
-   void emitStackProbeInlineGenericLoop(MachineFunction &MF,
-                                        MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MBBI,
--                                       const DebugLoc &DL,
--                                       uint64_t Offset) const;
-+                                       const DebugLoc &DL, uint64_t Offset,
-+                                       uint64_t Align) const;
- 
-   /// Emit a stub to later inline the target stack probe.
-   MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
-diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
-new file mode 100644
-index 000000000000..6c981cb4ac91
---- /dev/null
-+++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
-@@ -0,0 +1,88 @@
-+; RUN: llc < %s | FileCheck %s
-+
-+
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+define i32 @foo_noprotect() local_unnamed_addr {
-+; CHECK-LABEL: foo_noprotect:
-+; CHECK:       # %bb.0:
-+; CHECK-NEXT:	pushq	%rbp
-+; CHECK-NEXT:   .cfi_def_cfa_offset 16
-+; CHECK-NEXT:   .cfi_offset %rbp, -16
-+; CHECK-NEXT:   movq	%rsp, %rbp
-+; CHECK-NEXT:   .cfi_def_cfa_register %rbp
-+; CHECK-NEXT:   andq	$-4096, %rsp                    # imm = 0xF000
-+; CHECK-NEXT:   subq	$73728, %rsp                    # imm = 0x12000
-+; CHECK-NEXT:   movl	$1, 392(%rsp)
-+; CHECK-NEXT:   movl	$1, 28792(%rsp)
-+; CHECK-NEXT:   movl	(%rsp), %eax
-+; CHECK-NEXT:   movq	%rbp, %rsp
-+; CHECK-NEXT:   popq	%rbp
-+; CHECK-NEXT:   .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:   retq
-+
-+
-+  %a = alloca i32, i64 18000, align 4096
-+  %b0 = getelementptr inbounds i32, i32* %a, i64 98
-+  %b1 = getelementptr inbounds i32, i32* %a, i64 7198
-+  store volatile i32 1, i32* %b0
-+  store volatile i32 1, i32* %b1
-+  %c = load volatile i32, i32* %a
-+  ret i32 %c
-+}
-+
-+define i32 @foo_protect() local_unnamed_addr #0 {
-+; CHECK-LABEL: foo_protect:
-+; CHECK:       # %bb.0:
-+; CHECK-NEXT:	pushq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa_offset 16
-+; CHECK-NEXT:	.cfi_offset %rbp, -16
-+; CHECK-NEXT:	movq	%rsp, %rbp
-+; CHECK-NEXT:	.cfi_def_cfa_register %rbp
-+; CHECK-NEXT:	movq	%rsp, %r11
-+; CHECK-NEXT:	andq	$-4096, %r11                    # imm = 0xF000
-+; CHECK-NEXT:	cmpq	%rsp, %r11
-+; CHECK-NEXT:	je	.LBB1_4
-+; CHECK-NEXT:# %bb.1:
-+; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:	cmpq	%rsp, %r11
-+; CHECK-NEXT:	jb	.LBB1_3
-+; CHECK-NEXT:.LBB1_2:                                # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:	cmpq	%rsp, %r11
-+; CHECK-NEXT:	jb	.LBB1_2
-+; CHECK-NEXT:.LBB1_3:
-+; CHECK-NEXT:	movq	%r11, %rsp
-+; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:.LBB1_4:
-+; CHECK-NEXT:	movq	%rsp, %r11
-+; CHECK-NEXT:	subq	$73728, %r11                    # imm = 0x12000
-+; CHECK-NEXT:.LBB1_5:                                # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:	cmpq	%r11, %rsp
-+; CHECK-NEXT:	jne	.LBB1_5
-+; CHECK-NEXT:# %bb.6:
-+; CHECK-NEXT:	movl	$1, 392(%rsp)
-+; CHECK-NEXT:	movl	$1, 28792(%rsp)
-+; CHECK-NEXT:	movl	(%rsp), %eax
-+; CHECK-NEXT:	movq	%rbp, %rsp
-+; CHECK-NEXT:	popq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:	retq
-+
-+
-+
-+
-+  %a = alloca i32, i64 18000, align 4096
-+  %b0 = getelementptr inbounds i32, i32* %a, i64 98
-+  %b1 = getelementptr inbounds i32, i32* %a, i64 7198
-+  store volatile i32 1, i32* %b0
-+  store volatile i32 1, i32* %b1
-+  %c = load volatile i32, i32* %a
-+  ret i32 %c
-+}
-+
-+attributes #0 =  {"probe-stack"="inline-asm"}
-diff --git a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll b/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
-deleted file mode 100644
-index 652acbdf00ba..000000000000
---- a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
-+++ /dev/null
-@@ -1,27 +0,0 @@
--; RUN: llc < %s | FileCheck %s
--
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
--target triple = "x86_64-unknown-linux-gnu"
--
--define i32 @foo(i64 %i) local_unnamed_addr #0 {
--; CHECK-LABEL: foo:
--; CHECK:       # %bb.0:
--; CHECK-NEXT:  subq	$4096, %rsp # imm = 0x1000
--; CHECK-NEXT:  movq	$0, (%rsp)
--; CHECK-NEXT:  subq	$3784, %rsp # imm = 0xEC8
--; CHECK-NEXT:  .cfi_def_cfa_offset 7888
--; CHECK-NEXT:  movl	$1, -128(%rsp,%rdi,4)
--; CHECK-NEXT:  movl	-128(%rsp), %eax
--; CHECK-NEXT:  addq	$7880, %rsp # imm = 0x1EC8
--; CHECK-NEXT:  .cfi_def_cfa_offset 8
--; CHECK-NEXT:  retq
--
--  %a = alloca i32, i32 2000, align 16
--  %b = getelementptr inbounds i32, i32* %a, i64 %i
--  store volatile i32 1, i32* %b
--  %c = load volatile i32, i32* %a
--  ret i32 %c
--}
--
--attributes #0 =  {"probe-stack"="inline-asm"}
--
-diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-new file mode 100644
-index 000000000000..eafa86f1eba9
---- /dev/null
-+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-@@ -0,0 +1,135 @@
-+; RUN: llc < %s | FileCheck %s
-+
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+; | case1 | alloca + align < probe_size
-+define i32 @foo1(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo1:
-+; CHECK:        # %bb.0:
-+; CHECK-NEXT:	pushq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa_offset 16
-+; CHECK-NEXT:	.cfi_offset %rbp, -16
-+; CHECK-NEXT:	movq	%rsp, %rbp
-+; CHECK-NEXT:	.cfi_def_cfa_register %rbp
-+; CHECK-NEXT:   andq    $-64, %rsp
-+; CHECK-NEXT:   subq    $832, %rsp                      # imm = 0x340
-+; CHECK-NEXT:   movl    $1, (%rsp,%rdi,4)
-+; CHECK-NEXT:   movl    (%rsp), %eax
-+; CHECK-NEXT:   movq    %rbp, %rsp
-+; CHECK-NEXT:   popq    %rbp
-+; CHECK-NEXT:	.cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:	retq
-+
-+  %a = alloca i32, i32 200, align 64
-+  %b = getelementptr inbounds i32, i32* %a, i64 %i
-+  store volatile i32 1, i32* %b
-+  %c = load volatile i32, i32* %a
-+  ret i32 %c
-+}
-+
-+; | case2 | alloca > probe_size, align > probe_size
-+define i32 @foo2(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo2:
-+; CHECK:        # %bb.0:
-+; CHECK-NEXT:	pushq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa_offset 16
-+; CHECK-NEXT:	.cfi_offset %rbp, -16
-+; CHECK-NEXT:	movq	%rsp, %rbp
-+; CHECK-NEXT:	.cfi_def_cfa_register %rbp
-+; CHECK-NEXT:   andq    $-2048, %rsp                    # imm = 0xF800
-+; CHECK-NEXT:   subq    $2048, %rsp                     # imm = 0x800
-+; CHECK-NEXT:   movq    $0, (%rsp)
-+; CHECK-NEXT:   subq    $4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:   movq    $0, (%rsp)
-+; CHECK-NEXT:   subq    $2048, %rsp                     # imm = 0x800
-+; CHECK-NEXT:   movl    $1, (%rsp,%rdi,4)
-+; CHECK-NEXT:   movl    (%rsp), %eax
-+; CHECK-NEXT:   movq    %rbp, %rsp
-+; CHECK-NEXT:   popq    %rbp
-+; CHECK-NEXT:   .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:   retq
-+
-+  %a = alloca i32, i32 2000, align 2048
-+  %b = getelementptr inbounds i32, i32* %a, i64 %i
-+  store volatile i32 1, i32* %b
-+  %c = load volatile i32, i32* %a
-+  ret i32 %c
-+}
-+
-+; | case3 | alloca < probe_size, align < probe_size, alloca + align > probe_size
-+define i32 @foo3(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo3:
-+; CHECK:        # %bb.0:
-+; CHECK-NEXT:   pushq   %rbp
-+; CHECK-NEXT:   .cfi_def_cfa_offset 16
-+; CHECK-NEXT:   .cfi_offset %rbp, -16
-+; CHECK-NEXT:   movq    %rsp, %rbp
-+; CHECK-NEXT:   .cfi_def_cfa_register %rbp
-+; CHECK-NEXT:   andq    $-1024, %rsp                    # imm = 0xFC00
-+; CHECK-NEXT:   subq    $3072, %rsp                     # imm = 0xC00
-+; CHECK-NEXT:   movq    $0, (%rsp)
-+; CHECK-NEXT:   subq    $1024, %rsp                     # imm = 0x400
-+; CHECK-NEXT:   movl    $1, (%rsp,%rdi,4)
-+; CHECK-NEXT:   movl    (%rsp), %eax
-+; CHECK-NEXT:   movq    %rbp, %rsp
-+; CHECK-NEXT:   popq    %rbp
-+; CHECK-NEXT:   .cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:   retq
-+
-+
-+  %a = alloca i32, i32 1000, align 1024
-+  %b = getelementptr inbounds i32, i32* %a, i64 %i
-+  store volatile i32 1, i32* %b
-+  %c = load volatile i32, i32* %a
-+  ret i32 %c
-+}
-+
-+; | case4 | alloca + align < probe_size, followed by dynamic alloca
-+define i32 @foo4(i64 %i) local_unnamed_addr #0 {
-+; CHECK-LABEL: foo4:
-+; CHECK:        # %bb.0:
-+; CHECK-NEXT:	pushq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa_offset 16
-+; CHECK-NEXT:	.cfi_offset %rbp, -16
-+; CHECK-NEXT:	movq	%rsp, %rbp
-+; CHECK-NEXT:	.cfi_def_cfa_register %rbp
-+; CHECK-NEXT:	pushq	%rbx
-+; CHECK-NEXT:	andq	$-64, %rsp
-+; CHECK-NEXT:	subq	$896, %rsp                      # imm = 0x380
-+; CHECK-NEXT:	movq	%rsp, %rbx
-+; CHECK-NEXT:	.cfi_offset %rbx, -24
-+; CHECK-NEXT:	movl	$1, (%rbx,%rdi,4)
-+; CHECK-NEXT:	movl	(%rbx), %ecx
-+; CHECK-NEXT:	movq	%rsp, %rax
-+; CHECK-NEXT:	leaq	15(,%rcx,4), %rcx
-+; CHECK-NEXT:	andq	$-16, %rcx
-+; CHECK-NEXT:	subq	%rcx, %rax
-+; CHECK-NEXT:	cmpq	%rsp, %rax
-+; CHECK-NEXT:	jle	.LBB3_3
-+; CHECK-NEXT:.LBB3_2:                                # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:	cmpq	%rsp, %rax
-+; CHECK-NEXT:	jg	.LBB3_2
-+; CHECK-NEXT:.LBB3_3:
-+; CHECK-NEXT:	andq	$-64, %rax
-+; CHECK-NEXT:	movq	%rax, %rsp
-+; CHECK-NEXT:	movl	(%rax), %eax
-+; CHECK-NEXT:	leaq	-8(%rbp), %rsp
-+; CHECK-NEXT:	popq	%rbx
-+; CHECK-NEXT:	popq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:	retq
-+
-+  %a = alloca i32, i32 200, align 64
-+  %b = getelementptr inbounds i32, i32* %a, i64 %i
-+  store volatile i32 1, i32* %b
-+  %c = load volatile i32, i32* %a
-+  %d = alloca i32, i32 %c, align 64
-+  %e = load volatile i32, i32* %d
-+  ret i32 %e
-+}
-+
-+attributes #0 =  {"probe-stack"="inline-asm"}
-+
-diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
-new file mode 100644
-index 000000000000..e608bab90415
---- /dev/null
-+++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
-@@ -0,0 +1,83 @@
-+; RUN: llc < %s | FileCheck %s
-+
-+
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+define i32 @foo_noprotect() local_unnamed_addr {
-+; CHECK-LABEL: foo_noprotect:
-+; CHECK:       # %bb.0:
-+; CHECK-NEXT:	pushq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa_offset 16
-+; CHECK-NEXT:	.cfi_offset %rbp, -16
-+; CHECK-NEXT:	movq	%rsp, %rbp
-+; CHECK-NEXT:	.cfi_def_cfa_register %rbp
-+; CHECK-NEXT:	andq	$-65536, %rsp
-+; CHECK-NEXT:	subq	$65536, %rsp
-+; CHECK-NEXT:	movl	$1, 392(%rsp)
-+; CHECK-NEXT:	movl	(%rsp), %eax
-+; CHECK-NEXT:	movq	%rbp, %rsp
-+; CHECK-NEXT:	popq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:	retq
-+
-+
-+
-+  %a = alloca i32, i64 100, align 65536
-+  %b = getelementptr inbounds i32, i32* %a, i64 98
-+  store volatile i32 1, i32* %b
-+  %c = load volatile i32, i32* %a
-+  ret i32 %c
-+}
-+
-+define i32 @foo_protect() local_unnamed_addr #0 {
-+; CHECK-LABEL: foo_protect:
-+; CHECK:       # %bb.0:
-+; CHECK-NEXT:	pushq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa_offset 16
-+; CHECK-NEXT:	.cfi_offset %rbp, -16
-+; CHECK-NEXT:	movq	%rsp, %rbp
-+; CHECK-NEXT:	.cfi_def_cfa_register %rbp
-+; CHECK-NEXT:	movq	%rsp, %r11
-+; CHECK-NEXT:	andq	$-65536, %r11                   # imm = 0xFFFF0000
-+; CHECK-NEXT:	cmpq	%rsp, %r11
-+; CHECK-NEXT:	je	.LBB1_4
-+; CHECK-NEXT:# %bb.1:
-+; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:	cmpq	%rsp, %r11
-+; CHECK-NEXT:	jb	.LBB1_3
-+; CHECK-NEXT:.LBB1_2:                                # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:	cmpq	%rsp, %r11
-+; CHECK-NEXT:	jb	.LBB1_2
-+; CHECK-NEXT:.LBB1_3:
-+; CHECK-NEXT:	movq	%r11, %rsp
-+; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:.LBB1_4:
-+; CHECK-NEXT:	movq	%rsp, %r11
-+; CHECK-NEXT:	subq	$65536, %r11                    # imm = 0x10000
-+; CHECK-NEXT:.LBB1_5:                                # =>This Inner Loop Header: Depth=1
-+; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
-+; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:	cmpq	%r11, %rsp
-+; CHECK-NEXT:	jne	.LBB1_5
-+; CHECK-NEXT:# %bb.6:
-+; CHECK-NEXT:	movl	$1, 392(%rsp)
-+; CHECK-NEXT:	movl	(%rsp), %eax
-+; CHECK-NEXT:	movq	%rbp, %rsp
-+; CHECK-NEXT:	popq	%rbp
-+; CHECK-NEXT:	.cfi_def_cfa %rsp, 8
-+; CHECK-NEXT:	retq
-+
-+
-+
-+
-+  %a = alloca i32, i64 100, align 65536
-+  %b = getelementptr inbounds i32, i32* %a, i64 98
-+  store volatile i32 1, i32* %b
-+  %c = load volatile i32, i32* %a
-+  ret i32 %c
-+}
-+
-+attributes #0 =  {"probe-stack"="inline-asm"}
-
-From bbe6cbbed8c7460a7e8477373b9250543362e771 Mon Sep 17 00:00:00 2001
-From: serge-sans-paille <sguelton at redhat.com>
-Date: Tue, 27 Oct 2020 10:59:42 +0100
-Subject: [PATCH 3/3] [stack-clash] Fix probing of dynamic alloca
-
-- Perform the probing in the correct direction.
-  Related to https://github.com/rust-lang/rust/pull/77885#issuecomment-711062924
-
-- The first touch on a dynamic alloca cannot use a mov because it would clobber
-  the existing contents. Use an xor with 0 instead, which leaves them unchanged.
-
-Differential Revision: https://reviews.llvm.org/D90216
-
-(cherry picked from commit 0f60bcc36c34522618bd1425a45f8c6006568fb6)
----
- llvm/lib/Target/X86/X86ISelLowering.cpp              |  8 ++++----
- llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll  | 12 ++++++------
- .../X86/stack-clash-small-alloc-medium-align.ll      |  6 +++---
- 3 files changed, 13 insertions(+), 13 deletions(-)
-
-diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
-index f68ae4461fe3..afe470cc6e0b 100644
---- a/llvm/lib/Target/X86/X86ISelLowering.cpp
-+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
-@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
- 
-   BuildMI(testMBB, DL, TII->get(X86::JCC_1))
-       .addMBB(tailMBB)
--      .addImm(X86::COND_LE);
-+      .addImm(X86::COND_GE);
-   testMBB->addSuccessor(blockMBB);
-   testMBB->addSuccessor(tailMBB);
- 
-@@ -31892,9 +31892,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
-   //
-   // The property we want to enforce is to never have more than [page alloc] between two probes.
- 
--  const unsigned MovMIOpc =
--      TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
--  addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
-+  const unsigned XORMIOpc =
-+      TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
-+  addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
-       .addImm(0);
- 
-   BuildMI(blockMBB, DL,
-diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-index 82fd67842c8a..6dd8b6ab5897 100644
---- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
-@@ -24,12 +24,12 @@ attributes #0 =  {"probe-stack"="inline-asm"}
- ; CHECK-X86-64-NEXT:  	andq	$-16, %rcx
- ; CHECK-X86-64-NEXT:  	subq	%rcx, %rax
- ; CHECK-X86-64-NEXT:  	cmpq	%rsp, %rax
--; CHECK-X86-64-NEXT:  	jle	.LBB0_3
-+; CHECK-X86-64-NEXT:  	jge	.LBB0_3
- ; CHECK-X86-64-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
--; CHECK-X86-64-NEXT:  	movq	$0, (%rsp)
-+; CHECK-X86-64-NEXT:  	xorq	$0, (%rsp)
- ; CHECK-X86-64-NEXT:  	subq	$4096, %rsp # imm = 0x1000
- ; CHECK-X86-64-NEXT:  	cmpq	%rsp, %rax
--; CHECK-X86-64-NEXT:  	jg	.LBB0_2
-+; CHECK-X86-64-NEXT:  	jl	.LBB0_2
- ; CHECK-X86-64-NEXT:  .LBB0_3:
- ; CHECK-X86-64-NEXT:  	movq	%rax, %rsp
- ; CHECK-X86-64-NEXT:  	movl	$1, 4792(%rax)
-@@ -54,12 +54,12 @@ attributes #0 =  {"probe-stack"="inline-asm"}
- ; CHECK-X86-32-NEXT:    andl    $-16, %ecx
- ; CHECK-X86-32-NEXT:    subl    %ecx, %eax
- ; CHECK-X86-32-NEXT:    cmpl    %esp, %eax
--; CHECK-X86-32-NEXT:    jle  .LBB0_3
-+; CHECK-X86-32-NEXT:    jge  .LBB0_3
- ; CHECK-X86-32-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
--; CHECK-X86-32-NEXT:    movl    $0, (%esp)
-+; CHECK-X86-32-NEXT:    xorl    $0, (%esp)
- ; CHECK-X86-32-NEXT:    subl    $4096, %esp # imm = 0x1000
- ; CHECK-X86-32-NEXT:    cmpl    %esp, %eax
--; CHECK-X86-32-NEXT:    jg .LBB0_2
-+; CHECK-X86-32-NEXT:    jl .LBB0_2
- ; CHECK-X86-32-NEXT:  .LBB0_3:
- ; CHECK-X86-32-NEXT:    movl    %eax, %esp
- ; CHECK-X86-32-NEXT:    movl    $1, 4792(%eax)
-diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-index eafa86f1eba9..39b6c3640a60 100644
---- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
-@@ -106,12 +106,12 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
- ; CHECK-NEXT:	andq	$-16, %rcx
- ; CHECK-NEXT:	subq	%rcx, %rax
- ; CHECK-NEXT:	cmpq	%rsp, %rax
--; CHECK-NEXT:	jle	.LBB3_3
-+; CHECK-NEXT:	jge	.LBB3_3
- ; CHECK-NEXT:.LBB3_2:                                # =>This Inner Loop Header: Depth=1
--; CHECK-NEXT:	movq	$0, (%rsp)
-+; CHECK-NEXT:	xorq	$0, (%rsp)
- ; CHECK-NEXT:	subq	$4096, %rsp                     # imm = 0x1000
- ; CHECK-NEXT:	cmpq	%rsp, %rax
--; CHECK-NEXT:	jg	.LBB3_2
-+; CHECK-NEXT:	jl	.LBB3_2
- ; CHECK-NEXT:.LBB3_3:
- ; CHECK-NEXT:	andq	$-64, %rax
- ; CHECK-NEXT:	movq	%rax, %rsp



More information about the arch-commits mailing list