[arch-commits] Commit in llvm/repos/extra-x86_64 (5 files)
Evangelos Foutras
foutrelis at archlinux.org
Thu Dec 3 00:25:08 UTC 2020
Date: Thursday, December 3, 2020 @ 00:25:08
Author: foutrelis
Revision: 402830
archrelease: copy trunk to extra-x86_64
Added:
llvm/repos/extra-x86_64/PKGBUILD
(from rev 402829, llvm/trunk/PKGBUILD)
llvm/repos/extra-x86_64/llvm-config.h
(from rev 402829, llvm/trunk/llvm-config.h)
llvm/repos/extra-x86_64/stack-clash-fixes.patch
(from rev 402829, llvm/trunk/stack-clash-fixes.patch)
Deleted:
llvm/repos/extra-x86_64/PKGBUILD
llvm/repos/extra-x86_64/llvm-config.h
-------------------------+
PKGBUILD | 249 ++++++-------
llvm-config.h | 18
stack-clash-fixes.patch | 870 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1006 insertions(+), 131 deletions(-)
Deleted: PKGBUILD
===================================================================
--- PKGBUILD 2020-12-03 00:24:58 UTC (rev 402829)
+++ PKGBUILD 2020-12-03 00:25:08 UTC (rev 402830)
@@ -1,122 +0,0 @@
-# Maintainer: Evangelos Foutras <evangelos at foutrelis.com>
-# Contributor: Jan "heftig" Steffens <jan.steffens at gmail.com>
-
-pkgname=('llvm' 'llvm-libs' 'llvm-ocaml')
-pkgver=11.0.0
-pkgrel=3
-_ocaml_ver=4.11.1
-arch=('x86_64')
-url="https://llvm.org/"
-license=('custom:Apache 2.0 with LLVM Exception')
-makedepends=('cmake' 'ninja' 'libffi' 'libedit' 'ncurses' 'libxml2'
- "ocaml>=$_ocaml_ver" 'ocaml-ctypes' 'ocaml-findlib'
- 'python-setuptools' 'python-psutil' 'python-sphinx'
- 'python-recommonmark')
-options=('staticlibs')
-_source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver
-source=($_source_base/$pkgname-$pkgver.src.tar.xz{,.sig}
- llvm-config.h)
-sha256sums=('913f68c898dfb4a03b397c5e11c6a2f39d0f22ed7665c9cefa87a34423a72469'
- 'SKIP'
- '597dc5968c695bbdbb0eac9e8eb5117fcd2773bc91edf5ec103ecffffab8bc48')
-validpgpkeys+=('B6C8F98282B944E3B0D5C2530FC3042E345AD05D') # Hans Wennborg <hans at chromium.org>
-validpgpkeys+=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard <tstellar at redhat.com>
-
-prepare() {
- cd "$srcdir/llvm-$pkgver.src"
- mkdir build
-}
-
-build() {
- cd "$srcdir/llvm-$pkgver.src/build"
-
- cmake .. -G Ninja \
- -DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_INSTALL_PREFIX=/usr \
- -DLLVM_HOST_TRIPLE=$CHOST \
- -DLLVM_BUILD_LLVM_DYLIB=ON \
- -DLLVM_LINK_LLVM_DYLIB=ON \
- -DLLVM_INSTALL_UTILS=ON \
- -DLLVM_ENABLE_RTTI=ON \
- -DLLVM_ENABLE_FFI=ON \
- -DLLVM_BUILD_TESTS=ON \
- -DLLVM_BUILD_DOCS=ON \
- -DLLVM_ENABLE_SPHINX=ON \
- -DLLVM_ENABLE_DOXYGEN=OFF \
- -DSPHINX_WARNINGS_AS_ERRORS=OFF \
- -DLLVM_BINUTILS_INCDIR=/usr/include
- ninja all ocaml_doc
-}
-
-check() {
- cd "$srcdir/llvm-$pkgver.src/build"
- ninja check
-}
-
-package_llvm() {
- pkgdesc="Collection of modular and reusable compiler and toolchain technologies"
- depends=('llvm-libs' 'perl')
-
- cd "$srcdir/llvm-$pkgver.src/build"
-
- DESTDIR="$pkgdir" ninja install
-
- # Include lit for running lit-based tests in other projects
- pushd ../utils/lit
- python3 setup.py install --root="$pkgdir" -O1
- popd
-
- # Remove documentation sources
- rm -r "$pkgdir"/usr/share/doc/$pkgname/html/{_sources,.buildinfo}
-
- # The runtime libraries go into llvm-libs
- mv -f "$pkgdir"/usr/lib/lib{LLVM,LTO,Remarks}*.so* "$srcdir"
- mv -f "$pkgdir"/usr/lib/LLVMgold.so "$srcdir"
-
- # OCaml bindings go to a separate package
- rm -rf "$srcdir"/ocaml.{lib,doc}
- mv "$pkgdir/usr/lib/ocaml" "$srcdir/ocaml.lib"
- mv "$pkgdir/usr/share/doc/$pkgname/ocaml-html" "$srcdir/ocaml.doc"
-
- if [[ $CARCH == x86_64 ]]; then
- # Needed for multilib (https://bugs.archlinux.org/task/29951)
- # Header stub is taken from Fedora
- mv "$pkgdir/usr/include/llvm/Config/llvm-config"{,-64}.h
- cp "$srcdir/llvm-config.h" "$pkgdir/usr/include/llvm/Config/llvm-config.h"
- fi
-
- install -Dm644 ../LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
-}
-
-package_llvm-libs() {
- pkgdesc="LLVM runtime libraries"
- depends=('gcc-libs' 'zlib' 'libffi' 'libedit' 'ncurses' 'libxml2')
-
- install -d "$pkgdir/usr/lib"
- cp -P \
- "$srcdir"/lib{LLVM,LTO,Remarks}*.so* \
- "$srcdir"/LLVMgold.so \
- "$pkgdir/usr/lib/"
-
- # Symlink LLVMgold.so from /usr/lib/bfd-plugins
- # https://bugs.archlinux.org/task/28479
- install -d "$pkgdir/usr/lib/bfd-plugins"
- ln -s ../LLVMgold.so "$pkgdir/usr/lib/bfd-plugins/LLVMgold.so"
-
- install -Dm644 "$srcdir/llvm-$pkgver.src/LICENSE.TXT" \
- "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
-}
-
-package_llvm-ocaml() {
- pkgdesc="OCaml bindings for LLVM"
- depends=('llvm' "ocaml=$_ocaml_ver" 'ocaml-ctypes')
-
- install -d "$pkgdir"/{usr/lib,usr/share/doc/$pkgname}
- cp -a "$srcdir/ocaml.lib" "$pkgdir/usr/lib/ocaml"
- cp -a "$srcdir/ocaml.doc" "$pkgdir/usr/share/doc/$pkgname/html"
-
- install -Dm644 "$srcdir/llvm-$pkgver.src/LICENSE.TXT" \
- "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
-}
-
-# vim:set ts=2 sw=2 et:
Copied: llvm/repos/extra-x86_64/PKGBUILD (from rev 402829, llvm/trunk/PKGBUILD)
===================================================================
--- PKGBUILD (rev 0)
+++ PKGBUILD 2020-12-03 00:25:08 UTC (rev 402830)
@@ -0,0 +1,127 @@
+# Maintainer: Evangelos Foutras <evangelos at foutrelis.com>
+# Contributor: Jan "heftig" Steffens <jan.steffens at gmail.com>
+
+pkgname=('llvm' 'llvm-libs' 'llvm-ocaml')
+pkgver=11.0.0
+pkgrel=4
+_ocaml_ver=4.11.1
+arch=('x86_64')
+url="https://llvm.org/"
+license=('custom:Apache 2.0 with LLVM Exception')
+makedepends=('cmake' 'ninja' 'libffi' 'libedit' 'ncurses' 'libxml2'
+ "ocaml>=$_ocaml_ver" 'ocaml-ctypes' 'ocaml-findlib'
+ 'python-setuptools' 'python-psutil' 'python-sphinx'
+ 'python-recommonmark')
+options=('staticlibs')
+_source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver
+source=($_source_base/$pkgname-$pkgver.src.tar.xz{,.sig}
+ stack-clash-fixes.patch
+ llvm-config.h)
+sha256sums=('913f68c898dfb4a03b397c5e11c6a2f39d0f22ed7665c9cefa87a34423a72469'
+ 'SKIP'
+ 'bdcaa7559223bd42a381086f7cc23fc73f88ebb1966a7c235f897db0f73b7d20'
+ '597dc5968c695bbdbb0eac9e8eb5117fcd2773bc91edf5ec103ecffffab8bc48')
+validpgpkeys+=('B6C8F98282B944E3B0D5C2530FC3042E345AD05D') # Hans Wennborg <hans at chromium.org>
+validpgpkeys+=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard <tstellar at redhat.com>
+
+prepare() {
+ cd "$srcdir/llvm-$pkgver.src"
+ mkdir build
+
+ # https://bugs.llvm.org/show_bug.cgi?id=48007
+ patch -Np2 -i ../stack-clash-fixes.patch
+}
+
+build() {
+ cd "$srcdir/llvm-$pkgver.src/build"
+
+ cmake .. -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_INSTALL_PREFIX=/usr \
+ -DLLVM_HOST_TRIPLE=$CHOST \
+ -DLLVM_BUILD_LLVM_DYLIB=ON \
+ -DLLVM_LINK_LLVM_DYLIB=ON \
+ -DLLVM_INSTALL_UTILS=ON \
+ -DLLVM_ENABLE_RTTI=ON \
+ -DLLVM_ENABLE_FFI=ON \
+ -DLLVM_BUILD_TESTS=ON \
+ -DLLVM_BUILD_DOCS=ON \
+ -DLLVM_ENABLE_SPHINX=ON \
+ -DLLVM_ENABLE_DOXYGEN=OFF \
+ -DSPHINX_WARNINGS_AS_ERRORS=OFF \
+ -DLLVM_BINUTILS_INCDIR=/usr/include
+ ninja all ocaml_doc
+}
+
+check() {
+ cd "$srcdir/llvm-$pkgver.src/build"
+ ninja check
+}
+
+package_llvm() {
+ pkgdesc="Collection of modular and reusable compiler and toolchain technologies"
+ depends=('llvm-libs' 'perl')
+
+ cd "$srcdir/llvm-$pkgver.src/build"
+
+ DESTDIR="$pkgdir" ninja install
+
+ # Include lit for running lit-based tests in other projects
+ pushd ../utils/lit
+ python3 setup.py install --root="$pkgdir" -O1
+ popd
+
+ # Remove documentation sources
+ rm -r "$pkgdir"/usr/share/doc/$pkgname/html/{_sources,.buildinfo}
+
+ # The runtime libraries go into llvm-libs
+ mv -f "$pkgdir"/usr/lib/lib{LLVM,LTO,Remarks}*.so* "$srcdir"
+ mv -f "$pkgdir"/usr/lib/LLVMgold.so "$srcdir"
+
+ # OCaml bindings go to a separate package
+ rm -rf "$srcdir"/ocaml.{lib,doc}
+ mv "$pkgdir/usr/lib/ocaml" "$srcdir/ocaml.lib"
+ mv "$pkgdir/usr/share/doc/$pkgname/ocaml-html" "$srcdir/ocaml.doc"
+
+ if [[ $CARCH == x86_64 ]]; then
+ # Needed for multilib (https://bugs.archlinux.org/task/29951)
+ # Header stub is taken from Fedora
+ mv "$pkgdir/usr/include/llvm/Config/llvm-config"{,-64}.h
+ cp "$srcdir/llvm-config.h" "$pkgdir/usr/include/llvm/Config/llvm-config.h"
+ fi
+
+ install -Dm644 ../LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
+}
+
+package_llvm-libs() {
+ pkgdesc="LLVM runtime libraries"
+ depends=('gcc-libs' 'zlib' 'libffi' 'libedit' 'ncurses' 'libxml2')
+
+ install -d "$pkgdir/usr/lib"
+ cp -P \
+ "$srcdir"/lib{LLVM,LTO,Remarks}*.so* \
+ "$srcdir"/LLVMgold.so \
+ "$pkgdir/usr/lib/"
+
+ # Symlink LLVMgold.so from /usr/lib/bfd-plugins
+ # https://bugs.archlinux.org/task/28479
+ install -d "$pkgdir/usr/lib/bfd-plugins"
+ ln -s ../LLVMgold.so "$pkgdir/usr/lib/bfd-plugins/LLVMgold.so"
+
+ install -Dm644 "$srcdir/llvm-$pkgver.src/LICENSE.TXT" \
+ "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
+}
+
+package_llvm-ocaml() {
+ pkgdesc="OCaml bindings for LLVM"
+ depends=('llvm' "ocaml=$_ocaml_ver" 'ocaml-ctypes')
+
+ install -d "$pkgdir"/{usr/lib,usr/share/doc/$pkgname}
+ cp -a "$srcdir/ocaml.lib" "$pkgdir/usr/lib/ocaml"
+ cp -a "$srcdir/ocaml.doc" "$pkgdir/usr/share/doc/$pkgname/html"
+
+ install -Dm644 "$srcdir/llvm-$pkgver.src/LICENSE.TXT" \
+ "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
+}
+
+# vim:set ts=2 sw=2 et:
Deleted: llvm-config.h
===================================================================
--- llvm-config.h 2020-12-03 00:24:58 UTC (rev 402829)
+++ llvm-config.h 2020-12-03 00:25:08 UTC (rev 402830)
@@ -1,9 +0,0 @@
-#include <bits/wordsize.h>
-
-#if __WORDSIZE == 32
-#include "llvm-config-32.h"
-#elif __WORDSIZE == 64
-#include "llvm-config-64.h"
-#else
-#error "Unknown word size"
-#endif
Copied: llvm/repos/extra-x86_64/llvm-config.h (from rev 402829, llvm/trunk/llvm-config.h)
===================================================================
--- llvm-config.h (rev 0)
+++ llvm-config.h 2020-12-03 00:25:08 UTC (rev 402830)
@@ -0,0 +1,9 @@
+#include <bits/wordsize.h>
+
+#if __WORDSIZE == 32
+#include "llvm-config-32.h"
+#elif __WORDSIZE == 64
+#include "llvm-config-64.h"
+#else
+#error "Unknown word size"
+#endif
Copied: llvm/repos/extra-x86_64/stack-clash-fixes.patch (from rev 402829, llvm/trunk/stack-clash-fixes.patch)
===================================================================
--- stack-clash-fixes.patch (rev 0)
+++ stack-clash-fixes.patch 2020-12-03 00:25:08 UTC (rev 402830)
@@ -0,0 +1,870 @@
+From a1e0363c7402f7aa58e24e0e6dfa447ebabc1910 Mon Sep 17 00:00:00 2001
+From: serge-sans-paille <sguelton at redhat.com>
+Date: Wed, 30 Sep 2020 11:35:00 +0200
+Subject: [PATCH 1/3] Fix limit behavior of dynamic alloca
+
+When the allocation size is 0, we shouldn't probe. Within [1, PAGE_SIZE], we
+should probe once etc.
+
+This fixes https://bugs.llvm.org/show_bug.cgi?id=47657
+
+Differential Revision: https://reviews.llvm.org/D88548
+
+(cherry picked from commit 9573c9f2a363da71b2c07a3add4e52721e6028a0)
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
+ llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 8 ++++----
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index fd1e6517dfac..f68ae4461fe3 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
+
+ BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+ .addMBB(tailMBB)
+- .addImm(X86::COND_L);
++ .addImm(X86::COND_LE);
+ testMBB->addSuccessor(blockMBB);
+ testMBB->addSuccessor(tailMBB);
+
+diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+index bc4678564083..82fd67842c8a 100644
+--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
++++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
+ ; CHECK-X86-64-NEXT: andq $-16, %rcx
+ ; CHECK-X86-64-NEXT: subq %rcx, %rax
+ ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
+-; CHECK-X86-64-NEXT: jl .LBB0_3
++; CHECK-X86-64-NEXT: jle .LBB0_3
+ ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
+ ; CHECK-X86-64-NEXT: movq $0, (%rsp)
+ ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
+ ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
+-; CHECK-X86-64-NEXT: jge .LBB0_2
++; CHECK-X86-64-NEXT: jg .LBB0_2
+ ; CHECK-X86-64-NEXT: .LBB0_3:
+ ; CHECK-X86-64-NEXT: movq %rax, %rsp
+ ; CHECK-X86-64-NEXT: movl $1, 4792(%rax)
+@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
+ ; CHECK-X86-32-NEXT: andl $-16, %ecx
+ ; CHECK-X86-32-NEXT: subl %ecx, %eax
+ ; CHECK-X86-32-NEXT: cmpl %esp, %eax
+-; CHECK-X86-32-NEXT: jl .LBB0_3
++; CHECK-X86-32-NEXT: jle .LBB0_3
+ ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
+ ; CHECK-X86-32-NEXT: movl $0, (%esp)
+ ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
+ ; CHECK-X86-32-NEXT: cmpl %esp, %eax
+-; CHECK-X86-32-NEXT: jge .LBB0_2
++; CHECK-X86-32-NEXT: jg .LBB0_2
+ ; CHECK-X86-32-NEXT: .LBB0_3:
+ ; CHECK-X86-32-NEXT: movl %eax, %esp
+ ; CHECK-X86-32-NEXT: movl $1, 4792(%eax)
+
+From aac36687f7978f33751daf2870b5c812124ebfaf Mon Sep 17 00:00:00 2001
+From: serge-sans-paille <sguelton at redhat.com>
+Date: Thu, 23 Jul 2020 16:22:48 +0200
+Subject: [PATCH 2/3] Fix interaction between stack alignment and inline-asm
+ stack clash protection
+
+As reported in https://github.com/rust-lang/rust/issues/70143 alignment is not
+taken into account when doing the probing. Fix that by adjusting the first probe
+if the stack align is small, or by extending the dynamic probing if the
+alignment is large.
+
+Differential Revision: https://reviews.llvm.org/D84419
+
+(cherry picked from commit f2c6bfa350de142e4d63808d03335f69bd136d6a)
+---
+ llvm/lib/Target/X86/X86FrameLowering.cpp | 222 ++++++++++++++++--
+ llvm/lib/Target/X86/X86FrameLowering.h | 8 +-
+ .../X86/stack-clash-large-large-align.ll | 88 +++++++
+ .../CodeGen/X86/stack-clash-no-free-probe.ll | 27 ---
+ .../stack-clash-small-alloc-medium-align.ll | 135 +++++++++++
+ .../X86/stack-clash-small-large-align.ll | 83 +++++++
+ 6 files changed, 512 insertions(+), 51 deletions(-)
+ create mode 100644 llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
+ delete mode 100644 llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
+ create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+ create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
+
+diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
+index c7ca6fb2a4fc..db6b68659493 100644
+--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
++++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
+@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+ uint64_t ProbeChunk = StackProbeSize * 8;
+
++ uint64_t MaxAlign =
++ TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
++
+ // Synthesize a loop or unroll it, depending on the number of iterations.
++ // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
++ // between the unaligned rsp and current rsp.
+ if (Offset > ProbeChunk) {
+- emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
++ emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
++ MaxAlign % StackProbeSize);
+ } else {
+- emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
++ emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
++ MaxAlign % StackProbeSize);
+ }
+ }
+
+ void X86FrameLowering::emitStackProbeInlineGenericBlock(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+- uint64_t Offset) const {
++ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
++ uint64_t AlignOffset) const {
+
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
++
+ uint64_t CurrentOffset = 0;
+- // 0 Thanks to return address being saved on the stack
+- uint64_t CurrentProbeOffset = 0;
+
+- // For the first N - 1 pages, just probe. I tried to take advantage of
++ assert(AlignOffset < StackProbeSize);
++
++ // If the offset is so small it fits within a page, there's nothing to do.
++ if (StackProbeSize < Offset + AlignOffset) {
++
++ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
++ .addReg(StackPtr)
++ .addImm(StackProbeSize - AlignOffset)
++ .setMIFlag(MachineInstr::FrameSetup);
++ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
++
++ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
++ .setMIFlag(MachineInstr::FrameSetup),
++ StackPtr, false, 0)
++ .addImm(0)
++ .setMIFlag(MachineInstr::FrameSetup);
++ NumFrameExtraProbe++;
++ CurrentOffset = StackProbeSize - AlignOffset;
++ }
++
++ // For the next N - 1 pages, just probe. I tried to take advantage of
+ // natural probes but it implies much more logic and there was very few
+ // interesting natural probes to interleave.
+ while (CurrentOffset + StackProbeSize < Offset) {
+@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
+ .setMIFlag(MachineInstr::FrameSetup);
+ NumFrameExtraProbe++;
+ CurrentOffset += StackProbeSize;
+- CurrentProbeOffset += StackProbeSize;
+ }
+
++ // No need to probe the tail, it is smaller than a Page.
+ uint64_t ChunkSize = Offset - CurrentOffset;
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
+
+ void X86FrameLowering::emitStackProbeInlineGenericLoop(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+- uint64_t Offset) const {
++ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
++ uint64_t AlignOffset) const {
+ assert(Offset && "null offset");
+
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+
++ if (AlignOffset) {
++ if (AlignOffset < StackProbeSize) {
++ // Perform a first smaller allocation followed by a probe.
++ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
++ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
++ .addReg(StackPtr)
++ .addImm(AlignOffset)
++ .setMIFlag(MachineInstr::FrameSetup);
++ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
++
++ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
++ .setMIFlag(MachineInstr::FrameSetup),
++ StackPtr, false, 0)
++ .addImm(0)
++ .setMIFlag(MachineInstr::FrameSetup);
++ NumFrameExtraProbe++;
++ Offset -= AlignOffset;
++ }
++ }
++
+ // Synthesize a loop
+ NumFrameLoopProbe++;
+ const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+@@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
+
+ // save loop bound
+ {
+- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
++ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
++ BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
+ .addReg(FinalStackProbed)
+ .addImm(Offset / StackProbeSize * StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+@@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
+
+ // allocate a page
+ {
+- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+- BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
++ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
++ BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
+ uint64_t MaxAlign) const {
+ uint64_t Val = -MaxAlign;
+ unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
+- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
+- .addReg(Reg)
+- .addImm(Val)
+- .setMIFlag(MachineInstr::FrameSetup);
+
+- // The EFLAGS implicit def is dead.
+- MI->getOperand(3).setIsDead();
++ MachineFunction &MF = *MBB.getParent();
++ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
++ const X86TargetLowering &TLI = *STI.getTargetLowering();
++ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
++ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
++
++ // We want to make sure that (in worst case) less than StackProbeSize bytes
++ // are not probed after the AND. This assumption is used in
++ // emitStackProbeInlineGeneric.
++ if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
++ {
++ NumFrameLoopProbe++;
++ MachineBasicBlock *entryMBB =
++ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
++ MachineBasicBlock *headMBB =
++ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
++ MachineBasicBlock *bodyMBB =
++ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
++ MachineBasicBlock *footMBB =
++ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
++
++ MachineFunction::iterator MBBIter = MBB.getIterator();
++ MF.insert(MBBIter, entryMBB);
++ MF.insert(MBBIter, headMBB);
++ MF.insert(MBBIter, bodyMBB);
++ MF.insert(MBBIter, footMBB);
++ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
++ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
++
++ // Setup entry block
++ {
++
++ entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
++ BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
++ .addReg(StackPtr)
++ .setMIFlag(MachineInstr::FrameSetup);
++ MachineInstr *MI =
++ BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
++ .addReg(FinalStackProbed)
++ .addImm(Val)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ // The EFLAGS implicit def is dead.
++ MI->getOperand(3).setIsDead();
++
++ BuildMI(entryMBB, DL,
++ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
++ .addReg(FinalStackProbed)
++ .addReg(StackPtr)
++ .setMIFlag(MachineInstr::FrameSetup);
++ BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
++ .addMBB(&MBB)
++ .addImm(X86::COND_E)
++ .setMIFlag(MachineInstr::FrameSetup);
++ entryMBB->addSuccessor(headMBB);
++ entryMBB->addSuccessor(&MBB);
++ }
++
++ // Loop entry block
++
++ {
++ const unsigned SUBOpc =
++ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
++ BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
++ .addReg(StackPtr)
++ .addImm(StackProbeSize)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ BuildMI(headMBB, DL,
++ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
++ .addReg(FinalStackProbed)
++ .addReg(StackPtr)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ // jump
++ BuildMI(headMBB, DL, TII.get(X86::JCC_1))
++ .addMBB(footMBB)
++ .addImm(X86::COND_B)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ headMBB->addSuccessor(bodyMBB);
++ headMBB->addSuccessor(footMBB);
++ }
++
++ // setup loop body
++ {
++ addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
++ .setMIFlag(MachineInstr::FrameSetup),
++ StackPtr, false, 0)
++ .addImm(0)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ const unsigned SUBOpc =
++ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
++ BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
++ .addReg(StackPtr)
++ .addImm(StackProbeSize)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ // cmp with stack pointer bound
++ BuildMI(bodyMBB, DL,
++ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
++ .addReg(FinalStackProbed)
++ .addReg(StackPtr)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ // jump
++ BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
++ .addMBB(bodyMBB)
++ .addImm(X86::COND_B)
++ .setMIFlag(MachineInstr::FrameSetup);
++ bodyMBB->addSuccessor(bodyMBB);
++ bodyMBB->addSuccessor(footMBB);
++ }
++
++ // setup loop footer
++ {
++ BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
++ .addReg(FinalStackProbed)
++ .setMIFlag(MachineInstr::FrameSetup);
++ addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
++ .setMIFlag(MachineInstr::FrameSetup),
++ StackPtr, false, 0)
++ .addImm(0)
++ .setMIFlag(MachineInstr::FrameSetup);
++ footMBB->addSuccessor(&MBB);
++ }
++
++ recomputeLiveIns(*headMBB);
++ recomputeLiveIns(*bodyMBB);
++ recomputeLiveIns(*footMBB);
++ recomputeLiveIns(MBB);
++ }
++ } else {
++ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
++ .addReg(Reg)
++ .addImm(Val)
++ .setMIFlag(MachineInstr::FrameSetup);
++
++ // The EFLAGS implicit def is dead.
++ MI->getOperand(3).setIsDead();
++ }
+ }
+
+ bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
+diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
+index c0b4be95f88d..bb2e83205e71 100644
+--- a/llvm/lib/Target/X86/X86FrameLowering.h
++++ b/llvm/lib/Target/X86/X86FrameLowering.h
+@@ -213,14 +213,14 @@ private:
+ void emitStackProbeInlineGenericBlock(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+- const DebugLoc &DL,
+- uint64_t Offset) const;
++ const DebugLoc &DL, uint64_t Offset,
++ uint64_t Align) const;
+
+ void emitStackProbeInlineGenericLoop(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+- const DebugLoc &DL,
+- uint64_t Offset) const;
++ const DebugLoc &DL, uint64_t Offset,
++ uint64_t Align) const;
+
+ /// Emit a stub to later inline the target stack probe.
+ MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
+diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
+new file mode 100644
+index 000000000000..6c981cb4ac91
+--- /dev/null
++++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
+@@ -0,0 +1,88 @@
++; RUN: llc < %s | FileCheck %s
++
++
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
++target triple = "x86_64-unknown-linux-gnu"
++
++define i32 @foo_noprotect() local_unnamed_addr {
++; CHECK-LABEL: foo_noprotect:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: andq $-4096, %rsp # imm = 0xF000
++; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000
++; CHECK-NEXT: movl $1, 392(%rsp)
++; CHECK-NEXT: movl $1, 28792(%rsp)
++; CHECK-NEXT: movl (%rsp), %eax
++; CHECK-NEXT: movq %rbp, %rsp
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++
++ %a = alloca i32, i64 18000, align 4096
++ %b0 = getelementptr inbounds i32, i32* %a, i64 98
++ %b1 = getelementptr inbounds i32, i32* %a, i64 7198
++ store volatile i32 1, i32* %b0
++ store volatile i32 1, i32* %b1
++ %c = load volatile i32, i32* %a
++ ret i32 %c
++}
++
++define i32 @foo_protect() local_unnamed_addr #0 {
++; CHECK-LABEL: foo_protect:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: movq %rsp, %r11
++; CHECK-NEXT: andq $-4096, %r11 # imm = 0xF000
++; CHECK-NEXT: cmpq %rsp, %r11
++; CHECK-NEXT: je .LBB1_4
++; CHECK-NEXT:# %bb.1:
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: cmpq %rsp, %r11
++; CHECK-NEXT: jb .LBB1_3
++; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: cmpq %rsp, %r11
++; CHECK-NEXT: jb .LBB1_2
++; CHECK-NEXT:.LBB1_3:
++; CHECK-NEXT: movq %r11, %rsp
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT:.LBB1_4:
++; CHECK-NEXT: movq %rsp, %r11
++; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000
++; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: cmpq %r11, %rsp
++; CHECK-NEXT: jne .LBB1_5
++; CHECK-NEXT:# %bb.6:
++; CHECK-NEXT: movl $1, 392(%rsp)
++; CHECK-NEXT: movl $1, 28792(%rsp)
++; CHECK-NEXT: movl (%rsp), %eax
++; CHECK-NEXT: movq %rbp, %rsp
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++
++
++
++ %a = alloca i32, i64 18000, align 4096
++ %b0 = getelementptr inbounds i32, i32* %a, i64 98
++ %b1 = getelementptr inbounds i32, i32* %a, i64 7198
++ store volatile i32 1, i32* %b0
++ store volatile i32 1, i32* %b1
++ %c = load volatile i32, i32* %a
++ ret i32 %c
++}
++
++attributes #0 = {"probe-stack"="inline-asm"}
+diff --git a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll b/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
+deleted file mode 100644
+index 652acbdf00ba..000000000000
+--- a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
++++ /dev/null
+@@ -1,27 +0,0 @@
+-; RUN: llc < %s | FileCheck %s
+-
+-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+-target triple = "x86_64-unknown-linux-gnu"
+-
+-define i32 @foo(i64 %i) local_unnamed_addr #0 {
+-; CHECK-LABEL: foo:
+-; CHECK: # %bb.0:
+-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+-; CHECK-NEXT: movq $0, (%rsp)
+-; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
+-; CHECK-NEXT: .cfi_def_cfa_offset 7888
+-; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
+-; CHECK-NEXT: movl -128(%rsp), %eax
+-; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
+-; CHECK-NEXT: .cfi_def_cfa_offset 8
+-; CHECK-NEXT: retq
+-
+- %a = alloca i32, i32 2000, align 16
+- %b = getelementptr inbounds i32, i32* %a, i64 %i
+- store volatile i32 1, i32* %b
+- %c = load volatile i32, i32* %a
+- ret i32 %c
+-}
+-
+-attributes #0 = {"probe-stack"="inline-asm"}
+-
+diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+new file mode 100644
+index 000000000000..eafa86f1eba9
+--- /dev/null
++++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+@@ -0,0 +1,135 @@
++; RUN: llc < %s | FileCheck %s
++
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
++target triple = "x86_64-unknown-linux-gnu"
++
++; | case1 | alloca + align < probe_size
++define i32 @foo1(i64 %i) local_unnamed_addr #0 {
++; CHECK-LABEL: foo1:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: andq $-64, %rsp
++; CHECK-NEXT: subq $832, %rsp # imm = 0x340
++; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
++; CHECK-NEXT: movl (%rsp), %eax
++; CHECK-NEXT: movq %rbp, %rsp
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++ %a = alloca i32, i32 200, align 64
++ %b = getelementptr inbounds i32, i32* %a, i64 %i
++ store volatile i32 1, i32* %b
++ %c = load volatile i32, i32* %a
++ ret i32 %c
++}
++
++; | case2 | alloca > probe_size, align > probe_size
++define i32 @foo2(i64 %i) local_unnamed_addr #0 {
++; CHECK-LABEL: foo2:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800
++; CHECK-NEXT: subq $2048, %rsp # imm = 0x800
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: subq $2048, %rsp # imm = 0x800
++; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
++; CHECK-NEXT: movl (%rsp), %eax
++; CHECK-NEXT: movq %rbp, %rsp
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++ %a = alloca i32, i32 2000, align 2048
++ %b = getelementptr inbounds i32, i32* %a, i64 %i
++ store volatile i32 1, i32* %b
++ %c = load volatile i32, i32* %a
++ ret i32 %c
++}
++
++; | case3 | alloca < probe_size, align < probe_size, alloca + align > probe_size
++define i32 @foo3(i64 %i) local_unnamed_addr #0 {
++; CHECK-LABEL: foo3:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00
++; CHECK-NEXT: subq $3072, %rsp # imm = 0xC00
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: subq $1024, %rsp # imm = 0x400
++; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
++; CHECK-NEXT: movl (%rsp), %eax
++; CHECK-NEXT: movq %rbp, %rsp
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++
++ %a = alloca i32, i32 1000, align 1024
++ %b = getelementptr inbounds i32, i32* %a, i64 %i
++ store volatile i32 1, i32* %b
++ %c = load volatile i32, i32* %a
++ ret i32 %c
++}
++
++; | case4 | alloca + align < probe_size, followed by dynamic alloca
++define i32 @foo4(i64 %i) local_unnamed_addr #0 {
++; CHECK-LABEL: foo4:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: pushq %rbx
++; CHECK-NEXT: andq $-64, %rsp
++; CHECK-NEXT: subq $896, %rsp # imm = 0x380
++; CHECK-NEXT: movq %rsp, %rbx
++; CHECK-NEXT: .cfi_offset %rbx, -24
++; CHECK-NEXT: movl $1, (%rbx,%rdi,4)
++; CHECK-NEXT: movl (%rbx), %ecx
++; CHECK-NEXT: movq %rsp, %rax
++; CHECK-NEXT: leaq 15(,%rcx,4), %rcx
++; CHECK-NEXT: andq $-16, %rcx
++; CHECK-NEXT: subq %rcx, %rax
++; CHECK-NEXT: cmpq %rsp, %rax
++; CHECK-NEXT: jle .LBB3_3
++; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: cmpq %rsp, %rax
++; CHECK-NEXT: jg .LBB3_2
++; CHECK-NEXT:.LBB3_3:
++; CHECK-NEXT: andq $-64, %rax
++; CHECK-NEXT: movq %rax, %rsp
++; CHECK-NEXT: movl (%rax), %eax
++; CHECK-NEXT: leaq -8(%rbp), %rsp
++; CHECK-NEXT: popq %rbx
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++ %a = alloca i32, i32 200, align 64
++ %b = getelementptr inbounds i32, i32* %a, i64 %i
++ store volatile i32 1, i32* %b
++ %c = load volatile i32, i32* %a
++ %d = alloca i32, i32 %c, align 64
++ %e = load volatile i32, i32* %d
++ ret i32 %e
++}
++
++attributes #0 = {"probe-stack"="inline-asm"}
++
+diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
+new file mode 100644
+index 000000000000..e608bab90415
+--- /dev/null
++++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
+@@ -0,0 +1,83 @@
++; RUN: llc < %s | FileCheck %s
++
++
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
++target triple = "x86_64-unknown-linux-gnu"
++
++define i32 @foo_noprotect() local_unnamed_addr {
++; CHECK-LABEL: foo_noprotect:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: andq $-65536, %rsp
++; CHECK-NEXT: subq $65536, %rsp
++; CHECK-NEXT: movl $1, 392(%rsp)
++; CHECK-NEXT: movl (%rsp), %eax
++; CHECK-NEXT: movq %rbp, %rsp
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++
++
++ %a = alloca i32, i64 100, align 65536
++ %b = getelementptr inbounds i32, i32* %a, i64 98
++ store volatile i32 1, i32* %b
++ %c = load volatile i32, i32* %a
++ ret i32 %c
++}
++
++define i32 @foo_protect() local_unnamed_addr #0 {
++; CHECK-LABEL: foo_protect:
++; CHECK: # %bb.0:
++; CHECK-NEXT: pushq %rbp
++; CHECK-NEXT: .cfi_def_cfa_offset 16
++; CHECK-NEXT: .cfi_offset %rbp, -16
++; CHECK-NEXT: movq %rsp, %rbp
++; CHECK-NEXT: .cfi_def_cfa_register %rbp
++; CHECK-NEXT: movq %rsp, %r11
++; CHECK-NEXT: andq $-65536, %r11 # imm = 0xFFFF0000
++; CHECK-NEXT: cmpq %rsp, %r11
++; CHECK-NEXT: je .LBB1_4
++; CHECK-NEXT:# %bb.1:
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: cmpq %rsp, %r11
++; CHECK-NEXT: jb .LBB1_3
++; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: cmpq %rsp, %r11
++; CHECK-NEXT: jb .LBB1_2
++; CHECK-NEXT:.LBB1_3:
++; CHECK-NEXT: movq %r11, %rsp
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT:.LBB1_4:
++; CHECK-NEXT: movq %rsp, %r11
++; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000
++; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
++; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
++; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: cmpq %r11, %rsp
++; CHECK-NEXT: jne .LBB1_5
++; CHECK-NEXT:# %bb.6:
++; CHECK-NEXT: movl $1, 392(%rsp)
++; CHECK-NEXT: movl (%rsp), %eax
++; CHECK-NEXT: movq %rbp, %rsp
++; CHECK-NEXT: popq %rbp
++; CHECK-NEXT: .cfi_def_cfa %rsp, 8
++; CHECK-NEXT: retq
++
++
++
++
++ %a = alloca i32, i64 100, align 65536
++ %b = getelementptr inbounds i32, i32* %a, i64 98
++ store volatile i32 1, i32* %b
++ %c = load volatile i32, i32* %a
++ ret i32 %c
++}
++
++attributes #0 = {"probe-stack"="inline-asm"}
+
+From bbe6cbbed8c7460a7e8477373b9250543362e771 Mon Sep 17 00:00:00 2001
+From: serge-sans-paille <sguelton at redhat.com>
+Date: Tue, 27 Oct 2020 10:59:42 +0100
+Subject: [PATCH 3/3] [stack-clash] Fix probing of dynamic alloca
+
+- Perform the probing in the correct direction.
+ Related to https://github.com/rust-lang/rust/pull/77885#issuecomment-711062924
+
+- The first touch on a dynamic alloca cannot use a mov because it clobbers
+ existing space. Use a xor 0 instead
+
+Differential Revision: https://reviews.llvm.org/D90216
+
+(cherry picked from commit 0f60bcc36c34522618bd1425a45f8c6006568fb6)
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++----
+ llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 12 ++++++------
+ .../X86/stack-clash-small-alloc-medium-align.ll | 6 +++---
+ 3 files changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index f68ae4461fe3..afe470cc6e0b 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
+
+ BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+ .addMBB(tailMBB)
+- .addImm(X86::COND_LE);
++ .addImm(X86::COND_GE);
+ testMBB->addSuccessor(blockMBB);
+ testMBB->addSuccessor(tailMBB);
+
+@@ -31892,9 +31892,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
+ //
+ // The property we want to enforce is to never have more than [page alloc] between two probes.
+
+- const unsigned MovMIOpc =
+- TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
+- addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
++ const unsigned XORMIOpc =
++ TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
++ addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
+ .addImm(0);
+
+ BuildMI(blockMBB, DL,
+diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+index 82fd67842c8a..6dd8b6ab5897 100644
+--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
++++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
+@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
+ ; CHECK-X86-64-NEXT: andq $-16, %rcx
+ ; CHECK-X86-64-NEXT: subq %rcx, %rax
+ ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
+-; CHECK-X86-64-NEXT: jle .LBB0_3
++; CHECK-X86-64-NEXT: jge .LBB0_3
+ ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
+-; CHECK-X86-64-NEXT: movq $0, (%rsp)
++; CHECK-X86-64-NEXT: xorq $0, (%rsp)
+ ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
+ ; CHECK-X86-64-NEXT: cmpq %rsp, %rax
+-; CHECK-X86-64-NEXT: jg .LBB0_2
++; CHECK-X86-64-NEXT: jl .LBB0_2
+ ; CHECK-X86-64-NEXT: .LBB0_3:
+ ; CHECK-X86-64-NEXT: movq %rax, %rsp
+ ; CHECK-X86-64-NEXT: movl $1, 4792(%rax)
+@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
+ ; CHECK-X86-32-NEXT: andl $-16, %ecx
+ ; CHECK-X86-32-NEXT: subl %ecx, %eax
+ ; CHECK-X86-32-NEXT: cmpl %esp, %eax
+-; CHECK-X86-32-NEXT: jle .LBB0_3
++; CHECK-X86-32-NEXT: jge .LBB0_3
+ ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
+-; CHECK-X86-32-NEXT: movl $0, (%esp)
++; CHECK-X86-32-NEXT: xorl $0, (%esp)
+ ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
+ ; CHECK-X86-32-NEXT: cmpl %esp, %eax
+-; CHECK-X86-32-NEXT: jg .LBB0_2
++; CHECK-X86-32-NEXT: jl .LBB0_2
+ ; CHECK-X86-32-NEXT: .LBB0_3:
+ ; CHECK-X86-32-NEXT: movl %eax, %esp
+ ; CHECK-X86-32-NEXT: movl $1, 4792(%eax)
+diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+index eafa86f1eba9..39b6c3640a60 100644
+--- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
++++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
+@@ -106,12 +106,12 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
+ ; CHECK-NEXT: andq $-16, %rcx
+ ; CHECK-NEXT: subq %rcx, %rax
+ ; CHECK-NEXT: cmpq %rsp, %rax
+-; CHECK-NEXT: jle .LBB3_3
++; CHECK-NEXT: jge .LBB3_3
+ ; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1
+-; CHECK-NEXT: movq $0, (%rsp)
++; CHECK-NEXT: xorq $0, (%rsp)
+ ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
+ ; CHECK-NEXT: cmpq %rsp, %rax
+-; CHECK-NEXT: jg .LBB3_2
++; CHECK-NEXT: jl .LBB3_2
+ ; CHECK-NEXT:.LBB3_3:
+ ; CHECK-NEXT: andq $-64, %rax
+ ; CHECK-NEXT: movq %rax, %rsp
More information about the arch-commits
mailing list