From 1708358fbbf58d4a870be015ef21eeea9adae87b Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 17 Aug 2020 15:31:32 -0700 Subject: [PATCH 001/109] lld: link libatomic if needed for Timer D80298 made Timer::total atomic, but this requires linking libatomic on some targets. Reviewed By: aaronpuchert Differential Revision: https://reviews.llvm.org/D85691 (cherry picked from commit b26b32b5d3b85812a12f5e3bf011428612f78e19) --- lld/CMakeLists.txt | 1 + lld/Common/CMakeLists.txt | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 5090c935e75ad..040bb2c8f6d75 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -54,6 +54,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(AddLLVM) include(TableGen) include(HandleLLVMOptions) + include(CheckAtomic) if(LLVM_INCLUDE_TESTS) if(CMAKE_VERSION VERSION_LESS 3.12) diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 53649032bd987..41eb58c151986 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -2,6 +2,12 @@ if(NOT LLD_BUILT_STANDALONE) set(tablegen_deps intrinsics_gen) endif() +set(LLD_SYSTEM_LIBS ${LLVM_PTHREAD_LIB}) + +if(NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) + list(APPEND LLD_SYSTEM_LIBS atomic) +endif() + find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc) find_first_existing_vc_file("${LLD_SOURCE_DIR}" lld_vc) @@ -54,7 +60,7 @@ add_lld_library(lldCommon Target LINK_LIBS - ${LLVM_PTHREAD_LIB} + ${LLD_SYSTEM_LIBS} DEPENDS ${tablegen_deps} From 0c37a9165611880b99b1f9632179864ecb3f2e13 Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Sat, 22 Aug 2020 23:04:22 +0300 Subject: [PATCH 002/109] [LLD][COFF] Reset outputSections for successive runs The global variable outputSections in the COFF writer was not cleared between runs which caused successive calls to lld::coff::link to generate invalid binaries. These binaries when loaded would result in "invalid win32 applications" and/or "bad image" errors. Differential Revision: https://reviews.llvm.org/D86401 (cherry picked from commit 54f5a4ea4c859cf7f34f0d4955abc3a2f44bd0dc) --- lld/COFF/Writer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 082de5b8c1d62..0188f0971a754 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -599,6 +599,9 @@ void Writer::finalizeAddresses() { void Writer::run() { ScopedTimer t1(codeLayoutTimer); + // First, clear the output sections from previous runs + outputSections.clear(); + createImportTables(); createSections(); createMiscChunks(); From 82e48a579024d0ffbc352702ec0c52b47a6fe691 Mon Sep 17 00:00:00 2001 From: "Mott, Jeffrey T" Date: Fri, 17 Jul 2020 09:50:08 -0700 Subject: [PATCH 003/109] Disable use of _ExtInt with '__atomic' builtins We're (temporarily) disabling ExtInt for the '__atomic' builtins so we can better design their behavior later. The idea is until we do an audit/design for the way atomic builtins are supposed to work with _ExtInt, we should leave them restricted so they don't limit our future options, such as by binding us to a sub-optimal implementation via ABI. Example after this change: $ cat test.c void f(_ExtInt(64) *ptr) { __atomic_fetch_add(ptr, 1, 0); } $ clang -c test.c test.c:2:22: error: argument to atomic builtin of type '_ExtInt' is not supported __atomic_fetch_add(ptr, 1, 0); ^ 1 error generated. 
Differential Revision: https://reviews.llvm.org/D84049 (cherry picked from commit ca77ab494aa29f7521ff797d230cd1b36cbe4e62) --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 7 ++++--- clang/lib/Sema/SemaChecking.cpp | 5 +++++ clang/lib/Sema/SemaType.cpp | 5 +---- clang/test/Sema/builtins.c | 4 ++++ clang/test/SemaCXX/ext-int.cpp | 5 +++-- libcxx/test/libcxx/atomics/ext-int.verify.cpp | 11 +++++++++++ 6 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 libcxx/test/libcxx/atomics/ext-int.verify.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index aa4de2812312d..941f2cafc3727 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6021,9 +6021,8 @@ def err_func_def_incomplete_result : Error< def err_atomic_specifier_bad_type : Error<"_Atomic cannot be applied to " "%select{incomplete |array |function |reference |atomic |qualified " - "|sizeless ||integer |integer }0type " - "%1 %select{|||||||which is not trivially copyable|with less than " - "1 byte of precision|with a non power of 2 precision}0">; + "|sizeless ||integer }0type " + "%1 %select{|||||||which is not trivially copyable|}0">; // Expressions. def ext_sizeof_alignof_function_type : Extension< @@ -7941,6 +7940,8 @@ def err_atomic_exclusive_builtin_pointer_size : Error< " 1,2,4 or 8 byte type (%0 invalid)">; def err_atomic_builtin_ext_int_size : Error< "Atomic memory operand must have a power-of-two size">; +def err_atomic_builtin_ext_int_prohibit : Error< + "argument to atomic builtin of type '_ExtInt' is not supported">; def err_atomic_op_needs_atomic : Error< "address argument to atomic operation must be a pointer to _Atomic " "type (%0 invalid)">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 509d88e25000e..b00d2ff5f1d59 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -4956,6 +4956,11 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, ? 
0 : 1); + if (ValType->isExtIntType()) { + Diag(Ptr->getExprLoc(), diag::err_atomic_builtin_ext_int_prohibit); + return ExprError(); + } + return AE; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b8f7f1a581590..cc151a048b98b 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8880,11 +8880,8 @@ QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) { else if (!T.isTriviallyCopyableType(Context)) // Some other non-trivially-copyable type (probably a C++ class) DisallowedKind = 7; - else if (auto *ExtTy = T->getAs()) { - if (ExtTy->getNumBits() < 8) + else if (T->isExtIntType()) { DisallowedKind = 8; - else if (!llvm::isPowerOf2_32(ExtTy->getNumBits())) - DisallowedKind = 9; } if (DisallowedKind != -1) { diff --git a/clang/test/Sema/builtins.c b/clang/test/Sema/builtins.c index 90c033e47cd17..3e0a9cbfdeaf3 100644 --- a/clang/test/Sema/builtins.c +++ b/clang/test/Sema/builtins.c @@ -285,12 +285,16 @@ void test_ei_i42i(_ExtInt(42) *ptr, int value) { __sync_fetch_and_add(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}} // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}} __sync_nand_and_fetch(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}} + + __atomic_fetch_add(ptr, 1, 0); // expected-error {{argument to atomic builtin of type '_ExtInt' is not supported}} } void test_ei_i64i(_ExtInt(64) *ptr, int value) { __sync_fetch_and_add(ptr, value); // expect success // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}} __sync_nand_and_fetch(ptr, value); // expect success + + __atomic_fetch_add(ptr, 1, 0); // expected-error {{argument to atomic builtin of type '_ExtInt' is not supported}} } void test_ei_ii42(int *ptr, _ExtInt(42) value) { diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp index 14f11a6bb9614..f4c2dc4752ee7 100644 --- a/clang/test/SemaCXX/ext-int.cpp +++ b/clang/test/SemaCXX/ext-int.cpp @@ -91,10 +91,11 @@ typedef _ExtInt(32) __attribute__((vector_size(16))) VecTy; _Complex _ExtInt(3) Cmplx; // Reject cases of _Atomic: -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)' with less than 1 byte of precision}} +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)'}} _Atomic _ExtInt(4) TooSmallAtomic; -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)' with a non power of 2 precision}} +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)'}} _Atomic _ExtInt(9) NotPow2Atomic; +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(128)'}} _Atomic _ExtInt(128) JustRightAtomic; // Test result types of Unary/Bitwise/Binary Operations: diff --git a/libcxx/test/libcxx/atomics/ext-int.verify.cpp b/libcxx/test/libcxx/atomics/ext-int.verify.cpp new file mode 100644 index 0000000000000..3f57437f43cc6 --- /dev/null +++ b/libcxx/test/libcxx/atomics/ext-int.verify.cpp @@ -0,0 +1,11 @@ +// REQUIRES: clang-11 + +#include + +int main(int, char**) +{ + // expected-error@atomic:*1 {{_Atomic cannot be applied to integer type '_ExtInt(32)'}} + std::atomic<_ExtInt(32)> x {42}; + + return 0; +} From dcdf2aff02a154f9f862d6f01feff3de1389cc47 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Fri, 21 Aug 2020 01:10:52 +0000 Subject: [PATCH 004/109] [PowerPC] Fix a typo for InstAlias of mfsprg 
D77531 has a type for mfsprg, it should be mtsprg. This patch is to fix this typo. (cherry picked from commit 95e18b2d9d5f93c209ea81df79c2e18ef77de506) --- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 6956c40a70be5..de42d354a0488 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1026,8 +1026,8 @@ def : InstAlias<"mfamr $Rx", (MFSPR8 g8rc:$Rx, 29)>; foreach SPRG = 0-3 in { def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR8 g8rc:$RT, !add(SPRG, 272))>; def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR8 g8rc:$RT, !add(SPRG, 272))>; - def : InstAlias<"mfsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>; - def : InstAlias<"mfsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>; + def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>; + def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>; } def : InstAlias<"mfasr $RT", (MFSPR8 g8rc:$RT, 280)>; From d6d03d09e3f7498f60e2976b8cea235080f55fe7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 19 Aug 2020 22:48:28 -0700 Subject: [PATCH 005/109] [ELF][test] Fix some llvm-objdump RUN lines which don't actually test anything (cherry picked from commit ac46bc35e98d922f1b05b451341f03dcaccd1527) --- lld/test/ELF/arm-ldrlit.s | 1 - lld/test/ELF/arm-thumb-interwork-ifunc.s | 20 ++++++++++---------- lld/test/ELF/arm-thumb2-adr.s | 1 - 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/lld/test/ELF/arm-ldrlit.s b/lld/test/ELF/arm-ldrlit.s index a466b0a8e1d1a..b6ee2d8e9da0f 100644 --- a/lld/test/ELF/arm-ldrlit.s +++ b/lld/test/ELF/arm-ldrlit.s @@ -1,6 +1,5 @@ // REQUIRES: arm // RUN: llvm-mc --triple=armv7a-none-eabi --arm-add-build-attributes -filetype=obj -o %t.o %s -// RUN: llvm-objdump -d -r --triple=armv7a-none-eabi %t.o // RUN: echo "SECTIONS { \ // RUN: .rodata.low 0x8012 : { *(.rodata.low) } \ // RUN: .text.low 0x8f00 : { *(.text.low) } \ diff --git a/lld/test/ELF/arm-thumb-interwork-ifunc.s b/lld/test/ELF/arm-thumb-interwork-ifunc.s index 319737a08aad5..947bc2dd77863 100644 --- a/lld/test/ELF/arm-thumb-interwork-ifunc.s +++ b/lld/test/ELF/arm-thumb-interwork-ifunc.s @@ -1,7 +1,7 @@ // REQUIRES: arm // RUN: llvm-mc --triple=armv7a-linux-gnueabihf -arm-add-build-attributes -filetype=obj -o %t.o %s // RUN: ld.lld %t.o -o %t -// RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t +// RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t | FileCheck %s /// Non-preemptible ifuncs are called via a PLT entry which is always Arm /// state, expect the ARM callers to go direct to the PLT entry, Thumb @@ -30,23 +30,23 @@ thumb_caller: b.w foo bl foo -// CHECK: 00012004 _start: -// CHECK-NEXT: b #36 -// CHECK-NEXT: bl #32 +// CHECK: 00021004 <_start>: +// CHECK-NEXT: b #36 <$a> +// CHECK-NEXT: bl #32 <$a> -// CHECK: 0001200c thumb_caller: +// CHECK: 0002100c : // CHECK-NEXT: b.w #8 // CHECK-NEXT: b.w #4 // CHECK-NEXT: blx #24 -// CHECK: 00012018 __Thumbv7ABSLongThunk_foo: -// CHECK-NEXT: movw r12, #8240 -// CHECK-NEXT: movt r12, #1 +// CHECK: 00021018 <__Thumbv7ABSLongThunk_foo>: +// CHECK-NEXT: movw r12, #4144 +// CHECK-NEXT: movt r12, #2 // CHECK-NEXT: bx r12 // CHECK: Disassembly of section .iplt: -// CHECK: 00012030 $a: +// CHECK: 00021030 <$a>: // CHECK-NEXT: add r12, pc, #0, #12 -// CHECK-NEXT: add r12, r12, #4096 +// CHECK-NEXT: add r12, r12, 
#16, #20 // CHECK-NEXT: ldr pc, [r12, #8]! diff --git a/lld/test/ELF/arm-thumb2-adr.s b/lld/test/ELF/arm-thumb2-adr.s index a6895bc878b6b..c0c7cfcc3fd2b 100644 --- a/lld/test/ELF/arm-thumb2-adr.s +++ b/lld/test/ELF/arm-thumb2-adr.s @@ -10,7 +10,6 @@ // RUN: } " > %t.script // RUN: ld.lld --script %t.script %t.o -o %t // RUN: llvm-readobj --symbols %t | FileCheck %s --check-prefix=SYMS -// RUN: llvm-objdump -d --triple=thumbv7m-none-eabi %t // RUN: llvm-objdump -d --no-show-raw-insn --triple=thumbv7m-none-eabi %t | FileCheck %s /// Test the various legal cases for the R_ARM_THM_ALU_PREL_11_0 relocation From c4e216711d001316df2313a85c8af73a85d804c0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 24 Aug 2020 12:17:48 -0700 Subject: [PATCH 006/109] [ELF] Keep st_type for symbol assignment PR46970: for `alias = aliasee`, the alias can be used in relocation processing and on ARM st_type does affect Thumb interworking. It is thus desirable for the alias to get the same st_type. Note that the st_size field should not be inherited because some tools use st_size=0 as a heuristic to detect aliases. Retaining st_size can thwart such heuristics and cause aliases to be preferred over the original symbols. Differential Revision: https://reviews.llvm.org/D86263 (cherry picked from commit 9670029b6b302c75bb373fb1814f4e02790c4da8) The test symbol-assign-type.s was rewritten to not depend on 'split-file'. --- lld/ELF/LinkerScript.cpp | 13 +++++-- lld/ELF/LinkerScript.h | 4 ++ lld/docs/ELF/linker_script.rst | 19 +++++++++ lld/test/ELF/arm-thumb-interwork-ifunc.s | 11 ++++++ lld/test/ELF/linkerscript/common-assign.s | 4 +- .../ELF/linkerscript/symbol-assign-type.s | 39 +++++++++++++++++++ 6 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 lld/test/ELF/linkerscript/symbol-assign-type.s diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 72e2ebff9b8ca..6de2cd65b973b 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -180,7 +180,7 @@ void LinkerScript::addSymbol(SymbolAssignment *cmd) { // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`. uint64_t symValue = value.sec ? 0 : value.getValue(); - Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, + Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, value.type, symValue, 0, sec); Symbol *sym = symtab->insert(cmd->name); @@ -317,6 +317,7 @@ void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) { cmd->sym->section = v.sec; cmd->sym->value = v.getSectionOffset(); } + cmd->sym->type = v.type; } static std::string getFilename(InputFile *file) { @@ -1215,8 +1216,14 @@ ExprValue LinkerScript::getSymbolValue(StringRef name, const Twine &loc) { } if (Symbol *sym = symtab->find(name)) { - if (auto *ds = dyn_cast(sym)) - return {ds->section, false, ds->value, loc}; + if (auto *ds = dyn_cast(sym)) { + ExprValue v{ds->section, false, ds->value, loc}; + // Retain the original st_type, so that the alias will get the same + // behavior in relocation processing. Any operation will reset st_type to + // STT_NOTYPE. + v.type = ds->type; + return v; + } if (isa(sym)) if (!errorOnMissingSection) return {nullptr, false, 0, loc}; diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index ec4fc22db486b..4a1a5fd71b67f 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -59,6 +59,10 @@ struct ExprValue { uint64_t val; uint64_t alignment = 1; + // The original st_type if the expression represents a symbol. 
Any operation + // resets type to STT_NOTYPE. + uint8_t type = llvm::ELF::STT_NOTYPE; + // Original source location. Used for error messages. std::string loc; }; diff --git a/lld/docs/ELF/linker_script.rst b/lld/docs/ELF/linker_script.rst index 0f409b2020ace..debddbf511b60 100644 --- a/lld/docs/ELF/linker_script.rst +++ b/lld/docs/ELF/linker_script.rst @@ -17,6 +17,25 @@ possible. We reserve the right to make different implementation choices where it is appropriate for LLD. Intentional deviations will be documented in this file. +Symbol assignment +~~~~~~~~~~~~~~~~~ + +A symbol assignment looks like: + +:: + + symbol = expression; + symbol += expression; + +The first form defines ``symbol``. If ``symbol`` is already defined, it will be +overridden. The other form requires ``symbol`` to be already defined. + +For a simple assignment like ``alias = aliasee;``, the ``st_type`` field is +copied from the original symbol. Any arithmetic operation (e.g. ``+ 0`` will +reset ``st_type`` to ``STT_NOTYPE``. + +The ``st_size`` field is set to 0. + Output section description ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/lld/test/ELF/arm-thumb-interwork-ifunc.s b/lld/test/ELF/arm-thumb-interwork-ifunc.s index 947bc2dd77863..f77439c6c50b4 100644 --- a/lld/test/ELF/arm-thumb-interwork-ifunc.s +++ b/lld/test/ELF/arm-thumb-interwork-ifunc.s @@ -3,6 +3,11 @@ // RUN: ld.lld %t.o -o %t // RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t | FileCheck %s +/// A symbol assignment defined alias inherits st_type and gets the same treatment. +// RUN: llvm-mc --triple=armv7a-linux-gnueabihf -arm-add-build-attributes -filetype=obj --defsym ALIAS=1 -o %t1.o %s +// RUN: ld.lld --defsym foo=foo1 %t1.o -o %t1 +// RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t | FileCheck %s + /// Non-preemptible ifuncs are called via a PLT entry which is always Arm /// state, expect the ARM callers to go direct to the PLT entry, Thumb /// branches are indirected via state change thunks, the bl is changed to blx. @@ -10,9 +15,15 @@ .syntax unified .text .balign 0x1000 +.ifdef ALIAS + .type foo1 STT_GNU_IFUNC + .globl foo1 +foo1: +.else .type foo STT_GNU_IFUNC .globl foo foo: +.endif bx lr .section .text.1, "ax", %progbits diff --git a/lld/test/ELF/linkerscript/common-assign.s b/lld/test/ELF/linkerscript/common-assign.s index ef0ad14ce92dd..f0d783886e4d6 100644 --- a/lld/test/ELF/linkerscript/common-assign.s +++ b/lld/test/ELF/linkerscript/common-assign.s @@ -27,7 +27,7 @@ # CHECK-NEXT: Value: [[FOO]] # CHECK-NEXT: Size: 0 # CHECK-NEXT: Binding: Global -# CHECK-NEXT: Type: None +# CHECK-NEXT: Type: Object # CHECK-NEXT: Other: 0 # CHECK-NEXT: Section: .bss # CHECK-NEXT: } @@ -36,7 +36,7 @@ # CHECK-NEXT: Value: [[BAR]] # CHECK-NEXT: Size: 0 # CHECK-NEXT: Binding: Global -# CHECK-NEXT: Type: None +# CHECK-NEXT: Type: Object # CHECK-NEXT: Other: 0 # CHECK-NEXT: Section: .bss # CHECK-NEXT: } diff --git a/lld/test/ELF/linkerscript/symbol-assign-type.s b/lld/test/ELF/linkerscript/symbol-assign-type.s new file mode 100644 index 0000000000000..c3db8ce8c8ae9 --- /dev/null +++ b/lld/test/ELF/linkerscript/symbol-assign-type.s @@ -0,0 +1,39 @@ +# REQUIRES: x86 +## Keep st_type for simple assignment (`alias = aliasee`). This property is +## desired on some targets, where symbol types can affect relocation processing +## (e.g. Thumb interworking). However, the st_size field should not be retained +## because some tools use st_size=0 as a heuristic to detect aliases. 
With any +## operation, it can be argued that the new symbol may not be of the same type, +## so reset st_type to STT_NOTYPE. + +## NOTE: GNU ld retains st_type for many operations. + +# RUN: echo 'retain1 = _start; \ +# RUN: retain2 = 1 ? _start : 0; \ +# RUN: drop1 = _start + 0; \ +# RUN: drop2 = 0 ? _start : 1; \ +# RUN: drop3 = -_start; \ +# RUN: ' > %t.lds +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -T %t.lds %t.o -o %t1 +# RUN: llvm-readelf -s %t1 | FileCheck %s + +# CHECK: Size Type Bind Vis Ndx Name +# CHECK: 1 FUNC GLOBAL DEFAULT 1 _start +# CHECK: 0 FUNC GLOBAL DEFAULT 1 retain1 +# CHECK-NEXT: 0 FUNC GLOBAL DEFAULT 1 retain2 +# CHECK-NEXT: 0 NOTYPE GLOBAL DEFAULT 1 drop1 +# CHECK-NEXT: 0 NOTYPE GLOBAL DEFAULT ABS drop2 +# CHECK-NEXT: 0 NOTYPE GLOBAL DEFAULT ABS drop3 + +# RUN: ld.lld --defsym 'retain=_start' --defsym 'drop=_start+0' %t.o -o %t2 +# RUN: llvm-readelf -s %t2 | FileCheck %s --check-prefix=DEFSYM + +# DEFSYM: 0 FUNC GLOBAL DEFAULT 1 retain +# DEFSYM-NEXT: 0 NOTYPE GLOBAL DEFAULT 1 drop + +.globl _start +.type _start, @function +_start: + ret +.size _start, 1 From 6406b6fa5ac8192b0861c343509d98368b555d12 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 21 Aug 2020 21:03:00 +0200 Subject: [PATCH 007/109] Instantiate Error in Target::GetEntryPointAddress() only when necessary When `Target::GetEntryPointAddress()` calls `exe_module->GetObjectFile()->GetEntryPointAddress()`, and the returned `entry_addr` is valid, it can immediately be returned. However, just before that, an `llvm::Error` value has been setup, but in this case it is not consumed before returning, like is done further below in the function. In https://bugs.freebsd.org/248745 we got a bug report for this, where a very simple test case aborts and dumps core: ``` * thread #1, name = 'testcase', stop reason = breakpoint 1.1 frame #0: 0x00000000002018d4 testcase`main(argc=1, argv=0x00007fffffffea18) at testcase.c:3:5 1 int main(int argc, char *argv[]) 2 { -> 3 return 0; 4 } (lldb) p argc Program aborted due to an unhandled Error: Error value was Success. (Note: Success values must still be checked prior to being destroyed). Thread 1 received signal SIGABRT, Aborted. thr_kill () at thr_kill.S:3 3 thr_kill.S: No such file or directory. 
(gdb) bt #0 thr_kill () at thr_kill.S:3 #1 0x00000008049a0004 in __raise (s=6) at /usr/src/lib/libc/gen/raise.c:52 #2 0x0000000804916229 in abort () at /usr/src/lib/libc/stdlib/abort.c:67 #3 0x000000000451b5f5 in fatalUncheckedError () at /usr/src/contrib/llvm-project/llvm/lib/Support/Error.cpp:112 #4 0x00000000019cf008 in GetEntryPointAddress () at /usr/src/contrib/llvm-project/llvm/include/llvm/Support/Error.h:267 #5 0x0000000001bccbd8 in ConstructorSetup () at /usr/src/contrib/llvm-project/lldb/source/Target/ThreadPlanCallFunction.cpp:67 #6 0x0000000001bcd2c0 in ThreadPlanCallFunction () at /usr/src/contrib/llvm-project/lldb/source/Target/ThreadPlanCallFunction.cpp:114 #7 0x00000000020076d4 in InferiorCallMmap () at /usr/src/contrib/llvm-project/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp:97 #8 0x0000000001f4be33 in DoAllocateMemory () at /usr/src/contrib/llvm-project/lldb/source/Plugins/Process/FreeBSD/ProcessFreeBSD.cpp:604 #9 0x0000000001fe51b9 in AllocatePage () at /usr/src/contrib/llvm-project/lldb/source/Target/Memory.cpp:347 #10 0x0000000001fe5385 in AllocateMemory () at /usr/src/contrib/llvm-project/lldb/source/Target/Memory.cpp:383 #11 0x0000000001974da2 in AllocateMemory () at /usr/src/contrib/llvm-project/lldb/source/Target/Process.cpp:2301 #12 CanJIT () at /usr/src/contrib/llvm-project/lldb/source/Target/Process.cpp:2331 #13 0x0000000001a1bf3d in Evaluate () at /usr/src/contrib/llvm-project/lldb/source/Expression/UserExpression.cpp:190 #14 0x00000000019ce7a2 in EvaluateExpression () at /usr/src/contrib/llvm-project/lldb/source/Target/Target.cpp:2372 #15 0x0000000001ad784c in EvaluateExpression () at /usr/src/contrib/llvm-project/lldb/source/Commands/CommandObjectExpression.cpp:414 #16 0x0000000001ad86ae in DoExecute () at /usr/src/contrib/llvm-project/lldb/source/Commands/CommandObjectExpression.cpp:646 #17 0x0000000001a5e3ed in Execute () at /usr/src/contrib/llvm-project/lldb/source/Interpreter/CommandObject.cpp:1003 #18 0x0000000001a6c4a3 in HandleCommand () at /usr/src/contrib/llvm-project/lldb/source/Interpreter/CommandInterpreter.cpp:1762 #19 0x0000000001a6f98c in IOHandlerInputComplete () at /usr/src/contrib/llvm-project/lldb/source/Interpreter/CommandInterpreter.cpp:2760 #20 0x0000000001a90b08 in Run () at /usr/src/contrib/llvm-project/lldb/source/Core/IOHandler.cpp:548 #21 0x00000000019a6c6a in ExecuteIOHandlers () at /usr/src/contrib/llvm-project/lldb/source/Core/Debugger.cpp:903 #22 0x0000000001a70337 in RunCommandInterpreter () at /usr/src/contrib/llvm-project/lldb/source/Interpreter/CommandInterpreter.cpp:2946 #23 0x0000000001d9d812 in RunCommandInterpreter () at /usr/src/contrib/llvm-project/lldb/source/API/SBDebugger.cpp:1169 #24 0x0000000001918be8 in MainLoop () at /usr/src/contrib/llvm-project/lldb/tools/driver/Driver.cpp:675 #25 0x000000000191a114 in main () at /usr/src/contrib/llvm-project/lldb/tools/driver/Driver.cpp:890``` Fix the incorrect error catch by only instantiating an `Error` object if it is necessary. 
Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D86355 (cherry picked from commit 1ce07cd614beab5150a5440c7faf195009f99e2c) --- lldb/source/Target/Target.cpp | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index dad56376005c6..707344f99fcba 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -2401,21 +2401,13 @@ lldb::addr_t Target::GetPersistentSymbol(ConstString name) { llvm::Expected Target::GetEntryPointAddress() { Module *exe_module = GetExecutableModulePointer(); - llvm::Error error = llvm::Error::success(); - assert(!error); // Check the success value when assertions are enabled. - if (!exe_module || !exe_module->GetObjectFile()) { - error = llvm::make_error("No primary executable found", - llvm::inconvertibleErrorCode()); - } else { + // Try to find the entry point address in the primary executable. + const bool has_primary_executable = exe_module && exe_module->GetObjectFile(); + if (has_primary_executable) { Address entry_addr = exe_module->GetObjectFile()->GetEntryPointAddress(); if (entry_addr.IsValid()) return entry_addr; - - error = llvm::make_error( - "Could not find entry point address for executable module \"" + - exe_module->GetFileSpec().GetFilename().GetStringRef() + "\"", - llvm::inconvertibleErrorCode()); } const ModuleList &modules = GetImages(); @@ -2426,14 +2418,21 @@ llvm::Expected Target::GetEntryPointAddress() { continue; Address entry_addr = module_sp->GetObjectFile()->GetEntryPointAddress(); - if (entry_addr.IsValid()) { - // Discard the error. - llvm::consumeError(std::move(error)); + if (entry_addr.IsValid()) return entry_addr; - } } - return std::move(error); + // We haven't found the entry point address. Return an appropriate error. + if (!has_primary_executable) + return llvm::make_error( + "No primary executable found and could not find entry point address in " + "any executable module", + llvm::inconvertibleErrorCode()); + + return llvm::make_error( + "Could not find entry point address for primary executable module \"" + + exe_module->GetFileSpec().GetFilename().GetStringRef() + "\"", + llvm::inconvertibleErrorCode()); } lldb::addr_t Target::GetCallableLoadAddress(lldb::addr_t load_addr, From 0c001a171c7d671f0129d69c6a47b159544cdca6 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Sun, 23 Aug 2020 20:01:38 -0400 Subject: [PATCH 008/109] [clang][Driver] Implement AddClangSystemIncludeArgs and HasNativeLLVMSupport for the OpenBSD clang driver. If not overridden, AddClangSystemIncludeArgs's implementation is empty, so by default, no system include args are added to the Clang driver. This means that invoking Clang without the frontend must include a manual -I/usr/include flag, which is inconsistent behavior. Therefore, override and implement this method to match. Some boilerplate is also borrowed for handling of the other driver flags. While we are here, also override and enable HasNativeLLVMSupport. 
Patch by: 3405691582 (dana koch) Differential Revision: https://reviews.llvm.org/D86412 (cherry picked from commit 2b37174b9a5db235e493cb72e4454cc08a1b1791) --- clang/lib/Driver/ToolChains/OpenBSD.cpp | 37 +++++++++++++++++++++++++ clang/lib/Driver/ToolChains/OpenBSD.h | 6 ++++ clang/lib/Frontend/InitHeaderSearch.cpp | 2 ++ 3 files changed, 45 insertions(+) diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 9c1a9c5f82280..b0174ac62b58c 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -10,10 +10,12 @@ #include "Arch/Mips.h" #include "Arch/Sparc.h" #include "CommonArgs.h" +#include "clang/Config/config.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Path.h" using namespace clang::driver; using namespace clang::driver::tools; @@ -278,3 +280,38 @@ void OpenBSD::addClangTargetOptions(const ArgList &DriverArgs, options::OPT_fno_use_init_array, false)) CC1Args.push_back("-fno-use-init-array"); } + +void OpenBSD::AddClangSystemIncludeArgs( + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + const Driver &D = getDriver(); + + if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + return; + + if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { + SmallString<128> Dir(D.ResourceDir); + llvm::sys::path::append(Dir, "include"); + addSystemInclude(DriverArgs, CC1Args, Dir.str()); + } + + if (DriverArgs.hasArg(options::OPT_nostdlibinc)) + return; + + // Check for configure-time C include directories. + StringRef CIncludeDirs(C_INCLUDE_DIRS); + if (CIncludeDirs != "") { + SmallVector dirs; + CIncludeDirs.split(dirs, ":"); + for (StringRef dir : dirs) { + StringRef Prefix = + llvm::sys::path::is_absolute(dir) ? 
StringRef(D.SysRoot) : ""; + addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir); + } + return; + } + + addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include"); +} + +bool OpenBSD::HasNativeLLVMSupport() const { return true; } diff --git a/clang/lib/Driver/ToolChains/OpenBSD.h b/clang/lib/Driver/ToolChains/OpenBSD.h index 897eee57ab684..9924aa22e9d9d 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.h +++ b/clang/lib/Driver/ToolChains/OpenBSD.h @@ -65,6 +65,12 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF { return ToolChain::CST_Libcxx; } + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + + bool HasNativeLLVMSupport() const override; + void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; diff --git a/clang/lib/Frontend/InitHeaderSearch.cpp b/clang/lib/Frontend/InitHeaderSearch.cpp index 16f1f1670e8de..bc31445d6d08e 100644 --- a/clang/lib/Frontend/InitHeaderSearch.cpp +++ b/clang/lib/Frontend/InitHeaderSearch.cpp @@ -270,6 +270,7 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple, case llvm::Triple::Linux: case llvm::Triple::Hurd: case llvm::Triple::Solaris: + case llvm::Triple::OpenBSD: llvm_unreachable("Include management is handled in the driver."); case llvm::Triple::CloudABI: { @@ -423,6 +424,7 @@ void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang, case llvm::Triple::Emscripten: case llvm::Triple::Linux: case llvm::Triple::Hurd: + case llvm::Triple::OpenBSD: case llvm::Triple::Solaris: case llvm::Triple::WASI: case llvm::Triple::AIX: From e4f4d48665526c6d8001e8101dc539a7f7653aec Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Sun, 23 Aug 2020 20:44:29 -0400 Subject: [PATCH 009/109] [clang][Driver] Implement addLibCxxIncludePaths and getCompilerRT for the OpenBSD clang driver. (cherry picked from commit bf3577ef64c300ba7841a90a4e09e1e305271976) --- clang/lib/Driver/ToolChains/OpenBSD.cpp | 60 +++++++++++++++---------- clang/lib/Driver/ToolChains/OpenBSD.h | 9 +++- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index b0174ac62b58c..4f2d04058d249 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -258,29 +258,6 @@ OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple, getFilePaths().push_back(getDriver().SysRoot + "/usr/lib"); } -void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - bool Profiling = Args.hasArg(options::OPT_pg); - - CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++"); - CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi"); -} - -Tool *OpenBSD::buildAssembler() const { - return new tools::openbsd::Assembler(*this); -} - -Tool *OpenBSD::buildLinker() const { return new tools::openbsd::Linker(*this); } - -void OpenBSD::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, - Action::OffloadKind) const { - // Support for .init_array is still new (Aug 2016). 
- if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, - options::OPT_fno_use_init_array, false)) - CC1Args.push_back("-fno-use-init-array"); -} - void OpenBSD::AddClangSystemIncludeArgs( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { @@ -314,4 +291,41 @@ void OpenBSD::AddClangSystemIncludeArgs( addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include"); } +void OpenBSD::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + addSystemInclude(DriverArgs, CC1Args, + getDriver().SysRoot + "/usr/include/c++/v1"); +} + +void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + bool Profiling = Args.hasArg(options::OPT_pg); + + CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++"); + CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi"); +} + +std::string OpenBSD::getCompilerRT(const ArgList &Args, + StringRef Component, + FileType Type) const { + SmallString<128> Path(getDriver().SysRoot); + llvm::sys::path::append(Path, "/usr/lib/libcompiler_rt.a"); + return std::string(Path.str()); +} + +void OpenBSD::addClangTargetOptions(const ArgList &DriverArgs, + ArgStringList &CC1Args, + Action::OffloadKind) const { + // Support for .init_array is still new (Aug 2016). + if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, + options::OPT_fno_use_init_array, false)) + CC1Args.push_back("-fno-use-init-array"); +} + +Tool *OpenBSD::buildAssembler() const { + return new tools::openbsd::Assembler(*this); +} + +Tool *OpenBSD::buildLinker() const { return new tools::openbsd::Linker(*this); } + bool OpenBSD::HasNativeLLVMSupport() const { return true; } diff --git a/clang/lib/Driver/ToolChains/OpenBSD.h b/clang/lib/Driver/ToolChains/OpenBSD.h index 9924aa22e9d9d..09595faf9d6bb 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.h +++ b/clang/lib/Driver/ToolChains/OpenBSD.h @@ -54,6 +54,8 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF { OpenBSD(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); + bool HasNativeLLVMSupport() const override; + bool IsMathErrnoDefault() const override { return false; } bool IsObjCNonFragileABIDefault() const override { return true; } bool isPIEDefault() const override { return true; } @@ -69,11 +71,14 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF { AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - bool HasNativeLLVMSupport() const override; - + void addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; + std::string getCompilerRT(const llvm::opt::ArgList &Args, StringRef Component, + FileType Type = ToolChain::FT_Static) const override; + unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const override { return 2; } From 90c2c51a524f28c23c41c67eb71d7e278ede7524 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Tue, 28 Jul 2020 11:23:59 -0700 Subject: [PATCH 010/109] Revert "Switch to using -debug-info-kind=constructor as default (from =limited)" This reverts commit 227db86a1b7dd6f96f7df14890fcd071bc4fe1f5. Causing debug info errors in google3 LTO builds; also causes a debuginfo-test failure. 
(cherry picked from commit 394db2259575ef3cac8d3d37836b11eb2373c435) --- clang/lib/Driver/ToolChains/Clang.cpp | 16 +++++++--------- clang/test/Driver/cl-options.c | 6 +++--- clang/test/Driver/clang-g-opts.c | 2 +- clang/test/Driver/cuda-dwarf-2.cu | 2 +- clang/test/Driver/debug-options-as.c | 2 +- clang/test/Driver/debug-options.c | 8 ++++---- clang/test/Driver/integrated-as.s | 10 +++++----- clang/test/Driver/myriad-toolchain.c | 2 +- clang/test/Driver/openmp-offload-gpu.c | 2 +- clang/test/Driver/split-debug.c | 10 +++++----- .../SymbolFile/PDB/Inputs/ClassLayoutTest.cpp | 1 - 11 files changed, 29 insertions(+), 32 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c77ae5a44a0ef..f0a5451322aaa 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -498,7 +498,7 @@ static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) { return codegenoptions::DebugLineTablesOnly; if (A.getOption().matches(options::OPT_gline_directives_only)) return codegenoptions::DebugDirectivesOnly; - return codegenoptions::DebugInfoConstructor; + return codegenoptions::LimitedDebugInfo; } static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) { @@ -2380,7 +2380,7 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, CmdArgs.push_back(Value.data()); } else { RenderDebugEnablingArgs(Args, CmdArgs, - codegenoptions::DebugInfoConstructor, + codegenoptions::LimitedDebugInfo, DwarfVersion, llvm::DebuggerKind::Default); } } else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") || @@ -3653,7 +3653,7 @@ static void RenderDebugOptions(const ToolChain &TC, const Driver &D, if (const Arg *A = Args.getLastArg(options::OPT_g_Group, options::OPT_gsplit_dwarf, options::OPT_gsplit_dwarf_EQ)) { - DebugInfoKind = codegenoptions::DebugInfoConstructor; + DebugInfoKind = codegenoptions::LimitedDebugInfo; // If the last option explicitly specified a debug-info level, use it. 
if (checkDebugInfoOption(A, Args, D, TC) && @@ -3758,7 +3758,7 @@ static void RenderDebugOptions(const ToolChain &TC, const Driver &D, if (checkDebugInfoOption(A, Args, D, TC)) { if (DebugInfoKind != codegenoptions::DebugLineTablesOnly && DebugInfoKind != codegenoptions::DebugDirectivesOnly) { - DebugInfoKind = codegenoptions::DebugInfoConstructor; + DebugInfoKind = codegenoptions::LimitedDebugInfo; CmdArgs.push_back("-dwarf-ext-refs"); CmdArgs.push_back("-fmodule-format=obj"); } @@ -3778,9 +3778,7 @@ static void RenderDebugOptions(const ToolChain &TC, const Driver &D, TC.GetDefaultStandaloneDebug()); if (const Arg *A = Args.getLastArg(options::OPT_fstandalone_debug)) (void)checkDebugInfoOption(A, Args, D, TC); - if ((DebugInfoKind == codegenoptions::LimitedDebugInfo || - DebugInfoKind == codegenoptions::DebugInfoConstructor) && - NeedFullDebug) + if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug) DebugInfoKind = codegenoptions::FullDebugInfo; if (Args.hasFlag(options::OPT_gembed_source, options::OPT_gno_embed_source, @@ -6566,7 +6564,7 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, options::OPT_gline_tables_only)) { *EmitCodeView = true; if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7)) - *DebugInfoKind = codegenoptions::DebugInfoConstructor; + *DebugInfoKind = codegenoptions::LimitedDebugInfo; else *DebugInfoKind = codegenoptions::DebugLineTablesOnly; } else { @@ -6863,7 +6861,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // the guard for source type, however there is a test which asserts // that some assembler invocation receives no -debug-info-kind, // and it's not clear whether that test is just overly restrictive. - DebugInfoKind = (WantDebug ? codegenoptions::DebugInfoConstructor + DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo : codegenoptions::NoDebugInfo); // Add the -fdebug-compilation-dir flag if needed. addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS()); diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 0dcaf61088069..d0c48ae41d9a2 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -524,11 +524,11 @@ // RUN: %clang_cl /Zi /c -### -- %s 2>&1 | FileCheck -check-prefix=Zi %s // Zi: "-gcodeview" -// Zi: "-debug-info-kind=constructor" +// Zi: "-debug-info-kind=limited" // RUN: %clang_cl /Z7 /c -### -- %s 2>&1 | FileCheck -check-prefix=Z7 %s // Z7: "-gcodeview" -// Z7: "-debug-info-kind=constructor" +// Z7: "-debug-info-kind=limited" // RUN: %clang_cl /Zd /c -### -- %s 2>&1 | FileCheck -check-prefix=Z7GMLT %s // Z7GMLT: "-gcodeview" @@ -557,7 +557,7 @@ // which made it "win". This test could not detect that bug. 
// RUN: %clang_cl /Z7 -gdwarf /c -### -- %s 2>&1 | FileCheck -check-prefix=Z7_gdwarf %s // Z7_gdwarf: "-gcodeview" -// Z7_gdwarf: "-debug-info-kind=constructor" +// Z7_gdwarf: "-debug-info-kind=limited" // Z7_gdwarf: "-dwarf-version=4" // RUN: %clang_cl -fmsc-version=1800 -TP -### -- %s 2>&1 | FileCheck -check-prefix=CXX11 %s diff --git a/clang/test/Driver/clang-g-opts.c b/clang/test/Driver/clang-g-opts.c index 60c97790b7dae..bc714b6c93791 100644 --- a/clang/test/Driver/clang-g-opts.c +++ b/clang/test/Driver/clang-g-opts.c @@ -31,7 +31,7 @@ // RUN: | FileCheck --check-prefix=CHECK-WITH-G-DWARF2 %s // CHECK-WITHOUT-G-NOT: -debug-info-kind -// CHECK-WITH-G: "-debug-info-kind=constructor" +// CHECK-WITH-G: "-debug-info-kind=limited" // CHECK-WITH-G: "-dwarf-version=4" // CHECK-WITH-G-DWARF2: "-dwarf-version=2" diff --git a/clang/test/Driver/cuda-dwarf-2.cu b/clang/test/Driver/cuda-dwarf-2.cu index 92b8919729fc4..bcfb2444bc516 100644 --- a/clang/test/Driver/cuda-dwarf-2.cu +++ b/clang/test/Driver/cuda-dwarf-2.cu @@ -49,7 +49,7 @@ // HAS_DEBUG-NOT: warning: debug // HAS_DEBUG: "-fcuda-is-device" -// HAS_DEBUG-SAME: "-debug-info-kind={{constructor|line-tables-only}}" +// HAS_DEBUG-SAME: "-debug-info-kind={{limited|line-tables-only}}" // HAS_DEBUG-SAME: "-dwarf-version=2" // HAS_DEBUG: ptxas // HAS_DEBUG-SAME: "-g" diff --git a/clang/test/Driver/debug-options-as.c b/clang/test/Driver/debug-options-as.c index 4808219702e76..51475680e9b18 100644 --- a/clang/test/Driver/debug-options-as.c +++ b/clang/test/Driver/debug-options-as.c @@ -23,7 +23,7 @@ // RUN: | FileCheck %s // // CHECK: "-cc1as" -// CHECK: "-debug-info-kind=constructor" +// CHECK: "-debug-info-kind=limited" // Check to make sure clang with -g on a .s file gets passed -dwarf-debug-producer. // rdar://12955296 diff --git a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c index 2d1a0b2d5cd8f..189c1f9addeb9 100644 --- a/clang/test/Driver/debug-options.c +++ b/clang/test/Driver/debug-options.c @@ -274,18 +274,18 @@ // GLIO_ONLY_DWARF2: "-dwarf-version=2" // // G_ONLY: "-cc1" -// G_ONLY: "-debug-info-kind=constructor" +// G_ONLY: "-debug-info-kind=limited" // // These tests assert that "-gline-tables-only" "-g" uses the latter, // but otherwise not caring about the DebugInfoKind. 
// G_ONLY_DWARF2: "-cc1" -// G_ONLY_DWARF2: "-debug-info-kind={{standalone|constructor}}" +// G_ONLY_DWARF2: "-debug-info-kind={{standalone|limited}}" // G_ONLY_DWARF2: "-dwarf-version=2" // // G_STANDALONE: "-cc1" // G_STANDALONE: "-debug-info-kind=standalone" // G_LIMITED: "-cc1" -// G_LIMITED: "-debug-info-kind=constructor" +// G_LIMITED: "-debug-info-kind=limited" // G_DWARF2: "-dwarf-version=2" // G_DWARF4: "-dwarf-version=4" // @@ -339,7 +339,7 @@ // NOCI: "-gno-column-info" // // GEXTREFS: "-dwarf-ext-refs" "-fmodule-format=obj" -// GEXTREFS: "-debug-info-kind={{standalone|constructor}}" +// GEXTREFS: "-debug-info-kind={{standalone|limited}}" // RUN: not %clang -cc1 -debug-info-kind=watkind 2>&1 | FileCheck -check-prefix=BADSTRING1 %s // BADSTRING1: error: invalid value 'watkind' in '-debug-info-kind=watkind' diff --git a/clang/test/Driver/integrated-as.s b/clang/test/Driver/integrated-as.s index 05999cfe002b5..0194a3d5a4382 100644 --- a/clang/test/Driver/integrated-as.s +++ b/clang/test/Driver/integrated-as.s @@ -27,19 +27,19 @@ // XA_INCLUDE2: "-Ifoo_dir" // RUN: %clang -### -target x86_64--- -c -integrated-as %s -gdwarf-4 -gdwarf-2 2>&1 | FileCheck --check-prefix=DWARF2 %s -// DWARF2: "-debug-info-kind=constructor" "-dwarf-version=2" +// DWARF2: "-debug-info-kind=limited" "-dwarf-version=2" // RUN: %clang -### -target x86_64--- -c -integrated-as %s -gdwarf-3 2>&1 | FileCheck --check-prefix=DWARF3 %s -// DWARF3: "-debug-info-kind=constructor" "-dwarf-version=3" +// DWARF3: "-debug-info-kind=limited" "-dwarf-version=3" // RUN: %clang -### -target x86_64--- -c -integrated-as %s -gdwarf-4 2>&1 | FileCheck --check-prefix=DWARF4 %s -// DWARF4: "-debug-info-kind=constructor" "-dwarf-version=4" +// DWARF4: "-debug-info-kind=limited" "-dwarf-version=4" // RUN: %clang -### -target x86_64--- -c -integrated-as %s -Xassembler -gdwarf-2 2>&1 | FileCheck --check-prefix=DWARF2XASSEMBLER %s -// DWARF2XASSEMBLER: "-debug-info-kind=constructor" "-dwarf-version=2" +// DWARF2XASSEMBLER: "-debug-info-kind=limited" "-dwarf-version=2" // RUN: %clang -### -target x86_64--- -c -integrated-as %s -Wa,-gdwarf-2 2>&1 | FileCheck --check-prefix=DWARF2WA %s -// DWARF2WA: "-debug-info-kind=constructor" "-dwarf-version=2" +// DWARF2WA: "-debug-info-kind=limited" "-dwarf-version=2" // A dwarf version number that driver can't parse is just stuffed in. 
// RUN: %clang -### -target x86_64--- -c -integrated-as %s -Wa,-gdwarf-huh 2>&1 | FileCheck --check-prefix=BOGODWARF %s diff --git a/clang/test/Driver/myriad-toolchain.c b/clang/test/Driver/myriad-toolchain.c index a4bd260a14986..215a02fd0dec1 100644 --- a/clang/test/Driver/myriad-toolchain.c +++ b/clang/test/Driver/myriad-toolchain.c @@ -83,7 +83,7 @@ // NOSTDLIB-NOT: "-lc" // RUN: %clang -### -c -g %s -target sparc-myriad 2>&1 | FileCheck -check-prefix=G_SPARC %s -// G_SPARC: "-debug-info-kind=constructor" "-dwarf-version=2" +// G_SPARC: "-debug-info-kind=limited" "-dwarf-version=2" // RUN: %clang -### -c %s -target sparc-myriad-rtems -fuse-init-array 2>&1 \ // RUN: | FileCheck -check-prefix=USE-INIT-ARRAY %s diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index 3ddd6446d1176..6415f1d61b720 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -241,7 +241,7 @@ // HAS_DEBUG-NOT: warning: debug // HAS_DEBUG: "-triple" "nvptx64-nvidia-cuda" -// HAS_DEBUG-SAME: "-debug-info-kind={{constructor|line-tables-only}}" +// HAS_DEBUG-SAME: "-debug-info-kind={{limited|line-tables-only}}" // HAS_DEBUG-SAME: "-dwarf-version=2" // HAS_DEBUG-SAME: "-fopenmp-is-device" // HAS_DEBUG: ptxas diff --git a/clang/test/Driver/split-debug.c b/clang/test/Driver/split-debug.c index 70f8d91d48e01..d40207d5ae3b6 100644 --- a/clang/test/Driver/split-debug.c +++ b/clang/test/Driver/split-debug.c @@ -68,18 +68,18 @@ // RUN: FileCheck -check-prefix=CHECK-NOINLINE-WITHOUT-SPLIT < %t %s // // CHECK-NOINLINE-WITHOUT-SPLIT: "-fno-split-dwarf-inlining" -// CHECK-NOINLINE-WITHOUT-SPLIT: "-debug-info-kind=constructor" +// CHECK-NOINLINE-WITHOUT-SPLIT: "-debug-info-kind=limited" // RUN: %clang -target x86_64-unknown-linux-gnu -gmlt -gsplit-dwarf -fno-split-dwarf-inlining -S -### %s 2> %t // RUN: FileCheck -check-prefix=CHECK-SPLIT-WITH-GMLT < %t %s // -// CHECK-SPLIT-WITH-GMLT: "-debug-info-kind=constructor" +// CHECK-SPLIT-WITH-GMLT: "-debug-info-kind=limited" // CHECK-SPLIT-WITH-GMLT: "-split-dwarf-output" // RUN: %clang -target x86_64-unknown-linux-gnu -gsplit-dwarf -fno-split-dwarf-inlining -S -### %s 2> %t // RUN: FileCheck -check-prefix=CHECK-SPLIT-WITH-NOINL < %t %s // -// CHECK-SPLIT-WITH-NOINL: "-debug-info-kind=constructor" +// CHECK-SPLIT-WITH-NOINL: "-debug-info-kind=limited" // CHECK-SPLIT-WITH-NOINL: "-split-dwarf-output" // RUN: %clang -target x86_64-unknown-linux-gnu -gsplit-dwarf -gmlt -fsplit-dwarf-inlining -S -### %s 2> %t @@ -92,7 +92,7 @@ // RUN: %clang -target x86_64-unknown-linux-gnu -gmlt -gsplit-dwarf -S -### %s 2> %t // RUN: FileCheck -check-prefix=CHECK-SPLIT-OVER-GMLT < %t %s // -// CHECK-SPLIT-OVER-GMLT: "-debug-info-kind=constructor" +// CHECK-SPLIT-OVER-GMLT: "-debug-info-kind=limited" // CHECK-SPLIT-OVER-GMLT: "-split-dwarf-file" // CHECK-SPLIT-OVER-GMLT: "-split-dwarf-output" @@ -117,6 +117,6 @@ // RUN: %clang -target x86_64-unknown-linux-gnu -g0 -gsplit-dwarf=split -S -### %s 2> %t // RUN: FileCheck -check-prefix=CHECK-SPLIT-OVER-G0 < %t %s // -// CHECK-SPLIT-OVER-G0: "-debug-info-kind=constructor" +// CHECK-SPLIT-OVER-G0: "-debug-info-kind=limited" // CHECK-SPLIT-OVER-G0: "-split-dwarf-file" // CHECK-SPLIT-OVER-G0: "-split-dwarf-output" diff --git a/lldb/test/Shell/SymbolFile/PDB/Inputs/ClassLayoutTest.cpp b/lldb/test/Shell/SymbolFile/PDB/Inputs/ClassLayoutTest.cpp index 503939680c500..3c4b005cdf1be 100644 --- a/lldb/test/Shell/SymbolFile/PDB/Inputs/ClassLayoutTest.cpp +++ 
b/lldb/test/Shell/SymbolFile/PDB/Inputs/ClassLayoutTest.cpp @@ -106,7 +106,6 @@ class Class : public Base { // Test base class. int main() { MemberTest::Base B1; B1.Get(); - MemberTest::Class C1; MemberTest::Class::StaticMemberFunc(1, 10, 2); return 0; } From 83338bed0c2078870b36a81fe9a36723bd3be2e5 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Sat, 22 Aug 2020 17:12:52 -0700 Subject: [PATCH 011/109] [libunwind] Make findUnwindSectionsByPhdr static Currently, this function is present in the dynsym table of libunwind.so (on ELF targets). Make the function static instead. In the previous release (LLVM 10.x), this function was instead a lambda function inside LocalAddressSpace::findUnwindSections, and because LocalAddressSpace was marked with _LIBUNWIND_HIDDEN, the lambda function was also a hidden symbol. Differential Revision: https://reviews.llvm.org/D86372 (cherry picked from commit 3c1b2e338dfdf4f305b1cb40e2ebcb93a7e470c3) --- libunwind/src/AddressSpace.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index e40c23291f844..93395ffb3b1d1 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -473,8 +473,8 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base, return false; } -int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size, - void *data) { +static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, + size_t pinfo_size, void *data) { auto cbdata = static_cast(data); if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr) return 0; @@ -523,7 +523,8 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size, // Given all the #ifdef's above, the code here is for // defined(LIBUNWIND_ARM_EHABI) -int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) { +static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, + void *data) { auto *cbdata = static_cast(data); bool found_obj = false; bool found_hdr = false; From c160ff1564d8047c852f54d64ba4e9a81d080cac Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 24 Aug 2020 22:49:41 -0700 Subject: [PATCH 012/109] PR37556: Don't diagnose conflicts between instantiated unqualified friend declarations and declarations found in inline namespaces within the target context. (cherry picked from commit 04ba18563390ec87400fa068a9b4981b235ebaa6) --- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 7 +++++++ clang/test/SemaTemplate/friend.cpp | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 2efb7acb97245..baec13ba627c7 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -2053,6 +2053,13 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( // typedef (C++ [dcl.typedef]p4). if (Previous.isSingleTagDecl()) Previous.clear(); + + // Filter out previous declarations that don't match the scope. The only + // effect this has is to remove declarations found in inline namespaces + // for friend declarations with unqualified names. 
+ SemaRef.FilterLookupForScope(Previous, DC, /*Scope*/ nullptr, + /*ConsiderLinkage*/ true, + QualifierLoc.hasQualifier()); } SemaRef.CheckFunctionDeclaration(/*Scope*/ nullptr, Function, Previous, diff --git a/clang/test/SemaTemplate/friend.cpp b/clang/test/SemaTemplate/friend.cpp index 777682be3f1b8..283c7732ccff1 100644 --- a/clang/test/SemaTemplate/friend.cpp +++ b/clang/test/SemaTemplate/friend.cpp @@ -122,3 +122,22 @@ namespace qualified_friend_finds_nothing { namespace N { void f(int); } B bi; // ok?! } + +namespace PR37556 { + inline namespace N { int x1, x2, y1, y2; } // expected-note 2{{previous}} + struct X { + friend void x1(int); + friend void PR37556::x2(int); // expected-error {{different kind}} + }; + template struct Y { + friend void y1(T); + friend void PR37556::y2(T); // expected-error {{different kind}} + }; + template struct Y; + template struct Z { + friend void z1(T); + friend void PR37556::z2(T); // expected-error {{does not match any}} + }; + inline namespace N { int z1, z2; } + template struct Z; +} From 9f4a92a4349ff1794379b706a4851c678899d5d2 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 28 May 2020 09:41:01 -0500 Subject: [PATCH 013/109] Reuse OMPIRBuilder `struct ident_t` handling in Clang Replace the `ident_t` handling in Clang with the methods offered by the OMPIRBuilder. This cuts down on the clang code as well as the differences between the two, making further transitions easier. Tests have changed but there should not be a real functional change. The most interesting difference is probably that we stop generating local ident_t allocations for now and just use globals. Given that this happens only with debug info, the location part of the `ident_t` is probably bigger than the test anyway. As the location part is already a global, we can avoid the allocation, memcpy, and store in favor of a constant global that is slightly bigger. This can be revisited if there are complications. 
Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D80735 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 121 ++--------- clang/lib/CodeGen/CGOpenMPRuntime.h | 12 +- clang/test/OpenMP/distribute_codegen.cpp | 4 +- ...ibute_parallel_for_num_threads_codegen.cpp | 2 +- ...tribute_parallel_for_proc_bind_codegen.cpp | 2 +- ..._parallel_for_simd_num_threads_codegen.cpp | 2 +- ...te_parallel_for_simd_proc_bind_codegen.cpp | 2 +- clang/test/OpenMP/distribute_simd_codegen.cpp | 4 +- clang/test/OpenMP/for_codegen.cpp | 4 +- .../test/OpenMP/for_firstprivate_codegen.cpp | 2 +- clang/test/OpenMP/for_lastprivate_codegen.cpp | 2 +- clang/test/OpenMP/for_linear_codegen.cpp | 2 +- clang/test/OpenMP/for_reduction_codegen.cpp | 6 +- .../test/OpenMP/for_reduction_codegen_UDR.cpp | 4 +- .../master_taskloop_in_reduction_codegen.cpp | 14 +- .../master_taskloop_reduction_codegen.cpp | 4 +- ...ter_taskloop_simd_in_reduction_codegen.cpp | 14 +- ...master_taskloop_simd_reduction_codegen.cpp | 4 +- clang/test/OpenMP/openmp_win_codegen.cpp | 8 +- clang/test/OpenMP/ordered_codegen.cpp | 2 +- clang/test/OpenMP/parallel_codegen.cpp | 24 +-- clang/test/OpenMP/parallel_copyin_codegen.cpp | 4 +- clang/test/OpenMP/parallel_for_codegen.cpp | 2 +- clang/test/OpenMP/parallel_master_codegen.cpp | 38 ++-- ...llel_master_taskloop_reduction_codegen.cpp | 4 +- ...master_taskloop_simd_reduction_codegen.cpp | 4 +- .../OpenMP/parallel_num_threads_codegen.cpp | 2 +- .../OpenMP/parallel_proc_bind_codegen.cpp | 2 +- .../OpenMP/parallel_reduction_codegen.cpp | 2 +- clang/test/OpenMP/sections_codegen.cpp | 4 +- .../OpenMP/sections_firstprivate_codegen.cpp | 2 +- .../OpenMP/sections_lastprivate_codegen.cpp | 2 +- .../OpenMP/sections_reduction_codegen.cpp | 4 +- clang/test/OpenMP/single_codegen.cpp | 2 +- .../OpenMP/single_firstprivate_codegen.cpp | 2 +- clang/test/OpenMP/target_depend_codegen.cpp | 62 +++--- clang/test/OpenMP/target_parallel_codegen.cpp | 2 +- .../OpenMP/target_parallel_depend_codegen.cpp | 67 +++--- .../OpenMP/target_parallel_for_codegen.cpp | 2 +- .../target_parallel_for_simd_codegen.cpp | 2 +- .../OpenMP/target_parallel_if_codegen.cpp | 2 +- .../target_parallel_num_threads_codegen.cpp | 2 +- .../OpenMP/target_simd_depend_codegen.cpp | 66 +++--- clang/test/OpenMP/target_teams_codegen.cpp | 2 +- .../OpenMP/target_teams_depend_codegen.cpp | 66 +++--- .../target_teams_distribute_codegen.cpp | 2 +- ...tribute_parallel_for_proc_bind_codegen.cpp | 2 +- ...te_parallel_for_simd_proc_bind_codegen.cpp | 2 +- .../target_teams_distribute_simd_codegen.cpp | 2 +- .../OpenMP/target_teams_num_teams_codegen.cpp | 2 +- .../target_teams_thread_limit_codegen.cpp | 2 +- .../test/OpenMP/task_in_reduction_codegen.cpp | 14 +- .../OpenMP/taskloop_in_reduction_codegen.cpp | 14 +- .../OpenMP/taskloop_reduction_codegen.cpp | 4 +- .../taskloop_simd_in_reduction_codegen.cpp | 14 +- .../taskloop_simd_reduction_codegen.cpp | 4 +- clang/test/OpenMP/teams_codegen.cpp | 6 +- ...ibute_parallel_for_num_threads_codegen.cpp | 2 +- ...tribute_parallel_for_proc_bind_codegen.cpp | 2 +- ..._parallel_for_simd_num_threads_codegen.cpp | 2 +- ...te_parallel_for_simd_proc_bind_codegen.cpp | 2 +- clang/test/OpenMP/threadprivate_codegen.cpp | 202 +++++++++--------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +- llvm/include/llvm/IR/IRBuilder.h | 14 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 62 ++++-- llvm/lib/IR/IRBuilder.cpp | 13 +- llvm/test/Transforms/OpenMP/deduplication.ll | 14 +- 67 files changed, 461 insertions(+), 517 deletions(-) diff --git 
a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a7e1fe8560b6e..b221deab0174c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -29,7 +29,6 @@ #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" @@ -1064,23 +1063,6 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator) : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("ident_t"); - QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); - RD->startDefinition(); - // reserved_1 - addFieldToRecordDecl(C, RD, KmpInt32Ty); - // flags - addFieldToRecordDecl(C, RD, KmpInt32Ty); - // reserved_2 - addFieldToRecordDecl(C, RD, KmpInt32Ty); - // reserved_3 - addFieldToRecordDecl(C, RD, KmpInt32Ty); - // psource - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - RD->completeDefinition(); - IdentQTy = C.getRecordType(RD); - IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); // Initialize Types used in OpenMPIRBuilder from OMPKinds.def @@ -1397,39 +1379,6 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, Fields.finishAndAddTo(Parent); } -Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { - CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); - unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); - FlagsTy FlagsKey(Flags, Reserved2Flags); - llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); - if (!Entry) { - if (!DefaultOpenMPPSource) { - // Initialize default location for psource field of ident_t structure of - // all ident_t objects. Format is ";file;function;line;column;;". - // Taken from - // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp - DefaultOpenMPPSource = - CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); - DefaultOpenMPPSource = - llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); - } - - llvm::Constant *Data[] = { - llvm::ConstantInt::getNullValue(CGM.Int32Ty), - llvm::ConstantInt::get(CGM.Int32Ty, Flags), - llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), - llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; - llvm::GlobalValue *DefaultOpenMPLocation = - createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", - llvm::GlobalValue::PrivateLinkage); - DefaultOpenMPLocation->setUnnamedAddr( - llvm::GlobalValue::UnnamedAddr::Global); - - OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; - } - return Address(Entry, Align); -} - void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); @@ -1458,62 +1407,24 @@ void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { - Flags |= OMP_IDENT_KMPC; - // If no debug info is generated - return global default location. 
+ llvm::Constant *SrcLocStr; if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || - Loc.isInvalid()) - return getOrCreateDefaultLocation(Flags).getPointer(); - - assert(CGF.CurFn && "No function in current CodeGenFunction."); - - CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); - Address LocValue = Address::invalid(); - auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); - if (I != OpenMPLocThreadIDMap.end()) - LocValue = Address(I->second.DebugLoc, Align); - - // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if - // GetOpenMPThreadID was called before this routine. - if (!LocValue.isValid()) { - // Generate "ident_t .kmpc_loc.addr;" - Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - Elem.second.DebugLoc = AI.getPointer(); - LocValue = AI; - - if (!Elem.second.ServiceInsertPt) - setLocThreadIdInsertPt(CGF); - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); - CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), - CGF.getTypeSize(IdentQTy)); - } - - // char **psource = &.kmpc_loc_.addr.psource; - LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); - auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); - LValue PSource = - CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); - - llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); - if (OMPDebugLoc == nullptr) { - SmallString<128> Buffer2; - llvm::raw_svector_ostream OS2(Buffer2); - // Build debug location - PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - OS2 << ";" << PLoc.getFilename() << ";"; + Loc.isInvalid()) { + SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + } else { + std::string FunctionName = ""; if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) - OS2 << FD->getQualifiedNameAsString(); - OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; - OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); - OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; + FunctionName = FD->getQualifiedNameAsString(); + PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); + const char *FileName = PLoc.getFilename(); + unsigned Line = PLoc.getLine(); + unsigned Column = PLoc.getColumn(); + SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, + Line, Column); } - // *psource = ";<File>;<Function>;<Line>;<Column>;;"; - CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); - - // Our callers always pass this to a runtime function, so for - // convenience, go ahead and return a naked pointer. - return LocValue.getPointer(); + unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); + return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), + Reserved2Flags); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, @@ -1595,7 +1506,7 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { - return IdentTy->getPointerTo(); + return OMPBuilder.IdentPtr; } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index eb22f155f5ef4..cf3dbf59634d7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -374,17 +374,7 @@ class CGOpenMPRuntime { private: /// An OpenMP-IR-Builder instance.
llvm::OpenMPIRBuilder OMPBuilder; - /// Default const ident_t object used for initialization of all other - /// ident_t objects. - llvm::Constant *DefaultOpenMPPSource = nullptr; - using FlagsTy = std::pair; - /// Map of flags and corresponding default locations. - using OpenMPDefaultLocMapTy = llvm::DenseMap; - OpenMPDefaultLocMapTy OpenMPDefaultLocMap; - Address getOrCreateDefaultLocation(unsigned Flags); - - QualType IdentQTy; - llvm::StructType *IdentTy = nullptr; + /// Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap OpenMPDebugLocMapTy; OpenMPDebugLocMapTy OpenMPDebugLocMap; diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index 4e8bcb44f63df..bbece45c6e31b 100644 --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -55,8 +55,8 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } -// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) void without_schedule_clause(float *a, float *b, float *c, float *d) { diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp index 8d941391c75b4..21d0949454274 100644 --- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp index 3e2a65e47f0e9..fce005be80fb4 100644 --- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp @@ -16,7 +16,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] 
c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp index 318fc1401963c..014fb9523fe59 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp index 716d7d7fa2e9a..4fb1f5b0274d9 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -16,7 +16,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp index 7229c8095f0e2..a0c3c6accc0fb 100644 --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -68,8 +68,8 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } -// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 
x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) void without_schedule_clause(float *a, float *b, float *c, float *d) { diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp index 26b09c574f3c7..5c4f984e8fc12 100644 --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -22,8 +22,8 @@ // PROF-INSTR-PATH: constant [25 x i8] c"for_codegen-test.profraw\00" // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8* // CHECK-DAG: [[I:@.+]] = global i8 1, // CHECK-DAG: [[J:@.+]] = global i8 2, // CHECK-DAG: [[K:@.+]] = global i8 3, diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp index 1cfd94af9d4ea..756665523b7ca 100644 --- a/clang/test/OpenMP/for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp @@ -65,7 +65,7 @@ S s_arr[] = {1, 2}; // CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, S var(3); // CHECK: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) // CHECK: ([[S_FLOAT_TY]]*)* [[S_FLOAT_TY_DESTR:@[^ ]+]] {{[^,]+}}, {{.+}}([[S_FLOAT_TY]]* [[TEST]] diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp index fd7cad07e8b4a..fbbb6ad6bc3d5 100644 --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -172,7 +172,7 @@ char cnt; // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // CHECK: [[S_FLOAT_TY:%.+]] = type { float } // CHECK: [[S_INT_TY:%.+]] = type { i32 } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK-DAG: [[X:@.+]] = global double 0.0 // CHECK-DAG: [[F:@.+]] = global float 0.0 // CHECK-DAG: [[CNT:@.+]] = global i8 0 diff --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp index 2f4e8dd531dd7..fd9d89c38dcb7 100644 --- a/clang/test/OpenMP/for_linear_codegen.cpp +++ b/clang/test/OpenMP/for_linear_codegen.cpp @@ -112,7 +112,7 @@ struct SST { // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // CHECK: [[S_FLOAT_TY:%.+]] = type { float } // CHECK: [[S_INT_TY:%.+]] = type { i32 } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK-DAG: [[F:@.+]] = global float 0.0 // CHECK-DAG: [[CNT:@.+]] = global i8 0 template diff --git 
a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp index 61f7afd6b4608..5a360fb246843 100644 --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -29,9 +29,9 @@ struct S { // CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK-DAG: [[ATOMIC_REDUCE_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* +// CHECK-DAG: [[ATOMIC_REDUCE_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer template diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp index 31168bc325e3a..5a20fa187e9c3 100644 --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -53,8 +53,8 @@ void init_plus(BaseS1&, const BaseS1&); // CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { %{{[^,]+}}, %{{[^,]+}}, float } // CHECK-DAG: [[S_INT_TY:%.+]] = type { %{{[^,]+}}, %{{[^,]+}}, i{{[0-9]+}} } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8* // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer #pragma omp declare reduction(operator* : S : omp_out.f = 17 * omp_in.f) initializer(omp_priv = S()) diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp index 5d6d1645408f5..e6cc39c5345a3 100644 --- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp @@ -39,22 +39,22 @@ int main(int argc, char **argv) { } // CHECK-LABEL: @main -// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID:%.+]]) // CHECK: [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* % // CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* % // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: define internal void [[OMP_PARALLEL]]( // CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( // CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 // CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // CHECK: [[THEN]] -// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) // CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* // CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 // CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 @@ -63,7 +63,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 // CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** %{{.+}}, // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK_T]], i32 1, // CHECK: call {{.*}}void @__kmpc_end_master( // CHECK-NEXT: br label {{%?}}[[EXIT]] // CHECK: [[EXIT]] diff --git a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp index 2c67e49caf43c..4d151bed649d2 100644 --- a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp @@ -161,8 +161,8 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) +// 
CHECK: call void @__kmpc_taskloop(%struct.ident_t* {{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* // CHECK: call {{.*}}void @__kmpc_end_master( // CHECK-NEXT: br label {{%?}}[[EXIT]] diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp index 58f1b0e034b0a..aca7f0f472449 100644 --- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp @@ -39,18 +39,18 @@ int main(int argc, char **argv) { } // CHECK-LABEL: @main -// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID:%.+]]) // CHECK: [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* % // CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* % // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: define internal void [[OMP_PARALLEL]]( -// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) // CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* // CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 // CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 @@ -59,7 +59,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 // CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** %{{.+}}, // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK_T]], i32 1, // CHECK: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp index 0e31b2f4eb497..c48a52029ebbb 100644 --- a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp @@ -157,8 +157,8 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* {{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* // CHECK: ret i32 diff --git a/clang/test/OpenMP/openmp_win_codegen.cpp b/clang/test/OpenMP/openmp_win_codegen.cpp index 4b330eccc669b..11f5adb39fe5a 100644 --- a/clang/test/OpenMP/openmp_win_codegen.cpp +++ b/clang/test/OpenMP/openmp_win_codegen.cpp @@ -33,7 +33,7 @@ struct Test { int main() { // CHECK: call void @{{.+}}main Test::main(); - // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* {{.*}}@0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*)) + // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* {{.*}}@1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*)) #pragma omp parallel { try { @@ -53,12 +53,12 @@ int main() { // CHECK: invoke void @{{.+}}foo // CHECK: [[CATCHSWITCH:%.+]] = catchswitch within none // CHECK: [[CATCHPAD:%.+]] = catchpad within [[CATCHSWITCH]] -// CHECK: call void @__kmpc_critical(%struct.ident_t* {{.*}}@0, i32 [[GID:%.+]], +// CHECK: call void @__kmpc_critical(%struct.ident_t* {{.*}}@1, i32 [[GID:%.+]], // CHECK: invoke void @{{.+}}bar -// CHECK: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@0, i32 [[GID]], +// CHECK: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@1, i32 [[GID]], // CHECK: catchret from [[CATCHPAD]] to // CHECK: cleanuppad within [[CATCHPAD]] [] -// CHECK-NEXT: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@0, i32 [[GID]], +// CHECK-NEXT: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@1, i32 [[GID]], // CHECK-NEXT: cleanupret from {{.*}} unwind label %[[CATCHTERM:[^ ]+]] // CHECK: cleanuppad within none [] // CHECK-NEXT: call void @"?terminate@@YAXXZ"() #{{[0-9]+}} [ "funclet"(token %{{.*}}) ] diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp index 07ecee45974c5..85235f31a0ea8 100644 --- a/clang/test/OpenMP/ordered_codegen.cpp +++ b/clang/test/OpenMP/ordered_codegen.cpp @@ -15,7 +15,7 @@ #define HEADER // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index 6fd394c0bbc9b..bceab0637f6a9 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -17,10 +17,9 @@ #define HEADER // ALL-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // ALL-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// ALL-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// ALL-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } -// CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + // CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+23]];1;;\00" // CHECK-DEBUG-DAG: 
[[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+11]];1;;\00" // IRBUILDER-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } @@ -72,14 +71,11 @@ int main (int argc, char **argv) { // ALL: ret i32 // ALL-NEXT: } // ALL-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv) -// CHECK-DEBUG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t -// CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8* -// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 bitcast (%struct.ident_t* [[DEF_LOC_2]] to i8*), i64 24, i1 false) + // ALL-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]], // ALL-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]], -// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4 -// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] -// CHECK-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[VLA_SIZE]], i32* [[VLA]]) + +// CHECK-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[VLA_SIZE]], i32* [[VLA]]) // IRBUILDER-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]]) // ALL-DEBUG: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} // ALL-DEBUG: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) @@ -144,13 +140,9 @@ int main (int argc, char **argv) { // ALL: ret i32 0 // ALL-NEXT: } // ALL-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) -// CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t -// CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8* -// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 bitcast (%struct.ident_t* [[DEF_LOC_2]] to i8*), i64 24, i1 false) -// CHECK-DEBUG-NEXT: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], -// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4 -// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) + +// CHECK-DEBUG: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], +// CHECK-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) // IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) // ALL-DEBUG: ret i32 0 // ALL-DEBUG-NEXT: } diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp index 1331a2b8c0e4b..0f974af5ec549 100644 --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -48,10 +48,10 @@ struct S { // CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // TLS-CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // TLS-CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// TLS-CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// TLS-CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK-DAG: [[T_VAR:@.+]] = internal global i{{[0-9]+}} 1122, // CHECK-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index de445634470bb..cc5bb8f4858f0 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -29,7 +29,7 @@ #ifndef OMP5 // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 514, i32 0, i32 0, i8* +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 514, i32 0, i32 0, i8* // CHECK-LABEL: with_var_schedule void with_var_schedule() { diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp index 82e18c80f103e..850a650ca7adf 100644 --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -15,7 +15,7 @@ // CK1-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK1-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK1-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK1-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CK1-LABEL: foo void foo() {} @@ -52,7 +52,7 @@ void parallel_master() { // CK2-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK2-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK2-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK2-DAG: [[DEF_LOC:@.+]] = private unnamed_addr 
constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void parallel_master_private() { int a; @@ -98,12 +98,12 @@ void parallel_master_private() { // CK3-LABEL: define void @{{.+}}parallel_master{{.+}} // CK3: [[A_VAL:%.+]] = alloca i32 -// CK3: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void +// CK3: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void // CK3: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias [[GTID:%.+]], i32* noalias [[BTID:%.+]], i32* nonnull align 4 dereferenceable(4) [[A_VAL]]) // CK3: [[GTID_ADDR:%.+]] = alloca i32* // CK3: [[BTID_ADDR:%.+]] = alloca i32* -// CK3: [[A_ADDR:%.+]] = alloca i32* +// CK3: [[A_ADDR:%.+]] = alloca i32* // CK3: store i32* [[GTID]], i32** [[GTID_ADDR]] // CK3: store i32* [[BTID]], i32** [[BTID_ADDR]] // CK3: store i32* [[A_VAL]], i32** [[A_ADDR]] @@ -145,7 +145,7 @@ void parallel_master_default_firstprivate() { // CK31: [[CONV:%.+]] = bitcast i64* [[A_CASTED]] to i32* // CK31: store i32 [[ZERO_VAL]], i32* [[CONV]] // CK31: [[ONE_VAL:%.+]] = load i64, i64* [[A_CASTED]] -// CK31: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[ONE_VAL]]) +// CK31: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i64 [[ONE_VAL]]) // CK31: ret void // CK31: [[GLOBAL_TID_ADDR:%.+]] = alloca i32* @@ -157,14 +157,14 @@ void parallel_master_default_firstprivate() { // CK31: [[CONV]] = bitcast i64* [[A_ADDR]] // CK31: [[ZERO_VAL]] = load i32*, i32** [[GLOBAL_TID_ADDR]] // CK31: [[ONE_VAL]] = load i32, i32* [[ZERO_VAL]] -// CK31: [[TWO_VAL:%.+]] = call i32 @__kmpc_master(%struct.ident_t* @0, i32 [[ONE_VAL]]) +// CK31: [[TWO_VAL:%.+]] = call i32 @__kmpc_master(%struct.ident_t* @{{.*}}, i32 [[ONE_VAL]]) // CK31: [[THREE:%.+]] = icmp ne i32 [[TWO_VAL]], 0 // CK31: br i1 %3, label [[OMP_IF_THEN:%.+]], label [[OMP_IF_END:%.+]] // CK31: [[FOUR:%.+]] = load i32, i32* [[CONV:%.+]] // CK31: [[INC:%.+]] = add nsw i32 [[FOUR]] // CK31: store i32 [[INC]], i32* [[CONV]] -// CK31: call void @__kmpc_end_master(%struct.ident_t* @0, i32 [[ONE_VAL]]) +// CK31: call void @__kmpc_end_master(%struct.ident_t* @{{.*}}, i32 [[ONE_VAL]]) // CK31: br label [[OMP_IF_END]] // CK31: ret void @@ -287,7 +287,7 @@ void parallel_master_default_firstprivate() { // CK4-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK4-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK4-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK4-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void parallel_master_firstprivate() { int a; @@ -307,7 +307,7 @@ void parallel_master_firstprivate() { // CK4: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias [[GLOBAL_TID:%.+]], i32* noalias [[BOUND_TID:%.+]], i64 [[A_VAL]]) // CK4: [[GLOBAL_TID_ADDR:%.+]] = alloca i32* // CK4: [[BOUND_TID_ADDR:%.+]] = alloca i32* -// CK4: [[A_ADDR:%.+]] = alloca i64 +// CK4: [[A_ADDR:%.+]] = alloca i64 // CK4: store i32* [[GLOBAL_TID]], i32** [[GLOBAL_TID_ADDR]] // CK4: store i32* [[BOUND_TID]], i32** [[BOUND_TID_ADDR]] // CK4: store i64 [[A_VAL]], i64* [[A_ADDR]] @@ -345,14 +345,14 @@ void parallel_master_firstprivate() { // CK5-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK5-DAG: [[A:@.+]] = {{.+}} i32 0 // CK5-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK5-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK5-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CK5-DAG: [[A_CACHE:@.+]] = common global i8** null -// CK5-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK5-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // TLS-CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // TLS-CHECK-DAG: [[A:@.+]] = thread_local global i32 0 // TLS-CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// TLS-CHECK-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds 
([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } -// TLS-CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// TLS-CHECK-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// TLS-CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } int a; #pragma omp threadprivate(a) @@ -443,9 +443,9 @@ void parallel_master_copyin() { // CK6-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK6-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK6-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK6-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CK6-DAG: [[GOMP:@.+]] = common global [8 x i32] zeroinitializer -// CK6-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK6-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void parallel_master_reduction() { int g; @@ -510,7 +510,7 @@ void parallel_master_reduction() { // CK7-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK7-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK7-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK7-DAG: [[DEF_LOC_1:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void parallel_master_if() { #pragma omp parallel master if (parallel: false) @@ -525,7 +525,7 @@ void parallel_master_if() { // CK7: ret void // CK7: define internal void @.omp_outlined.(i32* noalias [[GTID:%.+]], i32* noalias [[BTID:%.+]]) -// CK7: [[EXECUTE:%.+]] = call i32 @__kmpc_master(%struct.ident_t* @0, i32 %1) +// CK7: [[EXECUTE:%.+]] = call i32 @__kmpc_master(%struct.ident_t* @1, i32 %1) // CK7: call void @__kmpc_end_master(%struct.ident_t* [[DEF_LOC_1]], i32 %1) #endif @@ -544,7 +544,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CK8-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CK8-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK8-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK8-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); @@ -600,7 +600,7 @@ int main() { // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // CK9-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK9-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x 
i8] c";unknown;unknown;0;0;;\00" -// CK9-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK9-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_null_allocator; extern const omp_allocator_handle_t omp_default_mem_alloc; diff --git a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp index 1c24e4e1d0844..c75e35f9ed412 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp @@ -161,8 +161,8 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* {{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* // CHECK: call {{.*}}void @__kmpc_end_master( // CHECK-NEXT: br label {{%?}}[[EXIT]] diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp index c83589f34c787..bf5b2dc88d242 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp @@ -161,8 +161,8 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* {{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* // CHECK: call {{.*}}void @__kmpc_end_master( // CHECK-NEXT: br label {{%?}}[[EXIT]] diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp 
b/clang/test/OpenMP/parallel_num_threads_codegen.cpp index 79615b9341687..9ec712f83c530 100644 --- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp index 4747a8182e589..8b9e09191b24b 100644 --- a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp @@ -14,7 +14,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp index 21f9efb322237..3b4348e4bc1db 100644 --- a/clang/test/OpenMP/parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp @@ -84,7 +84,7 @@ struct SST { // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8 // CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* +// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer //CHECK: foo_array_sect diff --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp index 68fd38f7d0bba..ba918c385fc30 100644 --- a/clang/test/OpenMP/sections_codegen.cpp +++ b/clang/test/OpenMP/sections_codegen.cpp @@ -9,8 +9,8 @@ // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* -// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* +// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8* // CHECK-LABEL: foo void foo() {}; // CHECK-LABEL: bar diff --git a/clang/test/OpenMP/sections_firstprivate_codegen.cpp b/clang/test/OpenMP/sections_firstprivate_codegen.cpp index 4ba4cf70eb2b6..8d73c5dcfca18 100644 --- 
a/clang/test/OpenMP/sections_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_firstprivate_codegen.cpp @@ -65,7 +65,7 @@ S s_arr[] = {1, 2}; // CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, S var(3); // CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, -// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* +// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) // CHECK: ([[S_FLOAT_TY]]*)* [[S_FLOAT_TY_DESTR:@[^ ]+]] {{[^,]+}}, {{.+}}([[S_FLOAT_TY]]* [[TEST]] diff --git a/clang/test/OpenMP/sections_lastprivate_codegen.cpp b/clang/test/OpenMP/sections_lastprivate_codegen.cpp index 12a64d17e7274..acbda9dddd904 100644 --- a/clang/test/OpenMP/sections_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_lastprivate_codegen.cpp @@ -44,7 +44,7 @@ volatile int g = 1212; // CHECK: [[S_FLOAT_TY:%.+]] = type { float } // CHECK: [[S_INT_TY:%.+]] = type { i32 } -// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* +// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* // CHECK-DAG: [[X:@.+]] = global double 0.0 // OMP50-DAG: [[LAST_IV_X:@.+]] = {{.*}}common global i32 0 // OMP50-DAG: [[LAST_X:@.+]] = {{.*}}common global double 0.000000e+00, diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp index a583606c5677f..19f57fd19feb4 100644 --- a/clang/test/OpenMP/sections_reduction_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_codegen.cpp @@ -28,8 +28,8 @@ struct S { // CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK-DAG: [[ATOMIC_REDUCE_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* -// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* +// CHECK-DAG: [[ATOMIC_REDUCE_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* +// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer template diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp index a56cdb0ae81a4..e5b2c86b995b4 100644 --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -34,7 +34,7 @@ class TestClass { // CHECK-DAG: [[SST_TY:%.+]] = type { double } // CHECK-DAG: [[SS_TY:%.+]] = type { i32, i8, i32* } // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK: [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8* +// CHECK: [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8* // CHECK: define void [[FOO:@.+]]() diff --git a/clang/test/OpenMP/single_firstprivate_codegen.cpp b/clang/test/OpenMP/single_firstprivate_codegen.cpp index aadec94270fd4..0c1d7df370dfd 100644 --- a/clang/test/OpenMP/single_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/single_firstprivate_codegen.cpp @@ -63,7 +63,7 @@ int vec[] = {1, 2}; S s_arr[] = {1, 2}; // CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] 
zeroinitializer, S var(3); -// CHECK-DAG: [[SINGLE_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8* +// CHECK-DAG: [[SINGLE_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8* // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) // CHECK: ([[S_FLOAT_TY]]*)* [[S_FLOAT_TY_DESTR:@[^ ]+]] {{[^,]+}}, {{.+}}([[S_FLOAT_TY]]* [[TEST]] diff --git a/clang/test/OpenMP/target_depend_codegen.cpp b/clang/test/OpenMP/target_depend_codegen.cpp index e8b07ace5fb05..b2e375f21186a 100644 --- a/clang/test/OpenMP/target_depend_codegen.cpp +++ b/clang/test/OpenMP/target_depend_codegen.cpp @@ -75,22 +75,24 @@ int foo(int n) { TT d; static long *plocal; - // CHECK: [[ADD:%.+]] = add nsw i32 - // CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], - // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 - // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], - // CHECK: store i32 [[DEV]], i32* [[GEP]], - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START:%.+]], i[[SZ]] 1 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 2 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 3 - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) - // CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target device(global + a) depend(in: global) depend(out: a, b, cn[4]) +// CHECK: [[ADD:%.+]] = add nsw i32 +// CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], +// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 +// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], +// CHECK: store i32 [[DEV]], i32* [[GEP]], +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START:%.+]], i[[SZ]] 1 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 2 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 3 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target 
device(global + a) depend(in \ + : global) depend(out \ + : a, b, cn[4]) { } @@ -121,12 +123,12 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START:%.+]], i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[ELSE]]: @@ -137,30 +139,32 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START:%.+]], i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[EXIT]]: - #pragma omp target device(global + a) nowait depend(inout: global, a, bn) if(a) +#pragma omp target device(global + a) nowait depend(inout \ + : global, a, bn) if (a) { static int local1; *plocal = global; local1 = global; } - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: call i32 
[[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) - // CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target if(0) firstprivate(global) depend(out:global) +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target if (0) firstprivate(global) depend(out \ + : global) { global += 1; } diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index 2e094c294dfa0..88a2275650701 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } diff --git a/clang/test/OpenMP/target_parallel_depend_codegen.cpp b/clang/test/OpenMP/target_parallel_depend_codegen.cpp index 71d02ec19b4be..44ee8695611aa 100644 --- a/clang/test/OpenMP/target_parallel_depend_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_depend_codegen.cpp @@ -75,23 +75,25 @@ int foo(int n) { TT d; static long *plocal; - // CHECK: [[ADD:%.+]] = add nsw i32 - // CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], - // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 - // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], - // CHECK: store i32 [[DEV]], i32* [[GEP]], - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 3 - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: 
call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) - // CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target parallel device(global + a) depend(in: global) depend(out: a, b, cn[4]) +// CHECK: [[ADD:%.+]] = add nsw i32 +// CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], +// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 +// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], +// CHECK: store i32 [[DEV]], i32* [[GEP]], +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 3 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target parallel device(global + a) depend(in \ + : global) depend(out \ + : a, b, cn[4]) { } @@ -122,13 +124,13 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[ELSE]]: @@ -139,32 +141,35 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 
[[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[EXIT]]: - #pragma omp target parallel device(global + a) nowait depend(inout: global, a, bn) if(target:a) +#pragma omp target parallel device(global + a) nowait depend(inout \ + : global, a, bn) if (target \ + : a) { static int local1; *plocal = global; local1 = global; } - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) - // CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target parallel if(0) firstprivate(global) depend(out:global) +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target parallel if (0) firstprivate(global) depend(out \ + : global) { global += 1; } diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index e8590530a0d89..78494dba97e88 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x 
i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp index 055d5dce28bbd..02d08b6329be0 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -67,7 +67,7 @@ #define HEADER // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp index b315362735fec..9f3f13e6aa9ae 100644 --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp index f12248d6458ca..bb231b3328e61 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } diff --git a/clang/test/OpenMP/target_simd_depend_codegen.cpp b/clang/test/OpenMP/target_simd_depend_codegen.cpp index 72cd550207b67..d45001c9eaa70 100644 --- a/clang/test/OpenMP/target_simd_depend_codegen.cpp +++ 
b/clang/test/OpenMP/target_simd_depend_codegen.cpp @@ -75,23 +75,25 @@ int foo(int n) { TT d; static long *plocal; - // CHECK: [[ADD:%.+]] = add nsw i32 - // CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], - // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 - // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], - // CHECK: store i32 [[DEV]], i32* [[GEP]], - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 3 - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) - // CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target simd device(global + a) depend(in: global) depend(out: a, b, cn[4]) +// CHECK: [[ADD:%.+]] = add nsw i32 +// CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], +// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 +// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], +// CHECK: store i32 [[DEV]], i32* [[GEP]], +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 3 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target simd device(global + a) depend(in \ + : global) depend(out \ + : a, b, cn[4]) for (int i = 0; i < 10; ++i) { } @@ -122,13 +124,13 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 
[[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[ELSE]]: @@ -139,32 +141,34 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[EXIT]]: - #pragma omp target simd device(global + a) nowait depend(inout: global, a, bn) if(a) +#pragma omp target simd device(global + a) nowait depend(inout \ + : global, a, bn) if (a) for (int i = 0; i < *plocal; ++i) { static int local1; *plocal = global; local1 = global; } - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) - // CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target simd if(0) 
firstprivate(global) depend(out:global) +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target simd if (0) firstprivate(global) depend(out \ + : global) for (int i = 0; i < global; ++i) { global += 1; } diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 9011c3c0ff805..af4831cb87927 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } diff --git a/clang/test/OpenMP/target_teams_depend_codegen.cpp b/clang/test/OpenMP/target_teams_depend_codegen.cpp index 9a58e40de7505..69c749945de62 100644 --- a/clang/test/OpenMP/target_teams_depend_codegen.cpp +++ b/clang/test/OpenMP/target_teams_depend_codegen.cpp @@ -75,23 +75,25 @@ int foo(int n) { TT d; static long *plocal; - // CHECK: [[ADD:%.+]] = add nsw i32 - // CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], - // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 - // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], - // CHECK: store i32 [[DEV]], i32* [[GEP]], - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 3 - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) - // 
CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target teams device(global + a) depend(in: global) depend(out: a, b, cn[4]) +// CHECK: [[ADD:%.+]] = add nsw i32 +// CHECK: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], +// CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 +// CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], +// CHECK: store i32 [[DEV]], i32* [[GEP]], +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i[[SZ]] {{20|40}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY0:%.+]]* +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 3 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 4, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY0]](i32 [[GTID]], [[TASK_TY0]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target teams device(global + a) depend(in \ + : global) depend(out \ + : a, b, cn[4]) { } @@ -122,13 +124,13 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{104|60}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[ELSE]]: @@ -139,32 +141,34 @@ int foo(int n) { // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* 
@__kmpc_omp_target_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{56|28}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1__:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1__:%.+]]* // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) + // CHECK: call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]], i32 3, i8* [[DEP]], i32 0, i8* null) // CHECK: br label %[[EXIT:.+]] // CHECK: [[EXIT]]: - #pragma omp target teams device(global + a) nowait depend(inout: global, a, bn) if(a) +#pragma omp target teams device(global + a) nowait depend(inout \ + : global, a, bn) if (a) { static int local1; *plocal = global; local1 = global; } - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) - // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* - // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 - // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* - // CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @0, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) - // CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - // CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) - // CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK]]) - #pragma omp target teams if(0) firstprivate(global) depend(out:global) +// CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID]], i32 1, i[[SZ]] {{48|24}}, i[[SZ]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY2:%.+]]* +// CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* %{{.+}}, i[[SZ]] 0 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* %{{.+}} to i8* +// CHECK: call void @__kmpc_omp_wait_deps(%struct.ident_t* @1, i32 [[GTID]], i32 1, i8* [[DEP]], i32 0, i8* null) +// CHECK: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +// CHECK: call i32 [[TASK_ENTRY2]](i32 [[GTID]], [[TASK_TY2]]* [[BC_TASK]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK]]) +#pragma omp target teams if (0) firstprivate(global) depend(out \ + : global) { global += 1; } diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp index 547e45f6d3e7e..7f34ce07597e7 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: 
[[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp index a7242c9112451..9107c218a436a 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -17,7 +17,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp index 9d10c2e3dc7c4..40bf51f340280 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -17,7 +17,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 4912352e17ca2..2c3fcf08d16e4 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -68,7 +68,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp index 93b28f8c43645..35763c946a13f 100644 --- 
a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp index 2432d6b3ad6e6..34ffbf6efa225 100644 --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -40,7 +40,7 @@ // CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp index 8679daded9485..64477211ca1fd 100644 --- a/clang/test/OpenMP/task_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp @@ -51,18 +51,18 @@ int main(int argc, char **argv) { } // CHECK-LABEL: @main -// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID:%.+]]) // CHECK: [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* % // CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* % // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: define internal void [[OMP_PARALLEL]]( -// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) // CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* // CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 // CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 @@ -71,7 +71,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 // CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** %{{.+}}, // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], -// CHECK-NEXT: call i32 @__kmpc_omp_task(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK_T]]) +// CHECK-NEXT: call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK_T]]) // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp index e648c2df50b83..3a150eeedd1f7 100644 --- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -39,18 +39,18 @@ int main(int argc, char **argv) { } // CHECK-LABEL: @main -// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID:%.+]]) // CHECK: [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* % // CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* % // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: define internal void [[OMP_PARALLEL]]( -// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) // CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* // CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 // CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 @@ -59,7 +59,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 // CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** %{{.+}}, // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK_T]], i32 1, // CHECK: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/taskloop_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_reduction_codegen.cpp index 7d143f33353a7..f50c1318bf22c 100644 --- a/clang/test/OpenMP/taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_reduction_codegen.cpp @@ -160,8 +160,8 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* {{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* // CHECK: ret i32 diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp index 4640b44cbe938..e0fd21d8e937b 100644 --- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -39,18 +39,18 @@ int main(int argc, char **argv) { } // CHECK-LABEL: @main -// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID:%.+]]) // CHECK: [[TD1:%.+]] = call i8* 
@__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* % // CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* % // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @1, i32 [[GTID]]) // CHECK: define internal void [[OMP_PARALLEL]]( -// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) // CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* // CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 // CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 @@ -59,7 +59,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 // CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** %{{.+}}, // CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @1, i32 [[GTID]], i8* [[TASK_T]], i32 1, // CHECK: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp index 16d42ec8e15ea..e7d235b3fc968 100644 --- a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp @@ -157,8 +157,8 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) -// CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// CHECK: [[TMP65:%.*]] = call i8* 
@__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* {{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* // CHECK: ret i32 diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp index 54e0f6ea29eb4..67031105fce4a 100644 --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -266,7 +266,7 @@ int teams_template_struct(void) { // CK4-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK4-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK4-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK4-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CK4-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;main;[[@LINE+14]];9;;\00" // CK4-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;tmain;[[@LINE+7]];9;;\00" @@ -327,7 +327,7 @@ int main (int argc, char **argv) { // CK5-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CK5-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CK5-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CK5-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CK5-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;main;[[@LINE+14]];9;;\00" // CK5-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;tmain;[[@LINE+7]];9;;\00" @@ -414,7 +414,7 @@ int main (int argc, char **argv) { // CK6-LABEL: foo void foo() { - // CK6: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @{{.+}} to void (i32*, i32*, ...)*)) + // CK6: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @{{.+}} to void (i32*, i32*, ...)*)) #pragma omp teams ; } diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp index 0f93fe219aae1..ea4afa1418cb2 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp index 0b7f3b2d8c62f..0c242f851b7d6 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -16,7 +16,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp index 4faa99e2ee362..8c0c8208ab80f 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp index 447a1a60109c2..08b3cd3a47baf 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ 
b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -16,7 +16,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp index c24d1ea787454..a46bb69070155 100644 --- a/clang/test/OpenMP/threadprivate_codegen.cpp +++ b/clang/test/OpenMP/threadprivate_codegen.cpp @@ -133,7 +133,7 @@ struct S5 { // CHECK-DAG: [[GS1:@.+]] = internal global [[S1]] zeroinitializer // CHECK-DAG: [[GS1]].cache. = common global i8** null -// CHECK-DAG: [[DEFAULT_LOC:@.+]] = private unnamed_addr global [[IDENT]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* {{@.+}}, i32 0, i32 0) } +// CHECK-DAG: [[DEFAULT_LOC:@.+]] = private unnamed_addr constant [[IDENT]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* {{@.+}}, i32 0, i32 0) } // CHECK-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer // CHECK-DAG: [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer // CHECK-DAG: [[ARR_X]].cache. = common global i8** null @@ -164,7 +164,9 @@ struct S5 { // CHECK-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer // CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;249;1;;\00" +// CHECK-DEBUG-DAG: [[ID1:@.*]] = private unnamed_addr constant %struct.ident_t { {{.*}} [[LOC1]] // CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;304;1;;\00" +// CHECK-DEBUG-DAG: [[ID2:@.*]] = private unnamed_addr constant %struct.ident_t { {{.*}} [[LOC2]] // CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;422;19;;\00" // CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;459;1;;\00" // CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;476;9;;\00" @@ -199,10 +201,8 @@ struct S5 { // CHECK-TLS-DAG: @__dso_handle = external hidden global i8 // CHECK-TLS-DAG: [[GS1_TLS_INIT:@_ZTHL3gs1]] = internal alias void (), void ()* @__tls_init // CHECK-TLS-DAG: [[ARR_X_TLS_INIT:@_ZTH5arr_x]] = alias void (), void ()* @__tls_init - // CHECK-TLS-DAG: [[ST_S4_ST_TLS_INIT:@_ZTHN2STI2S4E2stE]] = linkonce_odr alias void (), void ()* [[ST_S4_ST_CXX_INIT:@[^, ]*]] - // OMP50-TLS: define internal void [[GS1_CXX_INIT:@.*]]() // OMP50-TLS: call void [[GS1_CTOR1:@.*]]([[S1]]* [[GS1]], i32 5) // OMP50-TLS: call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void ([[S1]]*)* [[GS1_DTOR1:.*]] to void (i8*)*), i8* bitcast ([[S1]]* [[GS1]] to i8*) @@ -269,11 +269,11 @@ static S1 gs1(5); // CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR]], i8* (i8*, i8*)* null, void (i8*)* 
[[GS1_DTOR]]) // CHECK-NEXT: ret void // CHECK-NEXT: } -// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] -// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC1]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + + + // CHECK-DEBUG: @__kmpc_global_thread_num -// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]]) +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[ID1]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]]) // CHECK-DEBUG: define internal {{.*}}i8* [[GS1_CTOR]](i8* %0) // CHECK-DEBUG: store i8* %0, i8** [[ARG_ADDR:%.*]], // CHECK-DEBUG: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] @@ -342,11 +342,11 @@ S1 arr_x[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } }; // CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR]]) // CHECK-NEXT: ret void // CHECK-NEXT: } -// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] -// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC2]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + + + // CHECK-DEBUG: @__kmpc_global_thread_num -// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]]) +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[ID2]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]]) // CHECK-DEBUG: define internal {{.*}}i8* [[ARR_X_CTOR]](i8* %0) // CHECK-DEBUG: } // CHECK-DEBUG: define internal {{.*}}void [[ARR_X_DTOR]](i8* %0) @@ -364,11 +364,11 @@ struct ST { }; -// OMP50-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] -// OMP50-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// OMP50-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC20]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + + + // OMP50-DEBUG: @__kmpc_global_thread_num -// OMP50-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) +// OMP50-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* {{.*}}, i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) // OMP50-DEBUG: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8* 
%0) // OMP50-DEBUG: } // OMP50-DEBUG: define {{.*}} [[S4_CTOR:@.*]]([[S4]]* {{.*}}, @@ -400,7 +400,7 @@ T ST::st(23); // CHECK-LABEL: @main() // CHECK-DEBUG-LABEL: @main() int main() { - // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] + int Res; struct Smain { int a; @@ -424,21 +424,21 @@ int main() { // CHECK: call {{.*}}i{{.*}} @__cxa_guard_acquire // CHECK: call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) // CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]]) -// CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) +// CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) // CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] // CHECK-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) // CHECK: call {{.*}}void @__cxa_guard_release -// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] -// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) + + + // CHECK-DEBUG: call {{.*}}i{{.*}} @__cxa_guard_acquire -// CHECK-DEBUG: call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) -// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]]) -// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] -// CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** +// CHECK-DEBUG: call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC:@.+]]) +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]], 
[[INT]]* [[GS1_A_ADDR]] @@ -457,14 +457,14 @@ int main() { // CHECK-TLS-NEXT: br label %[[INIT_DONE]] // CHECK-TLS: [[INIT_DONE]] #pragma omp threadprivate(sm) - // CHECK: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.) + // CHECK: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.) // CHECK-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC5]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG:[[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] @@ -474,16 +474,16 @@ int main() { // CHECK-TLS-NEXT: [[STATIC_S_A:%.*]] = load i32, i32* [[STATIC_S_A_ADDR]] // CHECK-TLS-NEXT: store i32 [[STATIC_S_A]], i32* [[RES_ADDR:[^,]+]] Res = Static::s.a; - // CHECK: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[SM]].cache.) + // CHECK: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[SM]].cache.) 
// CHECK-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]* // CHECK-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]], [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[SM_A:%.*]] = load [[INT]], [[INT]]* [[SM_A_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC6]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]* // CHECK-DEBUG-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]], [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[SM_A:%.*]] = load [[INT]], [[INT]]* [[SM_A_ADDR]] @@ -496,16 +496,16 @@ int main() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} i32 [[RES]], [[SM_A]] // CHECK-TLS-NEXT: store i32 [[ADD]], i32* [[RES_ADDR]] Res += sm.a; - // CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) + // CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) 
// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC7]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] @@ -532,16 +532,16 @@ int main() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += gs2.a; - // CHECK: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.) + // CHECK: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.) 
// CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* // CHECK-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]], [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[GS3_A:%.*]] = load [[INT]], [[INT]]* [[GS3_A_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC8]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* // CHECK-DEBUG-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]], [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[GS3_A:%.*]] = load [[INT]], [[INT]]* [[GS3_A_ADDR]] @@ -555,7 +555,7 @@ int main() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[GS3_A]] // CHECK-TLS-NEXT: store i32 [[ADD]], i32* [[RES_ADDR]] Res += gs3.a; - // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.) + // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.) 
// CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* // CHECK-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 // CHECK-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]], [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 @@ -564,9 +564,9 @@ int main() { // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC9]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* // CHECK-DEBUG-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 // CHECK-DEBUG-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]], [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 @@ -584,15 +584,15 @@ int main() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} i32 [[RES]], [[ARR_X_1_1_A]] // CHECK-TLS-NEXT: store i32 [[ADD]], i32* [[RES_ADDR]] Res += arr_x[1][1].a; - // CHECK: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.) + // CHECK: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.) 
// CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* // CHECK-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC10]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* // CHECK-DEBUG-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST_ADDR]] // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] @@ -604,16 +604,16 @@ int main() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} i32 [[RES]], [[ST_INT_ST_VAL]] // CHECK-TLS-NEXT: store i32 [[ADD]], i32* [[RES_ADDR]] Res += ST::st; - // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.) + // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.) 
// CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* // CHECK-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST_ADDR]] // CHECK-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC11]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST_ADDR]] // CHECK-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] @@ -627,16 +627,16 @@ int main() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} i32 [[RES]], [[FLOAT_TO_INT_CONV]] // CHECK-TLS-NEXT: store i32 [[ADD]], i32* [[RES_ADDR]] Res += static_cast(ST::st); - // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.) + // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.) 
// CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* // CHECK-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]], [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* [[ST_S4_ST_A_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC12]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* // CHECK-DEBUG-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]], [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* [[ST_S4_ST_A_ADDR]] @@ -665,7 +665,7 @@ int main() { // CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], // CHECK: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]* -// CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) +// CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) 
// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] @@ -683,16 +683,16 @@ int main() { // CHECK-NEXT: } // CHECK: define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}}) // CHECK-DEBUG: define internal {{.*}}i8* [[SM_CTOR]](i8* %0) -// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] -// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] -// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) +// CHECK-DEBUG: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* {{.*}}) + + + // CHECK-DEBUG: store i8* %0, i8** [[ARG_ADDR:%.*]], // CHECK-DEBUG: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK-DEBUG: [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]* -// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] -// CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** +// CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] @@ -749,20 +749,20 @@ int main() { // CHECK-DEBUG-LABEL: @{{.*}}foobar{{.*}}() // CHECK-TLS-LABEL: @{{.*}}foobar{{.*}}() int foobar() { - // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] + int Res; // CHECK: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) - // CHECK: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.) + // CHECK: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.) 
// CHECK-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC13]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC13]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* {{.*}}) + // CHECK-DEBUG: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + + + // CHECK-DEBUG-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] @@ -772,16 +772,16 @@ int foobar() { // CHECK-TLS-NEXT: [[STATIC_S_A:%.*]] = load i32, i32* [[STATIC_S_A_ADDR]] // CHECK-TLS-NEXT: store i32 [[STATIC_S_A]], i32* [[RES_ADDR:[^,]+]] Res = Static::s.a; - // CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) + // CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) 
// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC14]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] @@ -808,16 +808,16 @@ int foobar() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} i32 [[RES]], [[GS2_A]] // CHECK-TLS-NEXT: store i32 [[ADD]], i32* [[RES:.+]] Res += gs2.a; - // CHECK: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.) + // CHECK: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.) 
// CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* // CHECK-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]], [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[GS3_A:%.*]] = load [[INT]], [[INT]]* [[GS3_A_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC15]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* // CHECK-DEBUG-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]], [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[GS3_A:%.*]] = load [[INT]], [[INT]]* [[GS3_A_ADDR]] @@ -831,7 +831,7 @@ int foobar() { // CHECK-TLS-DEBUG: [[ADD:%.*]]= add nsw i32 [[RES]], [[GS3_A]] // CHECK-TLS-DEBUG: store i32 [[ADD]], i32* [[RES_ADDR]] Res += gs3.a; - // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.) + // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.) 
// CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* // CHECK-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 // CHECK-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]], [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 @@ -840,9 +840,9 @@ int foobar() { // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC16]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* // CHECK-DEBUG-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 // CHECK-DEBUG-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]], [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 @@ -860,15 +860,15 @@ int foobar() { // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += arr_x[1][1].a; - // CHECK: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.) + // CHECK: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.) 
// CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* // CHECK-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC17]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* // CHECK-DEBUG-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST_ADDR]] // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] @@ -880,16 +880,16 @@ int foobar() { // OMP45-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] // OMP45-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += ST::st; - // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.) + // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.) 
// CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* // CHECK-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST_ADDR]] // CHECK-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC18]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST_ADDR]] // CHECK-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] @@ -903,16 +903,16 @@ int foobar() { // OMP45-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] // OMP45-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += static_cast(ST::st); - // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.) + // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 {{.*}}, i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.) 
// CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* // CHECK-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]], [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* [[ST_S4_ST_A_ADDR]] // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] - // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 - // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC19]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] - // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* {{.*}}, i32 {{.*}}, i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + + // CHECK-DEBUG-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* // CHECK-DEBUG-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]], [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* [[ST_S4_ST_A_ADDR]] @@ -955,11 +955,11 @@ int foobar() { // OMP45-NEXT: } // OMP45: define {{.*}} [[S4_DTOR]]([[S4]]* {{.*}}) -// OMP45-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] -// OMP45-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// OMP45-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC20]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + + + // OMP45-DEBUG: @__kmpc_global_thread_num -// OMP45-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) +// OMP45-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* {{.*}}, i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) // OMP45-DEBUG: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8* %0) // OMP45-DEBUG: } // OMP45-DEBUG: define {{.*}} [[S4_CTOR:@.*]]([[S4]]* {{.*}}, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 95eed59f1b3d0..a2a440d65fd8b 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -210,12 +210,19 @@ class OpenMPIRBuilder { /// Return the (LLVM-IR) string describing the default source location. Constant *getOrCreateDefaultSrcLocStr(); + /// Return the (LLVM-IR) string describing the source location identified by + /// the arguments. + Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, + unsigned Line, unsigned Column); + /// Return the (LLVM-IR) string describing the source location \p Loc. Constant *getOrCreateSrcLocStr(const LocationDescription &Loc); /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. 
+ /// TODO: Create a enum class for the Reserve2Flags Value *getOrCreateIdent(Constant *SrcLocStr, - omp::IdentFlag Flags = omp::IdentFlag(0)); + omp::IdentFlag Flags = omp::IdentFlag(0), + unsigned Reserve2Flags = 0); /// Generate control flow and cleanup for cancellation. /// @@ -280,7 +287,7 @@ class OpenMPIRBuilder { StringMap SrcLocStrMap; /// Map to remember existing ident_t*. - DenseMap, GlobalVariable *> IdentMap; + DenseMap, Value *> IdentMap; /// Helper that contains information about regions we need to outline /// during finalization. diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index ffec4ff64ca66..b90480ebc59ef 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -386,8 +386,12 @@ class IRBuilderBase { /// filled in with the null terminated string value specified. The new global /// variable will be marked mergable with any others of the same contents. If /// Name is specified, it is the name of the global variable created. + /// + /// If no module is given via \p M, it is take from the insertion point basic + /// block. GlobalVariable *CreateGlobalString(StringRef Str, const Twine &Name = "", - unsigned AddressSpace = 0); + unsigned AddressSpace = 0, + Module *M = nullptr); /// Get a constant value representing either true or false. ConstantInt *getInt1(bool V) { @@ -1934,9 +1938,13 @@ class IRBuilderBase { /// Same as CreateGlobalString, but return a pointer with "i8*" type /// instead of a pointer to array of i8. + /// + /// If no module is given via \p M, it is take from the insertion point basic + /// block. Constant *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "", - unsigned AddressSpace = 0) { - GlobalVariable *GV = CreateGlobalString(Str, Name, AddressSpace); + unsigned AddressSpace = 0, + Module *M = nullptr) { + GlobalVariable *GV = CreateGlobalString(Str, Name, AddressSpace, M); Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0); Constant *Indices[] = {Zero, Zero}; return ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 9468a3aa3c8dd..6c72cd01ce6ea 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -185,16 +185,18 @@ void OpenMPIRBuilder::finalize() { } Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, - IdentFlag LocFlags) { + IdentFlag LocFlags, + unsigned Reserve2Flags) { // Enable "C-mode". 
LocFlags |= OMP_IDENT_FLAG_KMPC; - GlobalVariable *&DefaultIdent = IdentMap[{SrcLocStr, uint64_t(LocFlags)}]; - if (!DefaultIdent) { + Value *&Ident = + IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; + if (!Ident) { Constant *I32Null = ConstantInt::getNullValue(Int32); - Constant *IdentData[] = {I32Null, - ConstantInt::get(Int32, uint64_t(LocFlags)), - I32Null, I32Null, SrcLocStr}; + Constant *IdentData[] = { + I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), + ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; Constant *Initializer = ConstantStruct::get( cast(IdentPtr->getPointerElementType()), IdentData); @@ -203,15 +205,16 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, for (GlobalVariable &GV : M.getGlobalList()) if (GV.getType() == IdentPtr && GV.hasInitializer()) if (GV.getInitializer() == Initializer) - return DefaultIdent = &GV; - - DefaultIdent = new GlobalVariable(M, IdentPtr->getPointerElementType(), - /* isConstant = */ false, - GlobalValue::PrivateLinkage, Initializer); - DefaultIdent->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - DefaultIdent->setAlignment(Align(8)); + return Ident = &GV; + + auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(), + /* isConstant = */ true, + GlobalValue::PrivateLinkage, Initializer); + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + GV->setAlignment(Align(8)); + Ident = GV; } - return DefaultIdent; + return Ident; } Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { @@ -227,11 +230,30 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { GV.getInitializer() == Initializer) return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr); - SrcLocStr = Builder.CreateGlobalStringPtr(LocStr); + SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "", + /* AddressSpace */ 0, &M); } return SrcLocStr; } +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, + StringRef FileName, + unsigned Line, + unsigned Column) { + SmallString<128> Buffer; + Buffer.push_back(';'); + Buffer.append(FileName); + Buffer.push_back(';'); + Buffer.append(FunctionName); + Buffer.push_back(';'); + Buffer.append(std::to_string(Line)); + Buffer.push_back(';'); + Buffer.append(std::to_string(Column)); + Buffer.push_back(';'); + Buffer.push_back(';'); + return getOrCreateSrcLocStr(Buffer.str()); +} + Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); } @@ -241,17 +263,13 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { DILocation *DIL = Loc.DL.get(); if (!DIL) return getOrCreateDefaultSrcLocStr(); - StringRef Filename = + StringRef FileName = !DIL->getFilename().empty() ? DIL->getFilename() : M.getName(); StringRef Function = DIL->getScope()->getSubprogram()->getName(); Function = !Function.empty() ? 
Function : Loc.IP.getBlock()->getParent()->getName(); - std::string LineStr = std::to_string(DIL->getLine()); - std::string ColumnStr = std::to_string(DIL->getColumn()); - std::stringstream SrcLocStr; - SrcLocStr << ";" << Filename.data() << ";" << Function.data() << ";" - << LineStr << ";" << ColumnStr << ";;"; - return getOrCreateSrcLocStr(SrcLocStr.str()); + return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), + DIL->getColumn()); } Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 1fffce015f707..a82f15895782c 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -42,13 +42,14 @@ using namespace llvm; /// created. GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name, - unsigned AddressSpace) { + unsigned AddressSpace, + Module *M) { Constant *StrConstant = ConstantDataArray::getString(Context, Str); - Module &M = *BB->getParent()->getParent(); - auto *GV = new GlobalVariable(M, StrConstant->getType(), true, - GlobalValue::PrivateLinkage, StrConstant, Name, - nullptr, GlobalVariable::NotThreadLocal, - AddressSpace); + if (!M) + M = BB->getParent()->getParent(); + auto *GV = new GlobalVariable( + *M, StrConstant->getType(), true, GlobalValue::PrivateLinkage, + StrConstant, Name, nullptr, GlobalVariable::NotThreadLocal, AddressSpace); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); GV->setAlignment(Align(1)); return GV; diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll index a25d980b1806f..9074b948cc3fe 100644 --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -5,21 +5,21 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 %struct.ident_t = type { i32, i32, i32, i32, i8* } -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 -@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 -@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 ; UTC_ARGS: --disable -; CHECK-DAG: @0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 -; CHECK-DAG: @1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 -; CHECK-DAG: @2 = private unnamed_addr 
global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +; CHECK-DAG: @0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +; CHECK-DAG: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 +; CHECK-DAG: @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 -; CHECK-DAG: @3 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +; CHECK-DAG: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 ; UTC_ARGS: --enable From 1274d83482b950fa31a34a5fdc3a0575c8d1b6a4 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 15 Aug 2020 17:27:14 -0500 Subject: [PATCH 014/109] Do not use TBAA in type punning reduction GPU code PR46156 When we implement OpenMP GPU reductions we use type punning a lot during the shuffle and reduce operations. This is not always compatible with language rules on aliasing. So far we generated TBAA which later allowed to remove some of the reduce code as accesses and initialization were "known to not alias". With this patch we avoid TBAA in this step, hopefully for all accesses that we need to. Verified on the reproducer of PR46156 and QMCPack. 
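For illustration only (this sketch is not part of the change, and
shuffleAcrossLanes is a made-up stand-in for the __kmpc_shuffle_int64
runtime call), the emitted shuffle/reduce sequence is roughly equivalent to
punning each element through a 64-bit integer temporary:

  #include <cstdint>

  int64_t shuffleAcrossLanes(int64_t Bits); // hypothetical helper

  float reduceElem(float V) {
    int64_t Tmp = 0;
    *reinterpret_cast<float *>(&Tmp) = V;    // float store into the i64 slot
    Tmp = shuffleAcrossLanes(Tmp);           // moved between lanes as an integer
    return *reinterpret_cast<float *>(&Tmp); // read back as float
  }

When the float store and the integer accesses carry distinct TBAA tags, the
optimizer is allowed to treat them as non-aliasing and can delete parts of
the reduce code, as described above; dropping the TBAA on these accesses
avoids that.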
Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D86037 --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 61 +++++++++++++------ ...arallel_reduction_codegen_tbaa_PR46146.cpp | 38 ++++++++++++ 2 files changed, 80 insertions(+), 19 deletions(-) create mode 100644 clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 1f79b33772f38..de78926755df5 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -2857,8 +2857,12 @@ static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val, Address CastItem = CGF.CreateMemTemp(CastTy); Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace())); - CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy); - return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc); + CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy, + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()); + return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc, + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()); } /// This function creates calls to one of two shuffle functions to copy @@ -2945,9 +2949,14 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, ThenBB, ExitBB); CGF.EmitBlock(ThenBB); llvm::Value *Res = createRuntimeShuffleFunction( - CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), + CGF, + CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc, + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()), IntType, Offset, Loc); - CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); + CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType, + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()); Address LocalPtr = Bld.CreateConstGEP(Ptr, 1); Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1); PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB); @@ -2956,9 +2965,14 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, CGF.EmitBlock(ExitBB); } else { llvm::Value *Res = createRuntimeShuffleFunction( - CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), + CGF, + CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc, + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()), IntType, Offset, Loc); - CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); + CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType, + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()); Ptr = Bld.CreateConstGEP(Ptr, 1); ElemPtr = Bld.CreateConstGEP(ElemPtr, 1); } @@ -3112,12 +3126,14 @@ static void emitReductionListCopy( } else { switch (CGF.getEvaluationKind(Private->getType())) { case TEK_Scalar: { - llvm::Value *Elem = - CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false, - Private->getType(), Private->getExprLoc()); + llvm::Value *Elem = CGF.EmitLoadOfScalar( + SrcElementAddr, /*Volatile=*/false, Private->getType(), + Private->getExprLoc(), LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()); // Store the source element value to the dest element address. 
- CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, - Private->getType()); + CGF.EmitStoreOfScalar( + Elem, DestElementAddr, /*Volatile=*/false, Private->getType(), + LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo()); break; } case TEK_Complex: { @@ -3260,8 +3276,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); Address LocalReduceList( Bld.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, Loc), + CGF.EmitLoadOfScalar( + AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc, + LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo()), CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), CGF.getPointerAlign()); @@ -3339,10 +3356,13 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // elem = *elemptr //*MediumPtr = elem - llvm::Value *Elem = - CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc); + llvm::Value *Elem = CGF.EmitLoadOfScalar( + ElemPtr, /*Volatile=*/false, CType, Loc, + LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo()); // Store the source element value to the dest element address. - CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType); + CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType, + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()); Bld.CreateBr(MergeBB); @@ -3722,8 +3742,9 @@ static llvm::Value *emitListToGlobalCopyFunction( GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { case TEK_Scalar: { - llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, - Private->getType(), Loc); + llvm::Value *V = CGF.EmitLoadOfScalar( + ElemPtr, /*Volatile=*/false, Private->getType(), Loc, + LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo()); CGF.EmitStoreOfScalar(V, GlobLVal); break; } @@ -3926,7 +3947,9 @@ static llvm::Value *emitGlobalToListCopyFunction( switch (CGF.getEvaluationKind(Private->getType())) { case TEK_Scalar: { llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc); - CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType()); + CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType(), + LValueBaseInfo(AlignmentSource::Type), + TBAAAccessInfo()); break; } case TEK_Complex: { diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp new file mode 100644 index 0000000000000..8f814de05b70b --- /dev/null +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 
-disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -fopenmp-cuda-mode -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +#include + +// Verify we do not add tbaa metadata to type punned memory operations: + +// CHECK: call i64 @__kmpc_shuffle_int64( +// CHECK-NEXT: store i64 %{{.*}}, i64* %{{.*}}, align {{[0-9]+$}} + +// CHECK: call i64 @__kmpc_shuffle_int64( +// CHECK-NEXT: store i64 %{{.*}}, i64* %{{.*}}, align {{[0-9]+$}} + +template +void complex_reduction() { +#pragma omp target teams distribute + for (int ib = 0; ib < 100; ib++) { + std::complex partial_sum; + const int istart = ib * 4; + const int iend = (ib + 1) * 4; +#pragma omp parallel for reduction(+ \ + : partial_sum) + for (int i = istart; i < iend; i++) + partial_sum += std::complex(i, i); + } +} + +void test() { + complex_reduction(); + complex_reduction(); +} +#endif From 45574524c3a15f1e34c7d181e3bc17e9e7d90210 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 25 Aug 2020 17:41:59 +0200 Subject: [PATCH 015/109] OpenMP: Fix for PR46868: Incorrect target map https://bugs.llvm.org/attachment.cgi?id=23891 by Alexey Bataev. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 93 ++++++++++++++----- .../target_data_use_device_ptr_codegen.cpp | 14 +-- clang/test/OpenMP/target_map_codegen.cpp | 21 ++--- clang/test/OpenMP/target_update_codegen.cpp | 3 +- openmp/libomptarget/src/omptarget.cpp | 9 +- .../test/env/base_ptr_ref_count.c | 47 ++++++++++ 6 files changed, 137 insertions(+), 50 deletions(-) create mode 100644 openmp/libomptarget/test/env/base_ptr_ref_count.c diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b221deab0174c..14e0cba62b237 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7265,6 +7265,8 @@ class MappableExprsHandler { // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM // // map(p[1:24]) + // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ + // in unified shared memory mode or for local pointers // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // // map(s) @@ -7400,6 +7402,7 @@ class MappableExprsHandler { // Track if the map information being generated is the first for a list of // components. 
bool IsExpressionFirstInfo = true; + bool FirstPointerInComplexData = false; Address BP = Address::invalid(); const Expr *AssocExpr = I->getAssociatedExpression(); const auto *AE = dyn_cast(AssocExpr); @@ -7442,10 +7445,15 @@ class MappableExprsHandler { QualType Ty = I->getAssociatedDeclaration()->getType().getNonReferenceType(); if (Ty->isAnyPointerType() && std::next(I) != CE) { - BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); - - // We do not need to generate individual map information for the - // pointer, it can be associated with the combined storage. + // No need to generate individual map information for the pointer, it + // can be associated with the combined storage if shared memory mode is + // active or the base declaration is not global variable. + const auto *VD = dyn_cast(I->getAssociatedDeclaration()); + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + !VD || VD->hasLocalStorage()) + BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); + else + FirstPointerInComplexData = IsCaptureFirstInfo; ++I; } } @@ -7481,8 +7489,19 @@ class MappableExprsHandler { EncounteredME = dyn_cast(I->getAssociatedExpression()); // If we encounter a PTR_AND_OBJ entry from now on it should be marked // as MEMBER_OF the parent struct. - if (EncounteredME) + if (EncounteredME) { ShouldBeMemberOf = true; + // Do not emit as complex pointer if this is actually not array-like + // expression. + if (FirstPointerInComplexData) { + QualType Ty = std::prev(I) + ->getAssociatedDeclaration() + ->getType() + .getNonReferenceType(); + BP = CGF.EmitLoadOfPointer(BP, Ty->castAs()); + FirstPointerInComplexData = false; + } + } } auto Next = std::next(I); @@ -7615,10 +7634,11 @@ class MappableExprsHandler { // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). - OpenMPOffloadMappingFlags Flags = getMapTypeBits( - MapType, MapModifiers, IsImplicit, - !IsExpressionFirstInfo || RequiresReference, - IsCaptureFirstInfo && !RequiresReference); + OpenMPOffloadMappingFlags Flags = + getMapTypeBits(MapType, MapModifiers, IsImplicit, + !IsExpressionFirstInfo || RequiresReference || + FirstPointerInComplexData, + IsCaptureFirstInfo && !RequiresReference); if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, @@ -7676,6 +7696,7 @@ class MappableExprsHandler { IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; + FirstPointerInComplexData = false; } } } @@ -7906,6 +7927,10 @@ class MappableExprsHandler { // emission of that entry until the whole struct has been processed. llvm::MapVector> DeferredInfo; + MapBaseValuesArrayTy UseDevicePtrBasePointers; + MapValuesArrayTy UseDevicePtrPointers; + MapValuesArrayTy UseDevicePtrSizes; + MapFlagsArrayTy UseDevicePtrTypes; for (const auto *C : CurExecDir->getClausesOfKind()) { @@ -7922,15 +7947,27 @@ class MappableExprsHandler { // We potentially have map information for this declaration already. // Look for the first set of components that refer to it. if (It != Info.end()) { - auto CI = std::find_if( - It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { - return MI.Components.back().getAssociatedDeclaration() == VD; - }); + auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { + return MI.Components.back().getAssociatedDeclaration() == VD; + }); // If we found a map entry, signal that the pointer has to be returned // and move on to the next declaration. 
+ // Exclude cases where the base pointer is mapped as array subscript, + // array section or array shaping. The base address is passed as a + // pointer to base in this case and cannot be used as a base for + // use_device_ptr list item. if (CI != It->second.end()) { - CI->ReturnDevicePointer = true; - continue; + auto PrevCI = std::next(CI->Components.rbegin()); + const auto *VarD = dyn_cast(VD); + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + isa(IE) || + !VD->getType().getNonReferenceType()->isPointerType() || + PrevCI == CI->Components.rend() || + isa(PrevCI->getAssociatedExpression()) || !VarD || + VarD->hasLocalStorage()) { + CI->ReturnDevicePointer = true; + continue; + } } } @@ -7951,10 +7988,12 @@ class MappableExprsHandler { } else { llvm::Value *Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); - BasePointers.emplace_back(Ptr, VD); - Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); - Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + UseDevicePtrBasePointers.emplace_back(Ptr, VD); + UseDevicePtrPointers.push_back(Ptr); + UseDevicePtrSizes.push_back( + llvm::Constant::getNullValue(CGF.Int64Ty)); + UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM | + OMP_MAP_TARGET_PARAM); } } } @@ -8015,10 +8054,12 @@ class MappableExprsHandler { Ptr = CGF.EmitLValue(IE).getPointer(CGF); else Ptr = CGF.EmitScalarExpr(IE); - BasePointers.emplace_back(Ptr, VD); - Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); - Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + UseDevicePtrBasePointers.emplace_back(Ptr, VD); + UseDevicePtrPointers.push_back(Ptr); + UseDevicePtrSizes.push_back( + llvm::Constant::getNullValue(CGF.Int64Ty)); + UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM | + OMP_MAP_TARGET_PARAM); } } } @@ -8108,6 +8149,12 @@ class MappableExprsHandler { Sizes.append(CurSizes.begin(), CurSizes.end()); Types.append(CurTypes.begin(), CurTypes.end()); } + // Append data for use_device_ptr clauses. 
+ BasePointers.append(UseDevicePtrBasePointers.begin(), + UseDevicePtrBasePointers.end()); + Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end()); + Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end()); + Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end()); } /// Generate all the base pointers, section pointers, sizes and map types for diff --git a/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp b/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp index a3d8043b6b4e9..fa53cc4aa8f7a 100644 --- a/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp +++ b/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp @@ -22,18 +22,18 @@ double *g; // CK1: @g = global double* -// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE00:@.+]] = {{.*}}constant [2 x i64] [i64 51, i64 96] // CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i64] [i64 99] // CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i64] [i64 99] // CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i64] [i64 99] // CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i64] [i64 99] // CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i64] [i64 99] // CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i64] [i64 99] -// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i64] [{{i64 35, i64 99|i64 99, i64 35}}] +// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 35] // CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99] // CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99] -// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35] -// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35] +// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 96] +// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 96] // CK1-LABEL: @_Z3foo template @@ -42,7 +42,7 @@ void foo(float *&lr, T *&tr) { T *t; // CK1: [[T:%.+]] = load double*, double** [[DECL:@g]], - // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to double** // CK1: store double* [[T]], double** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]] @@ -280,7 +280,7 @@ void foo(float *&lr, T *&tr) { ++l; ++t; // CK1: [[T1:%.+]] = load i32*, i32** [[DECL:%.+]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** // CK1: store i32* [[T1]], i32** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE11]] @@ -300,7 +300,7 @@ void foo(float *&lr, T *&tr) { // CK1: [[T2:%.+]] = load i32**, i32*** [[DECL:%.+]], // CK1: [[T1:%.+]] = load i32*, i32** [[T2]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** // CK1: store i32* [[T1]], i32** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE12]] diff --git a/clang/test/OpenMP/target_map_codegen.cpp b/clang/test/OpenMP/target_map_codegen.cpp index 92e0224a2de3b..ad54b560889b9 100644 --- a/clang/test/OpenMP/target_map_codegen.cpp +++ b/clang/test/OpenMP/target_map_codegen.cpp @@ -3195,7 +3195,7 @@ int explicit_maps_template_args_and_members(int a){ // CK22-LABEL: 
@.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE04:@.+]] = private {{.*}}constant [1 x i64] [i64 20] -// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 35] +// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 51] // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE05:@.+]] = private {{.*}}constant [1 x i64] [i64 4] @@ -3215,7 +3215,7 @@ int explicit_maps_template_args_and_members(int a){ // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE09:@.+]] = private {{.*}}constant [1 x i64] [i64 20] -// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 35] +// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 51] // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE10:@.+]] = private {{.*}}constant [1 x i64] [i64 4] @@ -3235,7 +3235,7 @@ int explicit_maps_template_args_and_members(int a){ // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE14:@.+]] = private {{.*}}constant [1 x i64] [i64 20] -// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 35] +// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 51] int a; int c[100]; @@ -3331,11 +3331,10 @@ int explicit_maps_globals(void){ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32*** // CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** - // CK22-DAG: store i32* [[RVAR0:%.+]], i32** [[CBP0]] + // CK22-DAG: store i32** @d, i32*** [[CBP0]] // CK22-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] - // CK22-DAG: [[RVAR0]] = load i32*, i32** @d // CK22-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 2 // CK22-DAG: [[RVAR00]] = load i32*, i32** @d @@ -3414,11 +3413,10 @@ int explicit_maps_globals(void){ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]** + // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]*** // CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[ST]]** - // CK22-DAG: store [[ST]]* [[RVAR0:%.+]], [[ST]]** [[CBP0]] + // CK22-DAG: store [[ST]]** @sd, [[ST]]*** [[CBP0]] // CK22-DAG: store [[ST]]* [[SEC0:%.+]], [[ST]]** [[CP0]] - // CK22-DAG: [[RVAR0]] = load [[ST]]*, [[ST]]** @sd // CK22-DAG: [[SEC0]] = getelementptr {{.*}}[[ST]]* [[RVAR00:%.+]], i{{.+}} 2 // CK22-DAG: [[RVAR00]] = load [[ST]]*, [[ST]]** @sd @@ -3497,11 +3495,10 @@ int explicit_maps_globals(void){ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]** + // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]*** // CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[STT]]** - // CK22-DAG: store [[STT]]* [[RVAR0:%.+]], [[STT]]** [[CBP0]] + // CK22-DAG: store [[STT]]** @std, [[STT]]*** [[CBP0]] // CK22-DAG: store [[STT]]* [[SEC0:%.+]], [[STT]]** [[CP0]] 
- // CK22-DAG: [[RVAR0]] = load [[STT]]*, [[STT]]** @std // CK22-DAG: [[SEC0]] = getelementptr {{.*}}[[STT]]* [[RVAR00:%.+]], i{{.+}} 2 // CK22-DAG: [[RVAR00]] = load [[STT]]*, [[STT]]** @std diff --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp index fd5a62a8067c7..a308b9ed6deb7 100644 --- a/clang/test/OpenMP/target_update_codegen.cpp +++ b/clang/test/OpenMP/target_update_codegen.cpp @@ -737,7 +737,7 @@ void lvalue(int **BB, int a, int b) { // CK13-64-DAG: [[ADD_PTR]] = getelementptr inbounds i32*, i32** [[B_VAL:%.+]], i64 [[IDX_EXT:%.+]] // CK13-32-DAG: [[ADD_PTR]] = getelementptr inbounds i32*, i32** [[B_VAL:%.+]], i32 [[A_ADDR:%.+]] // CK13-64-DAG: [[IDX_EXT]] = sext i32 [[TWO:%.+]] to i64 - // CK13-DAG: [[B_VAL]] = load i32**, i32*** [[BB_ADDR]] + // CK13-DAG: [[B_VAL]] = load i32**, i32*** [[BB_ADDR:%.+]] #pragma omp target update to(*(*(BB+a)+b)) *(*(BB+a)+b) = 1; #pragma omp target update from(*(*(BB+a)+b)) @@ -978,6 +978,7 @@ void lvalue_find_base(float **f, SSA *sa) { // CK17-DAG: [[FIVE]] = load i32, i32* [[I_2:%.+]], // CK17-DAG: [[I_2]] = getelementptr inbounds [[SSA:%.+]], [[SSA]]* [[FOUR:%.+]], i32 0, i32 0 // CK17-DAG: [[FOUR]] = load [[SSA]]*, [[SSA]]** [[SSA_ADDR:%.+]], + // CK17-DAG: [[F]] = load float**, float*** [[F_ADDR:%.+]], #pragma omp target update to(*(sa->sa->i+*(1+sa->i+f))) *(sa->sa->i+*(1+sa->i+f)) = 1; diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index cce9dbd2fe154..15712323d43ec 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -746,14 +746,9 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num, return OFFLOAD_FAIL; } } - } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { - TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, - false, IsHostPtr); - TgtBaseOffset = 0; // no offset for ptrs. 
- DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to " - "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), - DPxPTR(HstPtrBase)); } else { + if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) + HstPtrBase = *reinterpret_cast(HstPtrBase); TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, false, IsHostPtr); TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; diff --git a/openmp/libomptarget/test/env/base_ptr_ref_count.c b/openmp/libomptarget/test/env/base_ptr_ref_count.c new file mode 100644 index 0000000000000..5b62f5eb8ac32 --- /dev/null +++ b/openmp/libomptarget/test/env/base_ptr_ref_count.c @@ -0,0 +1,47 @@ +// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu +// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda +// REQUIRES: libomptarget-debug + +#include +#include + +int *allocate(size_t n) { + int *ptr = malloc(sizeof(int) * n); +#pragma omp target enter data map(to : ptr[:n]) + return ptr; +} + +void deallocate(int *ptr, size_t n) { +#pragma omp target exit data map(delete : ptr[:n]) + free(ptr); +} + +#pragma omp declare target +int *cnt; +void foo() { + ++(*cnt); +} +#pragma omp end declare target + +int main(void) { + int *A = allocate(10); + int *V = allocate(10); + deallocate(A, 10); + deallocate(V, 10); +// CHECK-NOT: RefCount=2 + cnt = malloc(sizeof(int)); + *cnt = 0; +#pragma omp target data map(cnt[:1]) +#pragma omp target + foo(); + printf("Cnt = %d.\n", *cnt); +// CHECK: Cnt = 1. + free(cnt); + + return 0; +} + + From 4d16d8dfe50eb45545e844c3c9acafd363637dad Mon Sep 17 00:00:00 2001 From: QingShan Zhang Date: Mon, 24 Aug 2020 02:50:58 +0000 Subject: [PATCH 016/109] [DAGCombine] Remove dead node when it is created by getNegatedExpression We hit the compiling time reported by https://bugs.llvm.org/show_bug.cgi?id=46877 and the reason is the same as D77319. So we need to remove the dead node we created to avoid increase the problem size of DAGCombiner. 
Reviewed By: Spatel Differential Revision: https://reviews.llvm.org/D86183 (cherry picked from commit 960cbc53ca170c8c605bf83fa63b49ab27a56f65) --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 11 + llvm/test/CodeGen/X86/pr46877.ll | 416 ++++++++++++++++++ 2 files changed, 427 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr46877.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 96df20039b15d..94cb6da3d69e9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5726,6 +5726,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, return SDValue(); } + auto RemoveDeadNode = [&](SDValue N) { + if (N && N.getNode()->use_empty()) + DAG.RemoveDeadNode(N.getNode()); + }; + SDLoc DL(Op); switch (Opcode) { @@ -5804,12 +5809,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; + RemoveDeadNode(NegY); return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); } // Negate the Y if it is not expensive. if (NegY) { Cost = CostY; + RemoveDeadNode(NegX); return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); } break; @@ -5847,6 +5854,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; + RemoveDeadNode(NegY); return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); } @@ -5858,6 +5866,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the Y if it is not expensive. if (NegY) { Cost = CostY; + RemoveDeadNode(NegX); return DAG.getNode(Opcode, DL, VT, X, NegY, Flags); } break; @@ -5887,12 +5896,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = std::min(CostX, CostZ); + RemoveDeadNode(NegY); return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); } // Negate the Y if it is not expensive. if (NegY) { Cost = std::min(CostY, CostZ); + RemoveDeadNode(NegX); return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); } break; diff --git a/llvm/test/CodeGen/X86/pr46877.ll b/llvm/test/CodeGen/X86/pr46877.ll new file mode 100644 index 0000000000000..581b2d586fa0c --- /dev/null +++ b/llvm/test/CodeGen/X86/pr46877.ll @@ -0,0 +1,416 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 < %s -mcpu=haswell -mtriple=x86_64 | FileCheck %s + +; Verify that we are not exponentially increasing compiling time. 
+define void @tester(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16, float %17, float %18, float %19, float %20, float %21, float %22, float %23, float %24, float %25, float %26, float %27, float %28, float %29, float %30, float %31, float %32, float %33, float %34, float %35, float %36, float %37, float %38, float %39, float %40, float %41, float %42, float %43, float %44, float %45, float %46, float %47, float %48, float %49, float %50, float %51, float %52, float %53, float %54, float %55, float %56, float %57, float %58, float %59, float %60, float %61, float %62, float %63, float %64, float %65, float %66, float %67, float %68, float %69, float %70, float %71, float %72, float %73, float %74, float %75, float %76, float %77, float %78, float %79, float* %80) { +; CHECK-LABEL: tester: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovaps %xmm3, %xmm15 +; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero +; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm12 +; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm3 +; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm3 = (xmm15 * xmm3) - xmm0 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm12 * xmm5) + xmm0 +; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm2 +; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm3 +; CHECK-NEXT: vmulss %xmm6, %xmm12, %xmm2 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm7 * xmm2) + xmm0 +; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm5 +; CHECK-NEXT: vmulss %xmm0, %xmm13, %xmm2 +; CHECK-NEXT: vmovss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: vmulss %xmm2, %xmm10, %xmm2 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * mem) + xmm0 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm7, %xmm3 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm3 = -(xmm3 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm3 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm4 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm14 * xmm4) + xmm0 +; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm4 +; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm5 = -(xmm5 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm5, %xmm2, %xmm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm7, %xmm5 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm10 * xmm5) + xmm0 +; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4 +; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss %xmm0, %xmm9, %xmm6 +; CHECK-NEXT: vmovss %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: vmulss %xmm6, %xmm14, %xmm5 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm12 * xmm5) + xmm0 +; CHECK-NEXT: vmulss %xmm5, %xmm2, %xmm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm13 * xmm5) + xmm0 +; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm11 +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm11 * xmm3) + xmm0 +; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm15 * xmm3) + xmm0 +; 
CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm4 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm8, %xmm6 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm6 = -(xmm6 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm6, %xmm4, %xmm4 +; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm4 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm1 * xmm4) + xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm6 = -(xmm6 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm6, %xmm4, %xmm4 +; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm1 +; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm1 * xmm4) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm10 +; CHECK-NEXT: vmulss %xmm0, %xmm12, %xmm6 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm4 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm13, %xmm5 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm7 * xmm5) + xmm0 +; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm5 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm5, %xmm5 +; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm12 +; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm7 * xmm5) + xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss %xmm6, %xmm3, %xmm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm10 * xmm2) + xmm0 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm9 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm1 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm1 = -(xmm1 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm5, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm5 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm5 = -(xmm5 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vmulss %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm2 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm13 * xmm2) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm12, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm4 +; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm5, %xmm3 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm13 * xmm3) + xmm0 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm2 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * mem) + xmm0 +; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), 
%xmm1 # 4-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0 +; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm4, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2 +; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 4-byte Reload +; CHECK-NEXT: # xmm12 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm12, %xmm2 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm7 = -(xmm7 * mem) + xmm0 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm13 * xmm2) + xmm0 +; CHECK-NEXT: vmulss %xmm7, %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm8 = -(xmm8 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm8, %xmm2 +; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm15 * xmm2) + xmm0 +; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vmulss %xmm0, %xmm5, %xmm2 +; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm10 * xmm2) + xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm5 * xmm3) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm8 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm4 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1 +; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm10 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm11 = -(xmm5 * xmm11) + xmm0 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm2 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm15 * xmm2) + xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm4 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm11, %xmm2 +; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2 +; CHECK-NEXT: vfnmadd132ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm14 # 4-byte Folded Reload +; CHECK-NEXT: # xmm14 = -(xmm14 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm2, %xmm14, %xmm9 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm11 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm11 = -(xmm11 * mem) + xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm5, %xmm7 +; CHECK-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm5, %xmm5 # 4-byte Folded Reload +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm1 +; CHECK-NEXT: vmulss %xmm6, %xmm15, %xmm6 +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm6 = -(xmm3 * xmm6) + xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm4 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm3 * xmm4) + xmm0 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm7 = -(xmm3 * xmm7) + xmm0 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm3 * xmm5) + xmm0 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm12, %xmm2 +; CHECK-NEXT: vmulss %xmm0, 
%xmm13, %xmm3 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm3 +; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm12 * xmm3) + xmm0 +; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm12 * xmm2) + xmm0 +; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm15 * xmm1) - xmm0 +; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm8 = -(xmm8 * mem) + xmm0 +; CHECK-NEXT: vmulss %xmm8, %xmm9, %xmm0 +; CHECK-NEXT: vmulss %xmm6, %xmm0, %xmm0 +; CHECK-NEXT: vmulss %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vmulss %xmm7, %xmm0, %xmm0 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm4 +; CHECK-NEXT: vmulss %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vmulss %xmm5, %xmm11, %xmm4 +; CHECK-NEXT: vmulss %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2 +; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmovss %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %81 = fsub reassoc nsz contract float %0, %1 + %82 = fmul reassoc nsz contract float %1, %2 + %83 = fmul reassoc nsz contract float %3, %82 + %84 = fsub reassoc nsz contract float %0, %83 + %85 = fmul reassoc nsz contract float %84, %4 + %86 = fmul reassoc nsz contract float %81, %5 + %87 = fsub reassoc nsz contract float %0, %86 + %88 = fmul reassoc nsz contract float %87, %85 + %89 = fmul reassoc nsz contract float %81, %6 + %90 = fmul reassoc nsz contract float %89, %7 + %91 = fsub reassoc nsz contract float %0, %90 + %92 = fmul reassoc nsz contract float %91, %88 + %93 = fmul reassoc nsz contract float %8, %0 + %94 = fmul reassoc nsz contract float %93, %9 + %95 = fmul reassoc nsz contract float %94, %10 + %96 = fsub reassoc nsz contract float %0, %95 + %97 = fmul reassoc nsz contract float %96, %92 + %98 = fmul reassoc nsz contract float %11, %7 + %99 = fmul reassoc nsz contract float %98, %12 + %100 = fsub reassoc nsz contract float %0, %99 + %101 = fmul reassoc nsz contract float %100, %97 + %102 = fmul reassoc nsz contract float %13, %0 + %103 = fmul reassoc nsz contract float %102, %14 + %104 = fmul reassoc nsz contract float %103, %15 + %105 = fsub reassoc nsz contract float %0, %104 + %106 = fmul reassoc nsz contract float %105, %101 + %107 = fmul reassoc nsz contract float %16, %17 + %108 = fsub reassoc nsz contract float %0, %107 + %109 = fmul reassoc nsz contract float %108, %106 + %110 = fmul reassoc nsz contract float %18, %19 + %111 = fmul reassoc nsz contract float %110, %9 + %112 = fsub reassoc nsz contract float %0, %111 + %113 = fmul reassoc nsz contract float %112, %109 + %114 = fmul reassoc nsz contract float %20, %0 + %115 = fmul reassoc nsz contract float %114, %15 + %116 = fmul reassoc nsz contract float %81, %115 + %117 = fsub reassoc nsz contract float %0, %116 + %118 = fmul reassoc nsz contract float %117, %113 + %119 = fmul reassoc nsz contract float %8, %21 + %120 = fsub reassoc nsz contract float %0, %119 + %121 = fmul reassoc nsz contract float %120, %118 + %122 = fmul reassoc nsz contract float %102, %22 + %123 = fmul reassoc nsz contract float %122, %23 + %124 = fsub reassoc nsz contract float %0, %123 + %125 = fmul reassoc nsz contract float %124, %121 + %126 = fmul reassoc nsz contract float %125, %24 + %127 = fmul reassoc nsz contract float %3, %25 + %128 = fsub reassoc nsz contract float %0, %127 + %129 = fmul reassoc nsz contract float %128, %126 + %130 = fmul reassoc nsz contract float %129, %26 + %131 = fmul reassoc nsz contract float %27, %1 + %132 = fmul reassoc nsz contract 
float %131, %28 + %133 = fsub reassoc nsz contract float %0, %132 + %134 = fmul reassoc nsz contract float %133, %130 + %135 = fmul reassoc nsz contract float %29, %30 + %136 = fmul reassoc nsz contract float %135, %31 + %137 = fsub reassoc nsz contract float %0, %136 + %138 = fmul reassoc nsz contract float %137, %134 + %139 = fmul reassoc nsz contract float %138, %32 + %140 = fmul reassoc nsz contract float %139, %33 + %141 = fmul reassoc nsz contract float %140, %34 + %142 = fmul reassoc nsz contract float %35, %9 + %143 = fmul reassoc nsz contract float %142, %36 + %144 = fsub reassoc nsz contract float %0, %143 + %145 = fmul reassoc nsz contract float %144, %141 + %146 = fmul reassoc nsz contract float %145, %37 + %147 = fmul reassoc nsz contract float %1, %38 + %148 = fsub reassoc nsz contract float %0, %147 + %149 = fmul reassoc nsz contract float %148, %146 + %150 = fmul reassoc nsz contract float %39, %40 + %151 = fsub reassoc nsz contract float %0, %150 + %152 = fmul reassoc nsz contract float %151, %149 + %153 = fmul reassoc nsz contract float %152, %41 + %154 = fmul reassoc nsz contract float %20, %42 + %155 = fmul reassoc nsz contract float %154, %43 + %156 = fsub reassoc nsz contract float %0, %155 + %157 = fmul reassoc nsz contract float %156, %153 + %158 = fmul reassoc nsz contract float %157, %44 + %159 = fmul reassoc nsz contract float %158, %45 + %160 = fmul reassoc nsz contract float %81, %0 + %161 = fmul reassoc nsz contract float %160, %46 + %162 = fmul reassoc nsz contract float %161, %14 + %163 = fsub reassoc nsz contract float %0, %162 + %164 = fmul reassoc nsz contract float %163, %159 + %165 = fmul reassoc nsz contract float %8, %47 + %166 = fmul reassoc nsz contract float %18, %165 + %167 = fsub reassoc nsz contract float %0, %166 + %168 = fmul reassoc nsz contract float %167, %164 + %169 = fmul reassoc nsz contract float %168, %48 + %170 = fmul reassoc nsz contract float %169, %49 + %171 = fmul reassoc nsz contract float %18, %50 + %172 = fsub reassoc nsz contract float %0, %171 + %173 = fmul reassoc nsz contract float %172, %170 + %174 = fmul reassoc nsz contract float %16, %160 + %175 = fmul reassoc nsz contract float %174, %12 + %176 = fsub reassoc nsz contract float %0, %175 + %177 = fmul reassoc nsz contract float %176, %173 + %178 = fmul reassoc nsz contract float %51, %0 + %179 = fmul reassoc nsz contract float %178, %22 + %180 = fmul reassoc nsz contract float %179, %52 + %181 = fsub reassoc nsz contract float %0, %180 + %182 = fmul reassoc nsz contract float %181, %177 + %183 = fmul reassoc nsz contract float %27, %16 + %184 = fmul reassoc nsz contract float %183, %53 + %185 = fsub reassoc nsz contract float %0, %184 + %186 = fmul reassoc nsz contract float %185, %182 + %187 = fmul reassoc nsz contract float %16, %54 + %188 = fmul reassoc nsz contract float %8, %187 + %189 = fsub reassoc nsz contract float %0, %188 + %190 = fmul reassoc nsz contract float %189, %186 + %191 = fmul reassoc nsz contract float %190, %55 + %192 = fmul reassoc nsz contract float %191, %56 + %193 = fmul reassoc nsz contract float %57, %58 + %194 = fmul reassoc nsz contract float %193, %59 + %195 = fsub reassoc nsz contract float %0, %194 + %196 = fmul reassoc nsz contract float %195, %192 + %197 = fmul reassoc nsz contract float %13, %160 + %198 = fmul reassoc nsz contract float %197, %36 + %199 = fsub reassoc nsz contract float %0, %198 + %200 = fmul reassoc nsz contract float %199, %196 + %201 = fmul reassoc nsz contract float %93, %60 + %202 = fmul reassoc nsz contract 
float %201, %61 + %203 = fsub reassoc nsz contract float %0, %202 + %204 = fmul reassoc nsz contract float %203, %200 + %205 = fmul reassoc nsz contract float %204, %62 + %206 = fmul reassoc nsz contract float %205, %63 + %207 = fmul reassoc nsz contract float %114, %9 + %208 = fmul reassoc nsz contract float %207, %59 + %209 = fsub reassoc nsz contract float %0, %208 + %210 = fmul reassoc nsz contract float %209, %206 + %211 = fmul reassoc nsz contract float %18, %64 + %212 = fsub reassoc nsz contract float %0, %211 + %213 = fmul reassoc nsz contract float %212, %210 + %214 = fmul reassoc nsz contract float %29, %65 + %215 = fsub reassoc nsz contract float %0, %214 + %216 = fmul reassoc nsz contract float %215, %213 + %217 = fmul reassoc nsz contract float %216, %66 + %218 = fmul reassoc nsz contract float %3, %67 + %219 = fsub reassoc nsz contract float %0, %218 + %220 = fmul reassoc nsz contract float %219, %217 + %221 = fmul reassoc nsz contract float %220, %68 + %222 = fmul reassoc nsz contract float %57, %69 + %223 = fsub reassoc nsz contract float %0, %222 + %224 = fmul reassoc nsz contract float %223, %221 + %225 = fmul reassoc nsz contract float %57, %0 + %226 = fmul reassoc nsz contract float %225, %61 + %227 = fmul reassoc nsz contract float %226, %12 + %228 = fsub reassoc nsz contract float %0, %227 + %229 = fmul reassoc nsz contract float %228, %224 + %230 = fmul reassoc nsz contract float %178, %70 + %231 = fmul reassoc nsz contract float %230, %46 + %232 = fsub reassoc nsz contract float %0, %231 + %233 = fmul reassoc nsz contract float %232, %229 + %234 = fmul reassoc nsz contract float %233, %71 + %235 = fmul reassoc nsz contract float %57, %122 + %236 = fsub reassoc nsz contract float %0, %235 + %237 = fmul reassoc nsz contract float %236, %234 + %238 = fmul reassoc nsz contract float %20, %160 + %239 = fmul reassoc nsz contract float %3, %238 + %240 = fsub reassoc nsz contract float %0, %239 + %241 = fmul reassoc nsz contract float %240, %237 + %242 = fmul reassoc nsz contract float %16, %72 + %243 = fmul reassoc nsz contract float %242, %73 + %244 = fsub reassoc nsz contract float %0, %243 + %245 = fmul reassoc nsz contract float %244, %241 + %246 = fmul reassoc nsz contract float %154, %15 + %247 = fsub reassoc nsz contract float %0, %246 + %248 = fmul reassoc nsz contract float %247, %245 + %249 = fmul reassoc nsz contract float %178, %23 + %250 = fmul reassoc nsz contract float %249, %74 + %251 = fsub reassoc nsz contract float %0, %250 + %252 = fmul reassoc nsz contract float %251, %248 + %253 = fmul reassoc nsz contract float %3, %160 + %254 = fmul reassoc nsz contract float %51, %253 + %255 = fsub reassoc nsz contract float %0, %254 + %256 = fmul reassoc nsz contract float %255, %252 + %257 = fmul reassoc nsz contract float %13, %75 + %258 = fmul reassoc nsz contract float %257, %51 + %259 = fsub reassoc nsz contract float %0, %258 + %260 = fmul reassoc nsz contract float %259, %256 + %261 = fmul reassoc nsz contract float %8, %76 + %262 = fmul reassoc nsz contract float %51, %261 + %263 = fsub reassoc nsz contract float %0, %262 + %264 = fmul reassoc nsz contract float %263, %260 + %265 = fmul reassoc nsz contract float %264, %77 + %266 = fmul reassoc nsz contract float %39, %0 + %267 = fmul reassoc nsz contract float %266, %78 + %268 = fmul reassoc nsz contract float %267, %14 + %269 = fsub reassoc nsz contract float %0, %268 + %270 = fmul reassoc nsz contract float %269, %265 + %271 = fmul reassoc nsz contract float %1, %76 + %272 = fmul reassoc nsz contract 
float %51, %271 + %273 = fsub reassoc nsz contract float %0, %272 + %274 = fmul reassoc nsz contract float %273, %270 + %275 = fmul reassoc nsz contract float %0, %59 + %276 = fmul reassoc nsz contract float %275, %79 + %277 = fmul reassoc nsz contract float %276, %36 + %278 = fsub reassoc nsz contract float %0, %277 + %279 = fmul reassoc nsz contract float %278, %274 + %280 = fmul reassoc nsz contract float %114, %22 + %281 = fmul reassoc nsz contract float %280, %36 + %282 = fsub reassoc nsz contract float %0, %281 + %283 = fmul reassoc nsz contract float %282, %279 + %284 = fmul reassoc nsz contract float %0, %43 + %285 = fmul reassoc nsz contract float %284, %81 + %286 = fmul reassoc nsz contract float %3, %285 + %287 = fsub reassoc nsz contract float %0, %286 + %288 = fmul reassoc nsz contract float %287, %283 + store float %288, float* %80, align 4 + ret void +} From 03c8e1cc7efabd122294e1cd670fba6d544f2831 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 26 Aug 2020 15:16:02 +0200 Subject: [PATCH 017/109] ReleaseNotes: removal of llgo --- llvm/docs/ReleaseNotes.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index aea1550960e80..6c92c1224238e 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -44,6 +44,9 @@ Non-comprehensive list of changes in this release functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection. +* The llgo frontend has been removed for now, but may be resurrected in the + future. + * ... From 21d01a67c9613932053dd89c9957782f86e0c93f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 12 Aug 2020 20:50:59 -0700 Subject: [PATCH 018/109] [ELF] --gdb-index: skip SHF_GROUP .debug_info -gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections. All except one are type units (.debug_types before DWARF v5). When constructing .gdb_index, we should ignore these type units. We use a simple heuristic: the compile unit does not have the SHF_GROUP flag. (This needs to be revisited if people place compile unit .debug_info in COMDAT groups.) This issue manifests as a data race: because an object file may have multiple .debug_info sections, we may concurrently construct `LLDDwarfObj` for the same file in multiple threads. The threads may access `InputSectionBase::data()` concurrently on the same input section. `InputSectionBase::data()` does a lazy uncompress() and rewrites the member variable `rawData`. A thread running zlib `inflate()` (transitively called by uncompress()) on a buffer with `rawData` tampered by another thread may fail with `uncompress failed: zlib error: Z_DATA_ERROR`. Even if no data race occurred in an optimistic run, if there are N .debug_info, one CU entry and its address ranges will be replicated N times. The result .gdb_index can be much larger than a correct one. The new test gdb-index-dwarf5-type-unit.s actually has two compile units. This cannot be produced with regular approaches (it can be produced with -r --unique). 
This is used to demonstrate that the .gdb_index construction code only considers the last non-SHF_GROUP .debug_info Reviewed By: grimar Differential Revision: https://reviews.llvm.org/D85579 (cherry picked from commit fb141292f4411448af41fc454c07f3903acb84dd) --- lld/ELF/DWARF.cpp | 22 +++++- lld/ELF/DWARF.h | 4 + lld/ELF/SyntheticSections.cpp | 50 ++++++------ lld/test/ELF/gdb-index-dwarf5-type-unit.s | 93 +++++++++++++++++++++++ 4 files changed, 143 insertions(+), 26 deletions(-) create mode 100644 lld/test/ELF/gdb-index-dwarf5-type-unit.s diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index 24c44730bf64b..5767f6020f933 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -26,7 +26,12 @@ using namespace lld; using namespace lld::elf; template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { - for (InputSectionBase *sec : obj->getSections()) { + // Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below. + ArrayRef objSections = + CHECK(obj->getObj().sections(), obj); + assert(objSections.size() == obj->getSections().size()); + for (auto it : llvm::enumerate(obj->getSections())) { + InputSectionBase *sec = it.value(); if (!sec) continue; @@ -35,7 +40,6 @@ template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { .Case(".debug_addr", &addrSection) .Case(".debug_gnu_pubnames", &gnuPubnamesSection) .Case(".debug_gnu_pubtypes", &gnuPubtypesSection) - .Case(".debug_info", &infoSection) .Case(".debug_loclists", &loclistsSection) .Case(".debug_ranges", &rangesSection) .Case(".debug_rnglists", &rnglistsSection) @@ -53,6 +57,20 @@ template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { strSection = toStringRef(sec->data()); else if (sec->name == ".debug_line_str") lineStrSection = toStringRef(sec->data()); + else if (sec->name == ".debug_info" && + !(objSections[it.index()].sh_flags & ELF::SHF_GROUP)) { + // In DWARF v5, -fdebug-types-section places type units in .debug_info + // sections in COMDAT groups. They are not compile units and thus should + // be ignored for .gdb_index/diagnostics purposes. + // + // We use a simple heuristic: the compile unit does not have the SHF_GROUP + // flag. If we place compile units in COMDAT groups in the future, we may + // need to perform a lightweight parsing. We drop the SHF_GROUP flag when + // the InputSection was created, so we need to retrieve sh_flags from the + // associated ELF section header. 
+ infoSection.Data = toStringRef(sec->data()); + infoSection.sec = sec; + } } } diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h index a12dae6e99600..900c63de26ff3 100644 --- a/lld/ELF/DWARF.h +++ b/lld/ELF/DWARF.h @@ -32,6 +32,10 @@ template class LLDDwarfObj final : public llvm::DWARFObject { f(infoSection); } + InputSection *getInfoSection() const { + return cast(infoSection.sec); + } + const llvm::DWARFSection &getLoclistsSection() const override { return loclistsSection; } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 731b9f6580609..09f771d123595 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -28,6 +28,7 @@ #include "lld/Common/Strings.h" #include "lld/Common/Version.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" @@ -2653,15 +2654,6 @@ void GdbIndexSection::initOutputSize() { } } -static std::vector getDebugInfoSections() { - std::vector ret; - for (InputSectionBase *s : inputSections) - if (InputSection *isec = dyn_cast(s)) - if (isec->name == ".debug_info") - ret.push_back(isec); - return ret; -} - static std::vector readCuList(DWARFContext &dwarf) { std::vector ret; for (std::unique_ptr &cu : dwarf.compile_units()) @@ -2815,30 +2807,40 @@ createSymbols(ArrayRef> nameAttrs, // Returns a newly-created .gdb_index section. template GdbIndexSection *GdbIndexSection::create() { - std::vector sections = getDebugInfoSections(); - - // .debug_gnu_pub{names,types} are useless in executables. - // They are present in input object files solely for creating - // a .gdb_index. So we can remove them from the output. - for (InputSectionBase *s : inputSections) + // Collect InputFiles with .debug_info. See the comment in + // LLDDwarfObj::LLDDwarfObj. If we do lightweight parsing in the future, + // note that isec->data() may uncompress the full content, which should be + // parallelized. + SetVector files; + for (InputSectionBase *s : inputSections) { + InputSection *isec = dyn_cast(s); + if (!isec) + continue; + // .debug_gnu_pub{names,types} are useless in executables. + // They are present in input object files solely for creating + // a .gdb_index. So we can remove them from the output. if (s->name == ".debug_gnu_pubnames" || s->name == ".debug_gnu_pubtypes") s->markDead(); + else if (isec->name == ".debug_info") + files.insert(isec->file); + } - std::vector chunks(sections.size()); - std::vector> nameAttrs(sections.size()); + std::vector chunks(files.size()); + std::vector> nameAttrs(files.size()); - parallelForEachN(0, sections.size(), [&](size_t i) { + parallelForEachN(0, files.size(), [&](size_t i) { // To keep memory usage low, we don't want to keep cached DWARFContext, so // avoid getDwarf() here. - ObjFile *file = sections[i]->getFile(); + ObjFile *file = cast>(files[i]); DWARFContext dwarf(std::make_unique>(file)); + auto &dobj = static_cast &>(dwarf.getDWARFObj()); - chunks[i].sec = sections[i]; + // If the are multiple compile units .debug_info (very rare ld -r --unique), + // this only picks the last one. Other address ranges are lost. 
+ chunks[i].sec = dobj.getInfoSection(); chunks[i].compilationUnits = readCuList(dwarf); - chunks[i].addressAreas = readAddressAreas(dwarf, sections[i]); - nameAttrs[i] = readPubNamesAndTypes( - static_cast &>(dwarf.getDWARFObj()), - chunks[i].compilationUnits); + chunks[i].addressAreas = readAddressAreas(dwarf, chunks[i].sec); + nameAttrs[i] = readPubNamesAndTypes(dobj, chunks[i].compilationUnits); }); auto *ret = make(); diff --git a/lld/test/ELF/gdb-index-dwarf5-type-unit.s b/lld/test/ELF/gdb-index-dwarf5-type-unit.s new file mode 100644 index 0000000000000..5cd6778fe7e48 --- /dev/null +++ b/lld/test/ELF/gdb-index-dwarf5-type-unit.s @@ -0,0 +1,93 @@ +# REQUIRES: x86, zlib +## -gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections. +## All except one are type units. Test we can locate the compile unit, add it to +## the index, and not erroneously duplicate it (which would happen if we +## consider every .debug_info a compile unit). + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t +# RUN: llvm-dwarfdump --gdb-index %t | FileCheck %s + +## Test we don't uncompress a section while another thread is concurrently +## accessing it. This would be detected by tsan as a data race. +# RUN: llvm-objcopy --compress-debug-sections %t.o +# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t1 +# RUN: llvm-dwarfdump --gdb-index %t1 | FileCheck %s + +## In this test, there are actually two compile unit .debug_info (very uncommon; +## -r --unique). Currently we only handle the last compile unit. +# CHECK: CU list offset = 0x18, has 1 entries: +# CHECK-NEXT: 0: Offset = 0x32, Length = 0x19 + +# CHECK: Address area offset = 0x28, has 1 entries: +# CHECK-NEXT: Low/High address = [0x1001, 0x1002) (Size: 0x1), CU id = 0 + +.Lfunc_begin0: + ret +.Lfunc_end0: +.Lfunc_begin1: + ret +.Lfunc_end1: + +.section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 65 # DW_TAG_type_unit + .byte 0 # DW_CHILDREN_no + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 2 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 0 # EOM(3) + +.macro TYPE_UNIT id signature +.section .debug_info,"G",@progbits,\signature + .long .Ldebug_info_end\id-.Ldebug_info_start\id # Length of Unit +.Ldebug_info_start\id: + .short 5 # DWARF version number + .byte 2 # DWARF Unit Type + .byte 8 # Address Size + .long .debug_abbrev # Offset Into Abbrev. Section + .quad \signature # Type Signature + .long .Ldebug_info_end\id # Type DIE Offset + .byte 1 # Abbrev [1] DW_TAG_type_unit +.Ldebug_info_end\id: +.endm + +## We place compile units between two type units (rare). A naive approach will +## take either the first or the last .debug_info +TYPE_UNIT 0, 123 + +.section .debug_info,"",@progbits,unique,0 +.Lcu_begin0: + .long .Lcu_end0-.Lcu_begin0-4 # Length of Unit + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 2 # Abbrev [2] DW_TAG_compile_unit + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc +.Lcu_end0: + +.section .debug_info,"",@progbits,unique,1 +.Lcu_begin1: + .long .Lcu_end1-.Lcu_begin1-4 # Length of Unit + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 2 # Abbrev [2] DW_TAG_compile_unit + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc +.Lcu_end1: + +TYPE_UNIT 1, 456 From ce9f3f19f512f78c5d9ffb2753bae7bcb203161b Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Wed, 4 Mar 2020 15:38:45 -0800 Subject: [PATCH 019/109] [Docs] Document --lto-whole-program-visibility Summary: Documents interaction of linker option added in D71913 with LTO visibility. Reviewers: pcc Subscribers: inglorion, hiraditya, steven_wu, dexonsmith, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D75655 (cherry picked from commit 72bdb41a06a27b5453bf966a0ffecfa6f5fae1a6) --- clang/docs/LTOVisibility.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/clang/docs/LTOVisibility.rst b/clang/docs/LTOVisibility.rst index 3a60f54e1b907..cdc0b9cc0e19e 100644 --- a/clang/docs/LTOVisibility.rst +++ b/clang/docs/LTOVisibility.rst @@ -35,6 +35,16 @@ other classes receive hidden LTO visibility. Classes with internal linkage (e.g. classes declared in unnamed namespaces) also receive hidden LTO visibility. +During the LTO link, all classes with public LTO visibility will be refined +to hidden LTO visibility when the ``--lto-whole-program-visibility`` lld linker +option is applied (``-plugin-opt=whole-program-visibility`` for gold). This flag +can be used to defer specifying whether classes have hidden LTO visibility until +link time, to allow bitcode objects to be shared by different LTO links. +Due to an implementation limitation, symbols associated with classes with hidden +LTO visibility may still be exported from the binary when using this flag. It is +unsafe to refer to these symbols, and their visibility may be relaxed to hidden +in a future compiler release. + A class defined in a translation unit built without LTO receives public LTO visibility regardless of its object file visibility, linkage or other attributes. From 6f2ba83779c8055a58f1cc9ee33686a8109ff33a Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Wed, 26 Aug 2020 15:16:15 +0100 Subject: [PATCH 020/109] [release][SVE] Move notes for SVE ACLE to the release notes of clang. --- clang/docs/ReleaseNotes.rst | 54 +++++++++++++++++++++++++++++++++++ llvm/docs/ReleaseNotes.rst | 56 +++---------------------------------- 2 files changed, 58 insertions(+), 52 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6f336088750fb..a8fde6b452d03 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -90,6 +90,60 @@ Non-comprehensive list of changes in this release a fixed hashing algorithm that prevents some collision when loading out-of-date profile informations. Clang can still read old profile files. 
+- Clang adds support for the following macros that enable the + C-intrinsics from the `Arm C language extensions for SVE + `_ (version + ``00bet5``, see section 2.1 for the list of intrinsics associated to + each macro): + + + ================================= ================= + Preprocessor macro Target feature + ================================= ================= + ``__ARM_FEATURE_SVE`` ``+sve`` + ``__ARM_FEATURE_SVE_BF16`` ``+sve+bf16`` + ``__ARM_FEATURE_SVE_MATMUL_FP32`` ``+sve+f32mm`` + ``__ARM_FEATURE_SVE_MATMUL_FP64`` ``+sve+f64mm`` + ``__ARM_FEATURE_SVE_MATMUL_INT8`` ``+sve+i8mm`` + ``__ARM_FEATURE_SVE2`` ``+sve2`` + ``__ARM_FEATURE_SVE2_AES`` ``+sve2-aes`` + ``__ARM_FEATURE_SVE2_BITPERM`` ``+sve2-bitperm`` + ``__ARM_FEATURE_SVE2_SHA3`` ``+sve2-sha3`` + ``__ARM_FEATURE_SVE2_SM4`` ``+sve2-sm4`` + ================================= ================= + + The macros enable users to write C/C++ `Vector Length Agnostic + (VLA)` loops, that can be executed on any CPU that implements the + underlying instructions supported by the C intrinsics, independently + of the hardware vector register size. + + For example, the ``__ARM_FEATURE_SVE`` macro is enabled when + targeting AArch64 code generation by setting ``-march=armv8-a+sve`` + on the command line. + + .. code-block:: c + :caption: Example of VLA addition of two arrays with SVE ACLE. + + // Compile with: + // `clang++ -march=armv8a+sve ...` (for c++) + // `clang -stc=c11 -march=armv8a+sve ...` (for c) + #include + + void VLA_add_arrays(double *x, double *y, double *out, unsigned N) { + for (unsigned i = 0; i < N; i += svcntd()) { + svbool_t Pg = svwhilelt_b64(i, N); + svfloat64_t vx = svld1(Pg, &x[i]); + svfloat64_t vy = svld1(Pg, &y[i]); + svfloat64_t vout = svadd_x(Pg, vx, vy); + svst1(Pg, &out[i], vout); + } + } + + Please note that support for lazy binding of SVE function calls is + incomplete. When you interface user code with SVE functions that are + provided through shared libraries, avoid using lazy binding. If you + use lazy binding, the results could be corrupted. + New Compiler Flags ------------------ diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 6c92c1224238e..5bbdea65c3e72 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -106,59 +106,11 @@ Changes to the AArch64 Backend * Clearly error out on unsupported relocations when targeting COFF, instead of silently accepting some (without being able to do what was requested). 
-* Clang adds support for the following macros that enable the - C-intrinsics from the `Arm C language extensions for SVE +* Implemented codegen support for the SVE C-language intrinsics + documented in `Arm C Language Extensions (ACLE) for SVE `_ (version - ``00bet5``, see section 2.1 for the list of intrinsics associated to - each macro): - - - ================================= ================= - Preprocessor macro Target feature - ================================= ================= - ``__ARM_FEATURE_SVE`` ``+sve`` - ``__ARM_FEATURE_SVE_BF16`` ``+sve+bf16`` - ``__ARM_FEATURE_SVE_MATMUL_FP32`` ``+sve+f32mm`` - ``__ARM_FEATURE_SVE_MATMUL_FP64`` ``+sve+f64mm`` - ``__ARM_FEATURE_SVE_MATMUL_INT8`` ``+sve+i8mm`` - ``__ARM_FEATURE_SVE2`` ``+sve2`` - ``__ARM_FEATURE_SVE2_AES`` ``+sve2-aes`` - ``__ARM_FEATURE_SVE2_BITPERM`` ``+sve2-bitperm`` - ``__ARM_FEATURE_SVE2_SHA3`` ``+sve2-sha3`` - ``__ARM_FEATURE_SVE2_SM4`` ``+sve2-sm4`` - ================================= ================= - - The macros enable users to write C/C++ `Vector Length Agnostic - (VLA)` loops, that can be executed on any CPU that implements the - underlying instructions supported by the C intrinsics, independently - of the hardware vector register size. - - For example, the ``__ARM_FEATURE_SVE`` macro is enabled when - targeting AArch64 code generation by setting ``-march=armv8-a+sve`` - on the command line. - - .. code-block:: c - :caption: Example of VLA addition of two arrays with SVE ACLE. - - // Compile with: - // `clang++ -march=armv8a+sve ...` (for c++) - // `clang -stc=c11 -march=armv8a+sve ...` (for c) - #include - - void VLA_add_arrays(double *x, double *y, double *out, unsigned N) { - for (unsigned i = 0; i < N; i += svcntd()) { - svbool_t Pg = svwhilelt_b64(i, N); - svfloat64_t vx = svld1(Pg, &x[i]); - svfloat64_t vy = svld1(Pg, &y[i]); - svfloat64_t vout = svadd_x(Pg, vx, vy); - svst1(Pg, &out[i], vout); - } - } - - Please note that support for lazy binding of SVE function calls is - incomplete. When you interface user code with SVE functions that are - provided through shared libraries, avoid using lazy binding. If you - use lazy binding, the results could be corrupted. + ``00bet5``). For more information, see the ``clang`` 11 release + notes. Changes to the ARM Backend -------------------------- From 46f3aed198a5530b5115881628e1fcfb3e7541c9 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 26 Aug 2020 16:44:55 +0200 Subject: [PATCH 021/109] Bump -len_control value in fuzzer-custommutator.test (PR47286) to make the test more stable, as suggested by mmoroz. (cherry picked from commit 8421503300c6145480710761983f089ccbe0bb56) --- compiler-rt/test/fuzzer/fuzzer-custommutator.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/fuzzer/fuzzer-custommutator.test b/compiler-rt/test/fuzzer/fuzzer-custommutator.test index 87e69a0d8cf3a..25f5fe697b43f 100644 --- a/compiler-rt/test/fuzzer/fuzzer-custommutator.test +++ b/compiler-rt/test/fuzzer/fuzzer-custommutator.test @@ -6,7 +6,7 @@ LLVMFuzzerCustomMutator: {{.*}} lim: 4096 {{.*}} LLVMFuzzerCustomMutator: BINGO # len_control is disabled for custom mutators by default, test that it can be enabled. 
-RUN: not %run %t-CustomMutatorTest -len_control=100 2>&1 | FileCheck %s --check-prefix=LLVMFuzzerCustomMutatorWithLenControl +RUN: not %run %t-CustomMutatorTest -len_control=1000 2>&1 | FileCheck %s --check-prefix=LLVMFuzzerCustomMutatorWithLenControl LLVMFuzzerCustomMutatorWithLenControl: INFO: found LLVMFuzzerCustomMutator LLVMFuzzerCustomMutatorWithLenControl: In LLVMFuzzerCustomMutator LLVMFuzzerCustomMutatorWithLenControl: {{.*}} lim: {{[1-9][0-9]?}} {{.*}} From 04d70cd0f07dab371abf586627ce9ac09e04362c Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 12 Aug 2020 20:14:00 -0700 Subject: [PATCH 022/109] [ELF] -r: allow SHT_X86_64_UNWIND to be merged into SHT_PROGBITS * For .cfi_*, GCC/GNU as emits SHT_PROGBITS type .eh_frame sections. * Since rL252300, clang emits SHT_X86_64_UNWIND type .eh_frame sections (originated from Solaris, documented in the x86-64 psABI). * Some assembly use `.section .eh_frame,"a",@unwind` to generate SHT_X86_64_UNWIND .eh_frame sections. In a non-relocatable link, input .eh_frame are combined and there is only one SyntheticSection .eh_frame in the output section, so the "section type mismatch" diagnostic does not fire. In a relocatable link, there is no SyntheticSection .eh_frame. .eh_frame of mixed types can trigger the diagnostic. This patch fixes it by adding another special case 0x70000001 (= SHT_X86_64_UNWIND) to canMergeToProgbits(). ld.lld -r gcc.o clang.o => error: section type mismatch for .eh_frame There was a discussion "RFC: Usefulness of SHT_X86_64_UNWIND" on the x86-64-abi mailing list. Folks are not wild about making the psABI value 0x70000001 into gABI, but a few think defining 0x70000001 for .eh_frame may be a good idea for a new architecture. Reviewed By: grimar Differential Revision: https://reviews.llvm.org/D85785 (cherry picked from commit 88498f44dfe7d9b886f2622335cdeae4dbf2b02a) --- lld/ELF/OutputSections.cpp | 6 +++++- lld/test/ELF/eh-frame-type.test | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 7e9e76b070ec8..881c375a1159e 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -77,10 +77,14 @@ OutputSection::OutputSection(StringRef name, uint32_t type, uint64_t flags) // to be allocated for nobits sections. Other ones don't require // any special treatment on top of progbits, so there doesn't // seem to be a harm in merging them. +// +// NOTE: clang since rL252300 emits SHT_X86_64_UNWIND .eh_frame sections. Allow +// them to be merged into SHT_PROGBITS .eh_frame (GNU as .cfi_*). static bool canMergeToProgbits(unsigned type) { return type == SHT_NOBITS || type == SHT_PROGBITS || type == SHT_INIT_ARRAY || type == SHT_PREINIT_ARRAY || type == SHT_FINI_ARRAY || - type == SHT_NOTE; + type == SHT_NOTE || + (type == SHT_X86_64_UNWIND && config->emachine == EM_X86_64); } // Record that isec will be placed in the OutputSection. 
isec does not become diff --git a/lld/test/ELF/eh-frame-type.test b/lld/test/ELF/eh-frame-type.test index 22b802a9e528f..4105491372b8d 100644 --- a/lld/test/ELF/eh-frame-type.test +++ b/lld/test/ELF/eh-frame-type.test @@ -11,6 +11,9 @@ # RUN: ld.lld %t1.o %t2.o -o %tboth # RUN: llvm-readobj -S %tboth | FileCheck %s +# RUN: ld.lld -r %t1.o %t2.o -o %tboth.ro +# RUN: llvm-readobj -S %tboth.ro | FileCheck %s + # CHECK: Name: .eh_frame # CHECK-NEXT: Type: SHT_PROGBITS From 761cd1ce23769b459d8f111e1448ff1e9807b90e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 20 Aug 2020 23:45:34 -0700 Subject: [PATCH 023/109] [X86] Correct the implementation of the testFeature macro in getIntelProcessorTypeAndSubtype to do a proper bit test. Instead of ANDing with a one hot mask representing the bit to be tested, we were ANDing with just the bit number. This tests multiple bits none of them the correct one. This caused skylake-avx512, cascadelake and cooperlake to all be misdetected. Based on experiments with the Intel SDE, it seems that all of these CPUs are being detected as being cooperlake. This is bad since its the newest CPU of the 3. (cherry picked from commit df9a9bb7beb7bc04ca4188fe0e527baac2900ff1) --- compiler-rt/lib/builtins/cpu_model.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c index 8346bb62dcfb4..468bcc84cbcb3 100644 --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model.c @@ -277,7 +277,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, const unsigned *Features, unsigned *Type, unsigned *Subtype) { #define testFeature(F) \ - (Features[F / 32] & (F % 32)) != 0 + (Features[F / 32] & (1 << (F % 32))) != 0 // We select CPU strings to match the code in Host.cpp, but we don't use them // in compiler-rt. From 124e8259abe1dc7d0e8ad9d238f698bcfc31562e Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 26 Aug 2020 19:29:56 +0200 Subject: [PATCH 024/109] ReleaseNotes: mention the build preferring python 3 Text by Saleem! --- llvm/docs/ReleaseNotes.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 5bbdea65c3e72..49b1a040a393e 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -44,6 +44,13 @@ Non-comprehensive list of changes in this release functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection. +* The LLVM project has started the migration towards Python 3, and the build + system now prefers Python 3 whenever available. If the Python 3 interpreter + (or libraries) are not found, the build system will, for the time being, fall + back to Python 2. It is recommended that downstream projects migrate to + Python 3 as Python 2 has been end-of-life'd by the Python Software + Foundation. + * The llgo frontend has been removed for now, but may be resurrected in the future. From 29e94ddb3930e3d7b54afb3753a6a40d6ef57898 Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Wed, 26 Aug 2020 15:43:56 +0000 Subject: [PATCH 025/109] [MC][SVE] Fix data operand for instruction alias of `st1d`. The version of `st1d` that operates with vector plus immediate addressing mode uses the alias `st1d { .d }, , [.d]` for rendering `st1d { .d }, , [.d, #0]`. The disassembler was generating `.s` instead of `.d>`. 
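For illustration, a minimal sketch of the intended round trip, mirroring the
st1d.s MC test updated below (the encoding bytes are the ones listed there):

  st1d { z0.d }, p7, [z0.d, #0]  // encoding: [0x00,0xbc,0xc0,0xe5]
  st1d { z0.d }, p7, [z0.d]      // preferred zero-immediate alias; with the fix the
                                 // disassembler prints the data operand as z0.d, not z0.s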
Differential Revision: https://reviews.llvm.org/D86633 --- llvm/lib/Target/AArch64/SVEInstrFormats.td | 2 +- llvm/test/MC/AArch64/SVE/st1b.s | 24 ++++++++++++++++++++++ llvm/test/MC/AArch64/SVE/st1d.s | 12 +++++++++++ llvm/test/MC/AArch64/SVE/st1h.s | 24 ++++++++++++++++++++++ llvm/test/MC/AArch64/SVE/st1w.s | 24 ++++++++++++++++++++++ 5 files changed, 85 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index c56a65b9e2124..e86f2a6ebde46 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -5416,7 +5416,7 @@ multiclass sve_mem_64b_sst_vi_ptrs opc, string asm, def : InstAlias(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>; def : InstAlias(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; + (!cast(NAME # _IMM) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt), (!cast(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>; diff --git a/llvm/test/MC/AArch64/SVE/st1b.s b/llvm/test/MC/AArch64/SVE/st1b.s index a6f766bdfd7cc..40b830709ead4 100644 --- a/llvm/test/MC/AArch64/SVE/st1b.s +++ b/llvm/test/MC/AArch64/SVE/st1b.s @@ -168,3 +168,27 @@ st1b { z31.d }, p7, [z31.d, #31] // CHECK-ENCODING: [0xff,0xbf,0x5f,0xe4] // CHECK-ERROR: instruction requires: sve // CHECK-UNKNOWN: ff bf 5f e4 + +st1b { z0.s }, p7, [z0.s, #0] +// CHECK-INST: st1b { z0.s }, p7, [z0.s] +// CHECK-ENCODING: [0x00,0xbc,0x60,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 60 e4 + +st1b { z0.s }, p7, [z0.s] +// CHECK-INST: st1b { z0.s }, p7, [z0.s] +// CHECK-ENCODING: [0x00,0xbc,0x60,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 60 e4 + +st1b { z0.d }, p7, [z0.d, #0] +// CHECK-INST: st1b { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0x40,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 40 e4 + +st1b { z0.d }, p7, [z0.d] +// CHECK-INST: st1b { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0x40,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 40 e4 diff --git a/llvm/test/MC/AArch64/SVE/st1d.s b/llvm/test/MC/AArch64/SVE/st1d.s index ba4a0e5be114b..a5a19e772b528 100644 --- a/llvm/test/MC/AArch64/SVE/st1d.s +++ b/llvm/test/MC/AArch64/SVE/st1d.s @@ -78,3 +78,15 @@ st1d { z31.d }, p7, [z31.d, #248] // CHECK-ENCODING: [0xff,0xbf,0xdf,0xe5] // CHECK-ERROR: instruction requires: sve // CHECK-UNKNOWN: ff bf df e5 + +st1d { z0.d }, p7, [z0.d, #0] +// CHECK-INST: st1d { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc c0 e5 + +st1d { z0.d }, p7, [z0.d] +// CHECK-INST: st1d { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc c0 e5 diff --git a/llvm/test/MC/AArch64/SVE/st1h.s b/llvm/test/MC/AArch64/SVE/st1h.s index cd6c20d83482e..fe22c52bb9bef 100644 --- a/llvm/test/MC/AArch64/SVE/st1h.s +++ b/llvm/test/MC/AArch64/SVE/st1h.s @@ -168,3 +168,27 @@ st1h { z31.d }, p7, [z31.d, #62] // CHECK-ENCODING: [0xff,0xbf,0xdf,0xe4] // CHECK-ERROR: instruction requires: sve // CHECK-UNKNOWN: ff bf df e4 + +st1h { z0.s }, p7, [z0.s, #0] +// CHECK-INST: st1h { z0.s }, p7, [z0.s] +// CHECK-ENCODING: [0x00,0xbc,0xe0,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc e0 e4 + +st1h { z0.s }, p7, [z0.s] +// CHECK-INST: st1h { 
z0.s }, p7, [z0.s] +// CHECK-ENCODING: [0x00,0xbc,0xe0,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc e0 e4 + +st1h { z0.d }, p7, [z0.d, #0] +// CHECK-INST: st1h { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0xc0,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc c0 e4 + +st1h { z0.d }, p7, [z0.d] +// CHECK-INST: st1h { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0xc0,0xe4] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc c0 e4 diff --git a/llvm/test/MC/AArch64/SVE/st1w.s b/llvm/test/MC/AArch64/SVE/st1w.s index e20194f5747e9..5bbcd2e1ea0ff 100644 --- a/llvm/test/MC/AArch64/SVE/st1w.s +++ b/llvm/test/MC/AArch64/SVE/st1w.s @@ -138,3 +138,27 @@ st1w { z31.d }, p7, [z31.d, #124] // CHECK-ENCODING: [0xff,0xbf,0x5f,0xe5] // CHECK-ERROR: instruction requires: sve // CHECK-UNKNOWN: ff bf 5f e5 + +st1w { z0.s }, p7, [z0.s, #0] +// CHECK-INST: st1w { z0.s }, p7, [z0.s] +// CHECK-ENCODING: [0x00,0xbc,0x60,0xe5] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 60 e5 + +st1w { z0.s }, p7, [z0.s] +// CHECK-INST: st1w { z0.s }, p7, [z0.s] +// CHECK-ENCODING: [0x00,0xbc,0x60,0xe5] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 60 e5 + +st1w { z0.d }, p7, [z0.d, #0] +// CHECK-INST: st1w { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0x40,0xe5] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 40 e5 + +st1w { z0.d }, p7, [z0.d] +// CHECK-INST: st1w { z0.d }, p7, [z0.d] +// CHECK-ENCODING: [0x00,0xbc,0x40,0xe5] +// CHECK-ERROR: instruction requires: sve +// CHECK-UNKNOWN: 00 bc 40 e5 From 63255250c2d76c31282493b8368c74869c5657cc Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Thu, 27 Aug 2020 01:12:16 -0400 Subject: [PATCH 026/109] Default to -fuse-init-array on OpenBSD. (cherry picked from commit a45ccc983b51330fd49c8526fe4770e40eeab708) --- clang/lib/Driver/ToolChains/OpenBSD.cpp | 9 --------- clang/lib/Driver/ToolChains/OpenBSD.h | 5 ----- clang/test/Driver/openbsd.c | 5 ----- 3 files changed, 19 deletions(-) diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 4f2d04058d249..1177fba965628 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -313,15 +313,6 @@ std::string OpenBSD::getCompilerRT(const ArgList &Args, return std::string(Path.str()); } -void OpenBSD::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, - Action::OffloadKind) const { - // Support for .init_array is still new (Aug 2016). 
- if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, - options::OPT_fno_use_init_array, false)) - CC1Args.push_back("-fno-use-init-array"); -} - Tool *OpenBSD::buildAssembler() const { return new tools::openbsd::Assembler(*this); } diff --git a/clang/lib/Driver/ToolChains/OpenBSD.h b/clang/lib/Driver/ToolChains/OpenBSD.h index 09595faf9d6bb..5f9b259bf8613 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.h +++ b/clang/lib/Driver/ToolChains/OpenBSD.h @@ -86,11 +86,6 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF { SanitizerMask getSupportedSanitizers() const override; - void - addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - Action::OffloadKind DeviceOffloadKind) const override; - protected: Tool *buildAssembler() const override; Tool *buildLinker() const override; diff --git a/clang/test/Driver/openbsd.c b/clang/test/Driver/openbsd.c index e17d05dc76da3..cee4539eaca2f 100644 --- a/clang/test/Driver/openbsd.c +++ b/clang/test/Driver/openbsd.c @@ -122,8 +122,3 @@ // RUN: %clang -target powerpc-unknown-openbsd -### -c %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-POWERPC-SECUREPLT %s // CHECK-POWERPC-SECUREPLT: "-target-feature" "+secure-plt" - -// Check -fno-init-array -// RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd %s -### 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-CTORS %s -// CHECK-CTORS: "-fno-use-init-array" From 522d80ab553b42e2feadfd4178932069dfc51d3f Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Wed, 26 Aug 2020 21:56:01 +0300 Subject: [PATCH 027/109] [OpenMP] Fix import library installation with MinGW Patch by mati865@gmail.com Differential Revision: https://reviews.llvm.org/D86552 (cherry picked from commit 1596ea80fdf3410f94ef9a2548701d26cc81c2f5) --- openmp/runtime/src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 81275c0483dd4..19423f58c6c4a 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -195,7 +195,7 @@ if(WIN32) # the import library is "re-linked" to include kmp_import.cpp which prevents # linking of both Visual Studio OpenMP and newly built OpenMP set_source_files_properties(kmp_import.cpp PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_CXXFLAGS}") - set(LIBOMP_IMP_LIB_FILE ${LIBOMP_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(LIBOMP_IMP_LIB_FILE ${LIBOMP_LIB_NAME}${CMAKE_IMPORT_LIBRARY_SUFFIX}) set(LIBOMP_GENERATED_IMP_LIB_FILENAME ${LIBOMP_LIB_FILE}${CMAKE_STATIC_LIBRARY_SUFFIX}) set_target_properties(omp PROPERTIES VERSION ${LIBOMP_VERSION_MAJOR}.${LIBOMP_VERSION_MINOR} # uses /version flag From 2eab0b4f20aa192ba5ca8492c20aeae85b44e70b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Aug 2020 21:19:58 -0700 Subject: [PATCH 028/109] [X86] Update release notes. --- llvm/docs/ReleaseNotes.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 49b1a040a393e..c7ca861dbc34c 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -167,6 +167,16 @@ During this release ... avx512bw otherwise they would split into multiple YMM registers. This means vXi16/vXi8 vectors are consistently treated the same as vXi32/vXi64/vXf64/vXf32 vectors of the same total width. +* Support was added for Intel AMX instructions. +* Support was added for TSXLDTRK instructions. +* A pass was added for mitigating the Load Value Injection vulnerability. 
+* The Speculative Execution Side Effect Suppression pass was added which can + be used to as a last resort mitigation for speculative execution related + CPU vulnerabilities. +* Improved recognition of boolean vector reductions with better MOVMSKB/PTEST + handling +* Exteded recognition of rotation patterns to handle funnel shift as well, + allowing us to remove the existing x86-specific SHLD/SHRD combine. Changes to the AMDGPU Target ----------------------------- From f81c61748dd80b92a638bf16eebddc1a7ccfcf8e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 27 Aug 2020 23:15:21 -0700 Subject: [PATCH 029/109] ReleaseNotes: add some clang items --- clang/docs/ReleaseNotes.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a8fde6b452d03..7b1df2ed9c3f2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -58,6 +58,10 @@ Improvements to Clang's diagnostics -Wuninitialized. It warns on cases where uninitialized variables are passed as const reference arguments to a function. +- ``-Wimplicit-const-int-float-conversion`` (enabled by default) is a new + option controlled by ``-Wimplicit-int-float-conversion``. It warns on + implicit conversion from a floating constant to an integer type. + Non-comprehensive list of changes in this release ------------------------------------------------- @@ -144,6 +148,21 @@ Non-comprehensive list of changes in this release provided through shared libraries, avoid using lazy binding. If you use lazy binding, the results could be corrupted. +- ``-O`` maps to ``-O1`` instead of ``-O2``. + (`D79916 `_) + +- In a ``-flto={full,thin}`` link, ``-Os``, ``-Oz`` and ``-Og`` can be used + now. ``-Os`` and ``-Oz`` map to the -O2 pipe line while ``-Og`` maps to the + -O1 pipeline. + (`D79919 `_) + +- ``--coverage`` (gcov) defaults to gcov [4.8,8) compatible format now. + +- On x86, ``-fpic/-fPIC -fno-semantic-interposition`` assumes a global + definition of default visibility non-interposable and allows interprocedural + optimizations. In produced assembly ``-Lfunc$local`` local aliases are created + for global symbols of default visibility. + New Compiler Flags ------------------ @@ -195,6 +214,8 @@ New Compiler Flags adding -fdata-sections -ffunction-sections to the command generating the shared object). +- ``-fsanitize-coverage-allowlist`` and ``-fsanitize-coverage-blocklist`` are added. + Deprecated Compiler Flags ------------------------- From 5d21aedfdbf0b85d65bad08b7b89913205de4b33 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Fri, 28 Aug 2020 09:56:52 +0200 Subject: [PATCH 030/109] Add release note for RecoveryExpr. --- clang/docs/ReleaseNotes.rst | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7b1df2ed9c3f2..83877d0d95a2e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -48,6 +48,46 @@ Major New Features - ... +Recovery AST +^^^^^^^^^^^^ + +clang's AST now improves support for representing broken C++ code. This improves +the quality of subsequent diagnostics after an error is encountered. It also +exposes more information to tools like clang-tidy and clangd that consume +clang’s AST, allowing them to be more accurate on broken code. + +A RecoveryExpr is introduced in clang's AST, marking an expression containing +semantic errors. 
This preserves the source range and subexpressions of the +broken expression in the AST (rather than discarding the whole expression). + +For the following invalid code: + + .. code-block:: c++ + + int NoArg(); // Line 1 + int x = NoArg(42); // oops! + +clang-10 produces the minimal placeholder: + + .. code-block:: c++ + + // VarDecl col:5 x 'int' + +clang-11 produces a richer AST: + + .. code-block:: c++ + + // VarDecl col:5 x 'int' cinit + // `-RecoveryExpr '' contains-errors lvalue + // `-UnresolvedLookupExpr '' lvalue (ADL) = 'NoArg' + // `-IntegerLiteral 'int' 42 + +Note that error-dependent types and values may now occur outside a template +context. Tools may need to adjust assumptions about dependent code. + +This feature is on by default for C++ code, and can be explicitly controlled +with `-Xclang -f[no-]recovery-ast`. + Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From b931e22c954374acf75c4f1d1f2666f3f8e67470 Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Fri, 28 Aug 2020 01:56:12 +0000 Subject: [PATCH 031/109] [PowerPC] PPCBoolRetToInt: Don't translate Constant's operands When collecting `i1` values via `findAllDefs`, ignore Constant's operands, since Constant's operands might not be `i1`. Fixes https://bugs.llvm.org/show_bug.cgi?id=46923 which causes ICE ``` llvm-project/llvm/lib/IR/Constants.cpp:1924: static llvm::Constant *llvm::ConstantExpr::getZExt(llvm::Constant *, llvm::Type *, bool): Assertion `C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for ZExt!"' failed. ``` Differential Revision: https://reviews.llvm.org/D85007 (cherry picked from commit cbea17568f4301582c1d5d43990f089ca6cff522) --- llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp | 15 ++++++----- llvm/test/CodeGen/PowerPC/pr46923.ll | 29 +++++++++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr46923.ll diff --git a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 2259a29f838ab..f125ca011cd22 100644 --- a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -78,9 +78,9 @@ class PPCBoolRetToInt : public FunctionPass { Value *Curr = WorkList.back(); WorkList.pop_back(); auto *CurrUser = dyn_cast(Curr); - // Operands of CallInst are skipped because they may not be Bool type, - // and their positions are defined by ABI. - if (CurrUser && !isa(Curr)) + // Operands of CallInst/Constant are skipped because they may not be Bool + // type. For CallInst, their positions are defined by ABI. + if (CurrUser && !isa(Curr) && !isa(Curr)) for (auto &Op : CurrUser->operands()) if (Defs.insert(Op).second) WorkList.push_back(Op); @@ -90,6 +90,9 @@ class PPCBoolRetToInt : public FunctionPass { // Translate a i1 value to an equivalent i32/i64 value: Value *translate(Value *V) { + assert(V->getType() == Type::getInt1Ty(V->getContext()) && + "Expect an i1 value"); + Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext()) : Type::getInt32Ty(V->getContext()); @@ -252,9 +255,9 @@ class PPCBoolRetToInt : public FunctionPass { auto *First = dyn_cast(Pair.first); auto *Second = dyn_cast(Pair.second); assert((!First || Second) && "translated from user to non-user!?"); - // Operands of CallInst are skipped because they may not be Bool type, - // and their positions are defined by ABI. - if (First && !isa(First)) + // Operands of CallInst/Constant are skipped because they may not be Bool + // type. 
For CallInst, their positions are defined by ABI. + if (First && !isa(First) && !isa(First)) for (unsigned i = 0; i < First->getNumOperands(); ++i) Second->setOperand(i, BoolToIntMap[First->getOperand(i)]); } diff --git a/llvm/test/CodeGen/PowerPC/pr46923.ll b/llvm/test/CodeGen/PowerPC/pr46923.ll new file mode 100644 index 0000000000000..3e9faa60422af --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr46923.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s + +@bar = external constant i64, align 8 + +define i1 @foo() { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: isel r3, 0, r3, 4*cr5+lt +; CHECK-NEXT: blr +entry: + br label %next + +next: + br i1 undef, label %true, label %false + +true: + br label %end + +false: + br label %end + +end: + %a = phi i1 [ icmp ugt (i64 0, i64 ptrtoint (i64* @bar to i64)), %true ], + [ icmp ugt (i64 0, i64 2), %false ] + ret i1 %a +} From aa0dcfb1179b0916e0315f2125fd35af6d6869d3 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Fri, 28 Aug 2020 18:00:59 +0900 Subject: [PATCH 032/109] [release][docs] Add -mtls-size= option to the release note --- clang/docs/ReleaseNotes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 83877d0d95a2e..c32952cce51da 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -256,6 +256,12 @@ New Compiler Flags - ``-fsanitize-coverage-allowlist`` and ``-fsanitize-coverage-blocklist`` are added. +- -mtls-size={12,24,32,48} allows selecting the size of the TLS (thread-local + storage) in the local exec TLS model of AArch64, which is the default TLS + model for non-PIC objects. Each value represents 4KB, 16MB (default), 4GB, + and 256TB (needs -mcmodel=large). This allows large/many thread local + variables or a compact/fast code in an executable. + Deprecated Compiler Flags ------------------------- From ba3413982cbd7a5b5aeaf2ea34e0a91d5561202d Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Thu, 27 Aug 2020 15:31:40 +0100 Subject: [PATCH 033/109] [CodeGen] Properly propagating Calling Convention information when lowering vector arguments When joining the legal parts of vector arguments into its original value during the lower of Formal Arguments in SelectionDAGBuilder, the Calling Convention information was not being propagated for the handling of each individual parts. The same did not happen when lowering calls, causing a mismatch. This patch fixes the issue by properly propagating the Calling Convention details. This fixes Bugzilla #47001. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D86715 (cherry picked from commit 3d943bcd223e5b97179840c2f5885fe341e51747) --- .../SelectionDAG/SelectionDAGBuilder.cpp | 4 +- llvm/test/CodeGen/ARM/fp16-args.ll | 89 +++++++++++++++++-- llvm/test/CodeGen/ARM/fp16-v3.ll | 3 - 3 files changed, 83 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1d596c89c9113..feb949f81eba3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -409,7 +409,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // as appropriate. 
for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT, V); + PartVT, IntermediateVT, V, CallConv); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. @@ -418,7 +418,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT, V); + PartVT, IntermediateVT, V, CallConv); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the diff --git a/llvm/test/CodeGen/ARM/fp16-args.ll b/llvm/test/CodeGen/ARM/fp16-args.ll index 7ed1e883eef19..18bbcd12c768a 100644 --- a/llvm/test/CodeGen/ARM/fp16-args.ll +++ b/llvm/test/CodeGen/ARM/fp16-args.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT -; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD -; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-SOFT -; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-HARD -; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT -; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD -; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-SOFT -; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-HARD +; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=SOFT +; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=HARD +; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=FULL-SOFT --check-prefix=FULL-SOFT-LE +; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=FULL-HARD --check-prefix=FULL-HARD-LE +; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=SOFT +; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=HARD +; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=FULL-SOFT --check-prefix=FULL-SOFT-BE +; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=FULL-HARD --check-prefix=FULL-HARD-BE define half @foo(half %a, half %b) { ; SOFT-LABEL: foo: @@ -44,3 +44,76 @@ entry: %0 = fadd half %a, %b ret half %0 } + +define <4 x half> @foo_vec(<4 x half> %a) { +; SOFT-LABEL: foo_vec: +; SOFT: @ %bb.0: @ %entry +; SOFT-NEXT: vmov s0, r3 +; SOFT-NEXT: vmov s2, r1 +; SOFT-NEXT: vcvtb.f32.f16 s0, s0 +; SOFT-NEXT: vmov s4, r0 +; SOFT-NEXT: vcvtb.f32.f16 s2, s2 +; SOFT-NEXT: vmov s6, r2 +; SOFT-NEXT: vcvtb.f32.f16 s4, s4 +; SOFT-NEXT: vcvtb.f32.f16 s6, s6 +; SOFT-NEXT: vadd.f32 
s0, s0, s0 +; SOFT-NEXT: vadd.f32 s2, s2, s2 +; SOFT-NEXT: vcvtb.f16.f32 s0, s0 +; SOFT-NEXT: vadd.f32 s4, s4, s4 +; SOFT-NEXT: vcvtb.f16.f32 s2, s2 +; SOFT-NEXT: vadd.f32 s6, s6, s6 +; SOFT-NEXT: vcvtb.f16.f32 s4, s4 +; SOFT-NEXT: vcvtb.f16.f32 s6, s6 +; SOFT-NEXT: vmov r0, s4 +; SOFT-NEXT: vmov r1, s2 +; SOFT-NEXT: vmov r2, s6 +; SOFT-NEXT: vmov r3, s0 +; SOFT-NEXT: bx lr +; +; HARD-LABEL: foo_vec: +; HARD: @ %bb.0: @ %entry +; HARD-NEXT: vcvtb.f32.f16 s4, s3 +; HARD-NEXT: vcvtb.f32.f16 s2, s2 +; HARD-NEXT: vcvtb.f32.f16 s6, s1 +; HARD-NEXT: vcvtb.f32.f16 s0, s0 +; HARD-NEXT: vadd.f32 s2, s2, s2 +; HARD-NEXT: vadd.f32 s0, s0, s0 +; HARD-NEXT: vcvtb.f16.f32 s2, s2 +; HARD-NEXT: vadd.f32 s4, s4, s4 +; HARD-NEXT: vcvtb.f16.f32 s0, s0 +; HARD-NEXT: vadd.f32 s6, s6, s6 +; HARD-NEXT: vcvtb.f16.f32 s3, s4 +; HARD-NEXT: vcvtb.f16.f32 s1, s6 +; HARD-NEXT: bx lr +; +; FULL-SOFT-LE-LABEL: foo_vec: +; FULL-SOFT-LE: @ %bb.0: @ %entry +; FULL-SOFT-LE-NEXT: vmov d16, r0, r1 +; FULL-SOFT-LE-NEXT: vadd.f16 d16, d16, d16 +; FULL-SOFT-LE-NEXT: vmov r0, r1, d16 +; FULL-SOFT-LE-NEXT: bx lr +; +; FULL-HARD-LE-LABEL: foo_vec: +; FULL-HARD-LE: @ %bb.0: @ %entry +; FULL-HARD-LE-NEXT: vadd.f16 d0, d0, d0 +; FULL-HARD-LE-NEXT: bx lr +; +; FULL-SOFT-BE-LABEL: foo_vec: +; FULL-SOFT-BE: @ %bb.0: @ %entry +; FULL-SOFT-BE-NEXT: vmov d16, r1, r0 +; FULL-SOFT-BE-NEXT: vrev64.16 d16, d16 +; FULL-SOFT-BE-NEXT: vadd.f16 d16, d16, d16 +; FULL-SOFT-BE-NEXT: vrev64.16 d16, d16 +; FULL-SOFT-BE-NEXT: vmov r1, r0, d16 +; FULL-SOFT-BE-NEXT: bx lr +; +; FULL-HARD-BE-LABEL: foo_vec: +; FULL-HARD-BE: @ %bb.0: @ %entry +; FULL-HARD-BE-NEXT: vrev64.16 d16, d0 +; FULL-HARD-BE-NEXT: vadd.f16 d16, d16, d16 +; FULL-HARD-BE-NEXT: vrev64.16 d0, d16 +; FULL-HARD-BE-NEXT: bx lr +entry: + %0 = fadd <4 x half> %a, %a + ret <4 x half> %0 +} diff --git a/llvm/test/CodeGen/ARM/fp16-v3.ll b/llvm/test/CodeGen/ARM/fp16-v3.ll index e84fee2c2e1b5..085503e80c7f2 100644 --- a/llvm/test/CodeGen/ARM/fp16-v3.ll +++ b/llvm/test/CodeGen/ARM/fp16-v3.ll @@ -28,9 +28,6 @@ define void @test_vec3(<3 x half>* %arr, i32 %i) #0 { } ; CHECK-LABEL: test_bitcast: -; CHECK: vcvtb.f16.f32 -; CHECK: vcvtb.f16.f32 -; CHECK: vcvtb.f16.f32 ; CHECK: pkhbt ; CHECK: uxth define void @test_bitcast(<3 x half> %inp, <3 x i16>* %arr) #0 { From dae9fe408793def8a49f5e1d10d2a859627785e3 Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Fri, 28 Aug 2020 11:10:11 +0200 Subject: [PATCH 034/109] [OpenCL][Docs] 10.x release notes Summary of major changes for OpenCL support in clang 11. Differential revision: https://reviews.llvm.org/D86626 --- clang/docs/ReleaseNotes.rst | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c32952cce51da..9d0ab935063fd 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -382,10 +382,36 @@ C++1z Feature Support Objective-C Language Changes in Clang ------------------------------------- -OpenCL C Language Changes in Clang ----------------------------------- +OpenCL Kernel Language Changes in Clang +--------------------------------------- -... +- Added extensions from `cl_khr_subgroup_extensions` to clang and the internal + header. + +- Added rocm device libs linking for AMDGPU. + +- Added diagnostic for OpenCL 2.0 blocks used in function arguments. + +- Fixed MS mangling for OpenCL 2.0 pipe type specifier. + +- Improved command line options for fast relaxed math. 
+ +- Improved `atomic_fetch_min/max` functions in the internal header + (`opencl-c.h`). + +- Improved size of builtin function table for `TableGen`-based internal header + (enabled by `-fdeclare-opencl-builtins`) and added new functionality for + OpenCL 2.0 atomics, pipes, enqueue kernel, `cl_khr_subgroups`, + `cl_arm_integer_dot_product`. + +Changes related to C++ for OpenCL +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Added `addrspace_cast` operator. + +- Improved address space deduction in templates. + +- Improved diagnostics of address spaces in nested pointer conversions. ABI Changes in Clang -------------------- From bc1425082be4a01a661fda974a8b90bfbbd14faf Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Wed, 19 Aug 2020 10:34:25 +0100 Subject: [PATCH 035/109] [Clang][SVE] NFC: Move info about ACLE types into separate function. This function returns a struct `BuiltinVectorTypeInfo` that contains the builtin vector's element type, element count and number of vectors (used for vector tuples). Reviewed By: c-rhodes Differential Revision: https://reviews.llvm.org/D86100 (cherry picked from commit 0353848cc94f0fc23a953f8f420be7ee3342c8dc) --- clang/include/clang/AST/ASTContext.h | 16 ++++ clang/lib/AST/ASTContext.cpp | 113 +++++++++++++++++++++++++++ clang/lib/CodeGen/CodeGenTypes.cpp | 51 ++---------- 3 files changed, 135 insertions(+), 45 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 2b988be60da9f..9020e6629d089 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -60,6 +60,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -1297,6 +1298,21 @@ class ASTContext : public RefCountedBase { /// Returns a vla type where known sizes are replaced with [*]. QualType getVariableArrayDecayedType(QualType Ty) const; + // Convenience struct to return information about a builtin vector type. + struct BuiltinVectorTypeInfo { + QualType ElementType; + llvm::ElementCount EC; + unsigned NumVectors; + BuiltinVectorTypeInfo(QualType ElementType, llvm::ElementCount EC, + unsigned NumVectors) + : ElementType(ElementType), EC(EC), NumVectors(NumVectors) {} + }; + + /// Returns the element type, element count and number of vectors + /// (in case of tuple) for a builtin vector type. + BuiltinVectorTypeInfo + getBuiltinVectorTypeInfo(const BuiltinType *VecTy) const; + /// Return the unique reference to a scalable vector type of the specified /// element type and scalable number of elements. 
/// diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index e3798bb46e86a..bf51d35d96939 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -3634,6 +3634,119 @@ QualType ASTContext::getIncompleteArrayType(QualType elementType, return QualType(newType, 0); } +ASTContext::BuiltinVectorTypeInfo +ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const { +#define SVE_INT_ELTTY(BITS, ELTS, SIGNED, NUMVECTORS) \ + {getIntTypeForBitwidth(BITS, SIGNED), llvm::ElementCount(ELTS, true), \ + NUMVECTORS}; + +#define SVE_ELTTY(ELTTY, ELTS, NUMVECTORS) \ + {ELTTY, llvm::ElementCount(ELTS, true), NUMVECTORS}; + + switch (Ty->getKind()) { + default: + llvm_unreachable("Unsupported builtin vector type"); + case BuiltinType::SveInt8: + return SVE_INT_ELTTY(8, 16, true, 1); + case BuiltinType::SveUint8: + return SVE_INT_ELTTY(8, 16, false, 1); + case BuiltinType::SveInt8x2: + return SVE_INT_ELTTY(8, 16, true, 2); + case BuiltinType::SveUint8x2: + return SVE_INT_ELTTY(8, 16, false, 2); + case BuiltinType::SveInt8x3: + return SVE_INT_ELTTY(8, 16, true, 3); + case BuiltinType::SveUint8x3: + return SVE_INT_ELTTY(8, 16, false, 3); + case BuiltinType::SveInt8x4: + return SVE_INT_ELTTY(8, 16, true, 4); + case BuiltinType::SveUint8x4: + return SVE_INT_ELTTY(8, 16, false, 4); + case BuiltinType::SveInt16: + return SVE_INT_ELTTY(16, 8, true, 1); + case BuiltinType::SveUint16: + return SVE_INT_ELTTY(16, 8, false, 1); + case BuiltinType::SveInt16x2: + return SVE_INT_ELTTY(16, 8, true, 2); + case BuiltinType::SveUint16x2: + return SVE_INT_ELTTY(16, 8, false, 2); + case BuiltinType::SveInt16x3: + return SVE_INT_ELTTY(16, 8, true, 3); + case BuiltinType::SveUint16x3: + return SVE_INT_ELTTY(16, 8, false, 3); + case BuiltinType::SveInt16x4: + return SVE_INT_ELTTY(16, 8, true, 4); + case BuiltinType::SveUint16x4: + return SVE_INT_ELTTY(16, 8, false, 4); + case BuiltinType::SveInt32: + return SVE_INT_ELTTY(32, 4, true, 1); + case BuiltinType::SveUint32: + return SVE_INT_ELTTY(32, 4, false, 1); + case BuiltinType::SveInt32x2: + return SVE_INT_ELTTY(32, 4, true, 2); + case BuiltinType::SveUint32x2: + return SVE_INT_ELTTY(32, 4, false, 2); + case BuiltinType::SveInt32x3: + return SVE_INT_ELTTY(32, 4, true, 3); + case BuiltinType::SveUint32x3: + return SVE_INT_ELTTY(32, 4, false, 3); + case BuiltinType::SveInt32x4: + return SVE_INT_ELTTY(32, 4, true, 4); + case BuiltinType::SveUint32x4: + return SVE_INT_ELTTY(32, 4, false, 4); + case BuiltinType::SveInt64: + return SVE_INT_ELTTY(64, 2, true, 1); + case BuiltinType::SveUint64: + return SVE_INT_ELTTY(64, 2, false, 1); + case BuiltinType::SveInt64x2: + return SVE_INT_ELTTY(64, 2, true, 2); + case BuiltinType::SveUint64x2: + return SVE_INT_ELTTY(64, 2, false, 2); + case BuiltinType::SveInt64x3: + return SVE_INT_ELTTY(64, 2, true, 3); + case BuiltinType::SveUint64x3: + return SVE_INT_ELTTY(64, 2, false, 3); + case BuiltinType::SveInt64x4: + return SVE_INT_ELTTY(64, 2, true, 4); + case BuiltinType::SveUint64x4: + return SVE_INT_ELTTY(64, 2, false, 4); + case BuiltinType::SveBool: + return SVE_ELTTY(BoolTy, 16, 1); + case BuiltinType::SveFloat16: + return SVE_ELTTY(HalfTy, 8, 1); + case BuiltinType::SveFloat16x2: + return SVE_ELTTY(HalfTy, 8, 2); + case BuiltinType::SveFloat16x3: + return SVE_ELTTY(HalfTy, 8, 3); + case BuiltinType::SveFloat16x4: + return SVE_ELTTY(HalfTy, 8, 4); + case BuiltinType::SveFloat32: + return SVE_ELTTY(FloatTy, 4, 1); + case BuiltinType::SveFloat32x2: + return SVE_ELTTY(FloatTy, 4, 2); + case 
BuiltinType::SveFloat32x3: + return SVE_ELTTY(FloatTy, 4, 3); + case BuiltinType::SveFloat32x4: + return SVE_ELTTY(FloatTy, 4, 4); + case BuiltinType::SveFloat64: + return SVE_ELTTY(DoubleTy, 2, 1); + case BuiltinType::SveFloat64x2: + return SVE_ELTTY(DoubleTy, 2, 2); + case BuiltinType::SveFloat64x3: + return SVE_ELTTY(DoubleTy, 2, 3); + case BuiltinType::SveFloat64x4: + return SVE_ELTTY(DoubleTy, 2, 4); + case BuiltinType::SveBFloat16: + return SVE_ELTTY(BFloat16Ty, 8, 1); + case BuiltinType::SveBFloat16x2: + return SVE_ELTTY(BFloat16Ty, 8, 2); + case BuiltinType::SveBFloat16x3: + return SVE_ELTTY(BFloat16Ty, 8, 3); + case BuiltinType::SveBFloat16x4: + return SVE_ELTTY(BFloat16Ty, 8, 4); + } +} + /// getScalableVectorType - Return the unique reference to a scalable vector /// type of the specified element type and size. VectorType must be a built-in /// type. diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index d431c0263666e..4792c10ecdaef 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -533,99 +533,60 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::OCLReserveID: ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); break; -#define GET_SVE_INT_VEC(BITS, ELTS) \ - llvm::ScalableVectorType::get( \ - llvm::IntegerType::get(getLLVMContext(), BITS), ELTS); case BuiltinType::SveInt8: case BuiltinType::SveUint8: - return GET_SVE_INT_VEC(8, 16); case BuiltinType::SveInt8x2: case BuiltinType::SveUint8x2: - return GET_SVE_INT_VEC(8, 32); case BuiltinType::SveInt8x3: case BuiltinType::SveUint8x3: - return GET_SVE_INT_VEC(8, 48); case BuiltinType::SveInt8x4: case BuiltinType::SveUint8x4: - return GET_SVE_INT_VEC(8, 64); case BuiltinType::SveInt16: case BuiltinType::SveUint16: - return GET_SVE_INT_VEC(16, 8); case BuiltinType::SveInt16x2: case BuiltinType::SveUint16x2: - return GET_SVE_INT_VEC(16, 16); case BuiltinType::SveInt16x3: case BuiltinType::SveUint16x3: - return GET_SVE_INT_VEC(16, 24); case BuiltinType::SveInt16x4: case BuiltinType::SveUint16x4: - return GET_SVE_INT_VEC(16, 32); case BuiltinType::SveInt32: case BuiltinType::SveUint32: - return GET_SVE_INT_VEC(32, 4); case BuiltinType::SveInt32x2: case BuiltinType::SveUint32x2: - return GET_SVE_INT_VEC(32, 8); case BuiltinType::SveInt32x3: case BuiltinType::SveUint32x3: - return GET_SVE_INT_VEC(32, 12); case BuiltinType::SveInt32x4: case BuiltinType::SveUint32x4: - return GET_SVE_INT_VEC(32, 16); case BuiltinType::SveInt64: case BuiltinType::SveUint64: - return GET_SVE_INT_VEC(64, 2); case BuiltinType::SveInt64x2: case BuiltinType::SveUint64x2: - return GET_SVE_INT_VEC(64, 4); case BuiltinType::SveInt64x3: case BuiltinType::SveUint64x3: - return GET_SVE_INT_VEC(64, 6); case BuiltinType::SveInt64x4: case BuiltinType::SveUint64x4: - return GET_SVE_INT_VEC(64, 8); case BuiltinType::SveBool: - return GET_SVE_INT_VEC(1, 16); -#undef GET_SVE_INT_VEC -#define GET_SVE_FP_VEC(TY, ISFP16, ELTS) \ - llvm::ScalableVectorType::get( \ - getTypeForFormat(getLLVMContext(), \ - Context.getFloatTypeSemantics(Context.TY), \ - /* UseNativeHalf = */ ISFP16), \ - ELTS); case BuiltinType::SveFloat16: - return GET_SVE_FP_VEC(HalfTy, true, 8); case BuiltinType::SveFloat16x2: - return GET_SVE_FP_VEC(HalfTy, true, 16); case BuiltinType::SveFloat16x3: - return GET_SVE_FP_VEC(HalfTy, true, 24); case BuiltinType::SveFloat16x4: - return GET_SVE_FP_VEC(HalfTy, true, 32); case BuiltinType::SveFloat32: - return GET_SVE_FP_VEC(FloatTy, false, 4); case 
BuiltinType::SveFloat32x2: - return GET_SVE_FP_VEC(FloatTy, false, 8); case BuiltinType::SveFloat32x3: - return GET_SVE_FP_VEC(FloatTy, false, 12); case BuiltinType::SveFloat32x4: - return GET_SVE_FP_VEC(FloatTy, false, 16); case BuiltinType::SveFloat64: - return GET_SVE_FP_VEC(DoubleTy, false, 2); case BuiltinType::SveFloat64x2: - return GET_SVE_FP_VEC(DoubleTy, false, 4); case BuiltinType::SveFloat64x3: - return GET_SVE_FP_VEC(DoubleTy, false, 6); case BuiltinType::SveFloat64x4: - return GET_SVE_FP_VEC(DoubleTy, false, 8); case BuiltinType::SveBFloat16: - return GET_SVE_FP_VEC(BFloat16Ty, false, 8); case BuiltinType::SveBFloat16x2: - return GET_SVE_FP_VEC(BFloat16Ty, false, 16); case BuiltinType::SveBFloat16x3: - return GET_SVE_FP_VEC(BFloat16Ty, false, 24); - case BuiltinType::SveBFloat16x4: - return GET_SVE_FP_VEC(BFloat16Ty, false, 32); -#undef GET_SVE_FP_VEC + case BuiltinType::SveBFloat16x4: { + ASTContext::BuiltinVectorTypeInfo Info = + Context.getBuiltinVectorTypeInfo(cast(Ty)); + return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), + Info.EC.Min * Info.NumVectors); + } case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ From 2bde1011ba1a794a0391a37d41d0b461dec89d54 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Wed, 19 Aug 2020 11:06:51 +0100 Subject: [PATCH 036/109] [AArch64][SVE] Fix calculation restore point for SVE callee saves. This fixes an issue where the restore point of callee-saves in the function epilogues was incorrectly calculated when the basic block consisted of only a RET instruction. This caused dealloc instructions to be inserted in between the block of callee-save restore instructions, rather than before it. Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D86099 (cherry picked from commit 5f47d4456d192eaea8c56a2b4648023c8743c927) --- .../Target/AArch64/AArch64FrameLowering.cpp | 7 ++-- .../framelayout-sve-calleesaves-fix.mir | 36 +++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 83653dcbb8cf7..c6cc6e9e84718 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1694,11 +1694,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize; MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI; if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { - RestoreBegin = std::prev(RestoreEnd);; - while (IsSVECalleeSave(RestoreBegin) && - RestoreBegin != MBB.begin()) + RestoreBegin = std::prev(RestoreEnd); + while (RestoreBegin != MBB.begin() && + IsSVECalleeSave(std::prev(RestoreBegin))) --RestoreBegin; - ++RestoreBegin; assert(IsSVECalleeSave(RestoreBegin) && IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir new file mode 100644 index 0000000000000..a3cbd39c6531f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir @@ -0,0 +1,36 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +# RUN: llc -mattr=+sve -mtriple=aarch64-none-linux-gnu -start-before=prologepilog %s -o 
- | FileCheck %s + +--- | + define aarch64_sve_vector_pcs void @fix_restorepoint_p4() { entry: unreachable } + ; CHECK-LABEL: fix_restorepoint_p4: + ; CHECK: // %bb.0: // %entry + ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill + ; CHECK-NEXT: addvl sp, sp, #-2 + ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill + ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill + ; CHECK-NEXT: addvl sp, sp, #-1 + ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG + ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG + ; CHECK-NEXT: .cfi_offset w29, -16 + ; CHECK-NEXT: // implicit-def: $z8 + ; CHECK-NEXT: // implicit-def: $p4 + ; CHECK-NEXT: addvl sp, sp, #1 + ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload + ; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload + ; CHECK-NEXT: addvl sp, sp, #2 + ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload + ; CHECK-NEXT: ret +... +name: fix_restorepoint_p4 +stack: + - { id: 0, stack-id: sve-vec, size: 16, alignment: 16 } +body: | + bb.0.entry: + $z8 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + B %bb.1 + + bb.1.entry: + RET_ReallyLR +--- From f5c02e4855d3206e27e151dfdda457b95af33529 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Thu, 27 Aug 2020 08:12:43 +0100 Subject: [PATCH 037/109] [AArch64][SVE] Add missing debug info for ACLE types. This patch adds type information for SVE ACLE vector types, by describing them as vectors, with a lower bound of 0, and an upper bound described by a DWARF expression using the AArch64 Vector Granule register (VG), which contains the runtime multiple of 64bit granules in an SVE vector. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D86101 (cherry picked from commit 4e9b66de3f046c1e97b34c938b0920fa6401f40c) --- clang/lib/CodeGen/CGDebugInfo.cpp | 46 ++++++++---- .../CodeGen/aarch64-debug-sve-vector-types.c | 71 +++++++++++++++++++ .../aarch64-debug-sve-vectorx2-types.c | 67 +++++++++++++++++ .../aarch64-debug-sve-vectorx3-types.c | 67 +++++++++++++++++ .../aarch64-debug-sve-vectorx4-types.c | 67 +++++++++++++++++ clang/test/CodeGen/aarch64-sve.c | 16 ----- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 6 +- llvm/test/DebugInfo/AArch64/dbg-sve-types.ll | 44 ++++++++++++ 8 files changed, 351 insertions(+), 33 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-debug-sve-vector-types.c create mode 100644 clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c create mode 100644 clang/test/CodeGen/aarch64-debug-sve-vectorx3-types.c create mode 100644 clang/test/CodeGen/aarch64-debug-sve-vectorx4-types.c create mode 100644 llvm/test/DebugInfo/AArch64/dbg-sve-types.ll diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 6965c4a1209c2..703f5087370af 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -719,23 +719,39 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Id: \ return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty); #include "clang/Basic/OpenCLExtensionTypes.def" - // TODO: real support for SVE types requires more infrastructure - // to be added first. The types have a variable length and are - // represented in debug info as types whose length depends on a - // target-specific pseudo register. 
-#define SVE_TYPE(Name, Id, SingletonId) \ - case BuiltinType::Id: + +#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/AArch64SVEACLETypes.def" - { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "cannot yet generate debug info for SVE type '%0'"); - auto Name = BT->getName(CGM.getContext().getPrintingPolicy()); - CGM.getDiags().Report(DiagID) << Name; - // Return something safe. - return CreateType(cast(CGM.getContext().IntTy)); - } + { + ASTContext::BuiltinVectorTypeInfo Info = + CGM.getContext().getBuiltinVectorTypeInfo(BT); + unsigned NumElemsPerVG = (Info.EC.Min * Info.NumVectors) / 2; + + // Debuggers can't extract 1bit from a vector, so will display a + // bitpattern for svbool_t instead. + if (Info.ElementType == CGM.getContext().BoolTy) { + NumElemsPerVG /= 8; + Info.ElementType = CGM.getContext().UnsignedCharTy; + } + auto *LowerBound = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), 0)); + SmallVector Expr( + {llvm::dwarf::DW_OP_constu, NumElemsPerVG, llvm::dwarf::DW_OP_bregx, + /* AArch64::VG */ 46, 0, llvm::dwarf::DW_OP_mul, + llvm::dwarf::DW_OP_constu, 1, llvm::dwarf::DW_OP_minus}); + auto *UpperBound = DBuilder.createExpression(Expr); + + llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange( + /*count*/ nullptr, LowerBound, UpperBound, /*stride*/ nullptr); + llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript); + llvm::DIType *ElemTy = + getOrCreateType(Info.ElementType, TheCU->getFile()); + auto Align = getTypeAlignIfRequired(BT, CGM.getContext()); + return DBuilder.createVectorType(/*Size*/ 0, Align, ElemTy, + SubscriptArray); + } case BuiltinType::UChar: case BuiltinType::Char_U: Encoding = llvm::dwarf::DW_ATE_unsigned_char; diff --git a/clang/test/CodeGen/aarch64-debug-sve-vector-types.c b/clang/test/CodeGen/aarch64-debug-sve-vector-types.c new file mode 100644 index 0000000000000..4325e3f44747b --- /dev/null +++ b/clang/test/CodeGen/aarch64-debug-sve-vector-types.c @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ +// RUN: -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +void test_locals(void) { + // CHECK-DAG: name: "__SVBool_t",{{.*}}, baseType: ![[CT1:[0-9]+]] + // CHECK-DAG: ![[CT1]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTYU8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1_64:[0-9]+]]) + // CHECK-DAG: ![[ELTTYU8]] = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) + // CHECK-DAG: ![[ELTS1_64]] = !{![[REALELTS1_64:[0-9]+]]} + // CHECK-DAG: ![[REALELTS1_64]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 1, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __SVBool_t b8; + + // CHECK-DAG: name: "__SVInt8_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTYS8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8:[0-9]+]]) + // CHECK-DAG: ![[ELTTYS8]] = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char) + // CHECK-DAG: ![[ELTS8]] = !{![[REALELTS8:[0-9]+]]} + // CHECK-DAG: ![[REALELTS8]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 8, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __SVInt8_t s8; + + // CHECK-DAG: name: "__SVUint8_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: 
![[ELTTYU8]], flags: DIFlagVector, elements: ![[ELTS8]]) + __SVUint8_t u8; + + // CHECK-DAG: name: "__SVInt16_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16:[0-9]+]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS16]] = !{![[REALELTS16:[0-9]+]]} + // CHECK-DAG: ![[REALELTS16]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 4, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __SVInt16_t s16; + + // CHECK-DAG: name: "__SVUint16_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) + __SVUint16_t u16; + + // CHECK-DAG: name: "__SVInt32_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32:[0-9]+]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS32]] = !{![[REALELTS32:[0-9]+]]} + // CHECK-DAG: ![[REALELTS32]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 2, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __SVInt32_t s32; + + // CHECK-DAG: name: "__SVUint32_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + __SVUint32_t u32; + + // CHECK-DAG: name: "__SVInt64_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) + __SVInt64_t s64; + + // CHECK-DAG: name: "__SVUint64_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) + __SVUint64_t u64; + + // CHECK: name: "__SVFloat16_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "__fp16", size: 16, encoding: DW_ATE_float) + __SVFloat16_t f16; + + // CHECK: name: "__SVFloat32_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) + __SVFloat32_t f32; + + // CHECK: name: "__SVFloat64_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) + __SVFloat64_t f64; +} 
diff --git a/clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c b/clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c new file mode 100644 index 0000000000000..0d874c0b557cd --- /dev/null +++ b/clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ +// RUN: -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +void test_locals(void) { + // CHECK-DAG: name: "__clang_svint8x2_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8x2:[0-9]+]]) + // CHECK-DAG: ![[ELTTY8]] = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char) + // CHECK-DAG: ![[ELTS8x2]] = !{![[REALELTS8x2:[0-9]+]]} + // CHECK-DAG: ![[REALELTS8x2]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 16, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint8x2_t s8; + + // CHECK-DAG: name: "__clang_svuint8x2_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8x2]]) + // CHECK-DAG: ![[ELTTY8]] = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) + __clang_svuint8x2_t u8; + + // CHECK-DAG: name: "__clang_svint16x2_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x2:[0-9]+]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS16x2]] = !{![[REALELTS16x2:[0-9]+]]} + // CHECK-DAG: ![[REALELTS16x2]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 8, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint16x2_t s16; + + // CHECK-DAG: name: "__clang_svuint16x2_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x2]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) + __clang_svuint16x2_t u16; + + // CHECK-DAG: name: "__clang_svint32x2_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x2:[0-9]+]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS32x2]] = !{![[REALELTS32x2:[0-9]+]]} + // CHECK-DAG: ![[REALELTS32x2]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 4, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint32x2_t s32; + + // CHECK-DAG: name: "__clang_svuint32x2_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x2]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + __clang_svuint32x2_t u32; + + // CHECK-DAG: name: "__clang_svint64x2_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x2_64:[0-9]+]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: 
"long int", size: 64, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS1x2_64]] = !{![[REALELTS1x2_64:[0-9]+]]} + // CHECK-DAG: ![[REALELTS1x2_64]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 2, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint64x2_t s64; + + // CHECK-DAG: name: "__clang_svuint64x2_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x2_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) + __clang_svuint64x2_t u64; + + // CHECK: name: "__clang_svfloat16x2_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x2]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "__fp16", size: 16, encoding: DW_ATE_float) + __clang_svfloat16x2_t f16; + + // CHECK: name: "__clang_svfloat32x2_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x2]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) + __clang_svfloat32x2_t f32; + + // CHECK: name: "__clang_svfloat64x2_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x2_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) + __clang_svfloat64x2_t f64; +} diff --git a/clang/test/CodeGen/aarch64-debug-sve-vectorx3-types.c b/clang/test/CodeGen/aarch64-debug-sve-vectorx3-types.c new file mode 100644 index 0000000000000..c5dde7d1295d1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-debug-sve-vectorx3-types.c @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ +// RUN: -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +void test_locals(void) { + // CHECK-DAG: name: "__clang_svint8x3_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8x3:[0-9]+]]) + // CHECK-DAG: ![[ELTTY8]] = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char) + // CHECK-DAG: ![[ELTS8x3]] = !{![[REALELTS8x3:[0-9]+]]} + // CHECK-DAG: ![[REALELTS8x3]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 24, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint8x3_t s8; + + // CHECK-DAG: name: "__clang_svuint8x3_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8x3]]) + // CHECK-DAG: ![[ELTTY8]] = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) + __clang_svuint8x3_t u8; + + // CHECK-DAG: name: "__clang_svint16x3_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x3:[0-9]+]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS16x3]] = !{![[REALELTS16x3:[0-9]+]]} + // CHECK-DAG: ![[REALELTS16x3]] = !DISubrange(lowerBound: 0, 
upperBound: !DIExpression(DW_OP_constu, 12, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint16x3_t s16; + + // CHECK-DAG: name: "__clang_svuint16x3_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x3]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) + __clang_svuint16x3_t u16; + + // CHECK-DAG: name: "__clang_svint32x3_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x3:[0-9]+]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS32x3]] = !{![[REALELTS32x3:[0-9]+]]} + // CHECK-DAG: ![[REALELTS32x3]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 6, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint32x3_t s32; + + // CHECK-DAG: name: "__clang_svuint32x3_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x3]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + __clang_svuint32x3_t u32; + + // CHECK-DAG: name: "__clang_svint64x3_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x3_64:[0-9]+]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS1x3_64]] = !{![[REALELTS1x3_64:[0-9]+]]} + // CHECK-DAG: ![[REALELTS1x3_64]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 3, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint64x3_t s64; + + // CHECK-DAG: name: "__clang_svuint64x3_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x3_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) + __clang_svuint64x3_t u64; + + // CHECK: name: "__clang_svfloat16x3_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x3]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "__fp16", size: 16, encoding: DW_ATE_float) + __clang_svfloat16x3_t f16; + + // CHECK: name: "__clang_svfloat32x3_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x3]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) + __clang_svfloat32x3_t f32; + + // CHECK: name: "__clang_svfloat64x3_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x3_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) + __clang_svfloat64x3_t f64; +} diff --git a/clang/test/CodeGen/aarch64-debug-sve-vectorx4-types.c 
b/clang/test/CodeGen/aarch64-debug-sve-vectorx4-types.c new file mode 100644 index 0000000000000..90a266c53f907 --- /dev/null +++ b/clang/test/CodeGen/aarch64-debug-sve-vectorx4-types.c @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ +// RUN: -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +void test_locals(void) { + // CHECK-DAG: name: "__clang_svint8x4_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8x4:[0-9]+]]) + // CHECK-DAG: ![[ELTTY8]] = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char) + // CHECK-DAG: ![[ELTS8x4]] = !{![[REALELTS8x4:[0-9]+]]} + // CHECK-DAG: ![[REALELTS8x4]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 32, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint8x4_t s8; + + // CHECK-DAG: name: "__clang_svuint8x4_t",{{.*}}, baseType: ![[CT8:[0-9]+]] + // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8x4]]) + // CHECK-DAG: ![[ELTTY8]] = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) + __clang_svuint8x4_t u8; + + // CHECK-DAG: name: "__clang_svint16x4_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x4:[0-9]+]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS16x4]] = !{![[REALELTS16x4:[0-9]+]]} + // CHECK-DAG: ![[REALELTS16x4]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 16, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint16x4_t s16; + + // CHECK-DAG: name: "__clang_svuint16x4_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x4]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) + __clang_svuint16x4_t u16; + + // CHECK-DAG: name: "__clang_svint32x4_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x4:[0-9]+]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + // CHECK-DAG: ![[ELTS32x4]] = !{![[REALELTS32x4:[0-9]+]]} + // CHECK-DAG: ![[REALELTS32x4]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 8, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint32x4_t s32; + + // CHECK-DAG: name: "__clang_svuint32x4_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x4]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + __clang_svuint32x4_t u32; + + // CHECK-DAG: name: "__clang_svint64x4_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x4_64:[0-9]+]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) + // CHECK-DAG: 
![[ELTS1x4_64]] = !{![[REALELTS1x4_64:[0-9]+]]} + // CHECK-DAG: ![[REALELTS1x4_64]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 4, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) + __clang_svint64x4_t s64; + + // CHECK-DAG: name: "__clang_svuint64x4_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x4_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) + __clang_svuint64x4_t u64; + + // CHECK: name: "__clang_svfloat16x4_t",{{.*}}, baseType: ![[CT16:[0-9]+]] + // CHECK-DAG: ![[CT16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY16:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS16x4]]) + // CHECK-DAG: ![[ELTTY16]] = !DIBasicType(name: "__fp16", size: 16, encoding: DW_ATE_float) + __clang_svfloat16x4_t f16; + + // CHECK: name: "__clang_svfloat32x4_t",{{.*}}, baseType: ![[CT32:[0-9]+]] + // CHECK-DAG: ![[CT32]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY32:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS32x4]]) + // CHECK-DAG: ![[ELTTY32]] = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) + __clang_svfloat32x4_t f32; + + // CHECK: name: "__clang_svfloat64x4_t",{{.*}}, baseType: ![[CT64:[0-9]+]] + // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS1x4_64]]) + // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) + __clang_svfloat64x4_t f64; +} diff --git a/clang/test/CodeGen/aarch64-sve.c b/clang/test/CodeGen/aarch64-sve.c index d21af74319f99..ebcf334f11d69 100644 --- a/clang/test/CodeGen/aarch64-sve.c +++ b/clang/test/CodeGen/aarch64-sve.c @@ -1,22 +1,6 @@ -// RUN: not %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ -// RUN: -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s -check-prefix=CHECK-DEBUG // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ // RUN: -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefix=CHECK -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVInt8_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVInt16_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVInt32_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVInt64_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVUint8_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVUint16_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVUint32_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVUint64_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVFloat16_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVFloat32_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVFloat64_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVBFloat16_t' -// CHECK-DEBUG: cannot yet generate debug info for SVE type '__SVBool_t' - // CHECK: @ptr = global * null, align 8 // CHECK: %s8 = alloca , align 16 // CHECK: %s16 = alloca , align 16 diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index e958f38e486b0..ceeae14c10738 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1417,8 +1417,10 @@ 
static bool hasVectorBeenPadded(const DICompositeType *CTy) { Elements[0]->getTag() == dwarf::DW_TAG_subrange_type && "Invalid vector element array, expected one element of type subrange"); const auto Subrange = cast(Elements[0]); - const auto CI = Subrange->getCount().get(); - const int32_t NumVecElements = CI->getSExtValue(); + const auto NumVecElements = + Subrange->getCount() + ? Subrange->getCount().get()->getSExtValue() + : 0; // Ensure we found the element count and that the actual size is wide // enough to contain the requested size. diff --git a/llvm/test/DebugInfo/AArch64/dbg-sve-types.ll b/llvm/test/DebugInfo/AArch64/dbg-sve-types.ll new file mode 100644 index 0000000000000..62b86f294861d --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/dbg-sve-types.ll @@ -0,0 +1,44 @@ +; Test that the debug info for the vector type is correctly codegenerated +; when the DISubrange has no count, but only an upperbound. +; RUN: llc -mtriple aarch64 -mattr=+sve -filetype=obj -o %t %s +; RUN: llvm-dwarfdump %t | FileCheck %s +; RUN: rm %t + +; CHECK: {{.*}}: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type ({{.*}} "__ARRAY_SIZE_TYPE__") +; CHECK-NEXT: DW_AT_upper_bound (DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_lit1, DW_OP_minus) + +define @test_svint8_t( returned %op1) !dbg !7 { +entry: + call void @llvm.dbg.value(metadata %op1, metadata !19, metadata !DIExpression()), !dbg !20 + ret %op1, !dbg !21 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "dbg-sve-types.ll", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 12.0.0"} +!7 = distinct !DISubprogram(name: "test_svint8_t", scope: !8, file: !8, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!8 = !DIFile(filename: "dbg-sve-types.ll", directory: "") +!9 = !DISubroutineType(types: !10) +!10 = !{!11, !11} +!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "svint8_t", file: !12, line: 32, baseType: !13) +!12 = !DIFile(filename: "lib/clang/12.0.0/include/arm_sve.h", directory: "") +!13 = !DIDerivedType(tag: DW_TAG_typedef, name: "__SVInt8_t", file: !1, baseType: !14) +!14 = !DICompositeType(tag: DW_TAG_array_type, baseType: !15, flags: DIFlagVector, elements: !16) +!15 = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char) +!16 = !{!17} +!17 = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 8, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) +!18 = !{!19} +!19 = !DILocalVariable(name: "op1", arg: 1, scope: !7, file: !8, line: 5, type: !11) +!20 = !DILocation(line: 0, scope: !7) +!21 = !DILocation(line: 5, column: 39, scope: !7) From e37a52fe921280105f5f291bb01722c391ef1123 Mon Sep 17 00:00:00 2001 From: Adam Czachorowski Date: Wed, 26 Aug 2020 16:20:01 +0200 Subject: [PATCH 038/109] [clang] Exclude invalid destructors from lookups. This fixes a crash when declaring a destructor with a wrong name, then writing result to pch file and loading it again. 
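For example, a header like the one added as a PCH test below used to trigger the crash:

  // condensed from the new clang/test/PCH/cxx-invalid-destructor.h
  struct Base {
    ~Base();
  };
  struct Foo : public Base {
    ~Base();   // invalid: names ~Base() instead of ~Foo()
  };

Building this header into a PCH with -fallow-pch-with-compiler-errors and then
declaring a Foo variable from a file that loads the PCH crashed before this fix.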
The PCH storage uses DeclarationNameKey as key and it is the same key for both the invalid destructor and the implicit one that was created because the other one was invalid. When querying for the Foo::~Foo we end up getting Foo::~Bar, which is then rejected and we end up with nullptr in CXXRecordDecl::GetDestructor(). Fixes https://bugs.llvm.org/show_bug.cgi?id=47270 Differential Revision: https://reviews.llvm.org/D86624 (cherry picked from commit eed0af6179ca4fe9e60121e0829ed8d3849b1ce5) --- clang/lib/AST/DeclBase.cpp | 7 +++++++ clang/test/PCH/cxx-invalid-destructor.cpp | 4 ++++ clang/test/PCH/cxx-invalid-destructor.h | 7 +++++++ 3 files changed, 18 insertions(+) create mode 100644 clang/test/PCH/cxx-invalid-destructor.cpp create mode 100644 clang/test/PCH/cxx-invalid-destructor.h diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index da1eadd9d931d..f4314d0bd9614 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1487,6 +1487,13 @@ static bool shouldBeHidden(NamedDecl *D) { if (FD->isFunctionTemplateSpecialization()) return true; + // Hide destructors that are invalid. There should always be one destructor, + // but if it is an invalid decl, another one is created. We need to hide the + // invalid one from places that expect exactly one destructor, like the + // serialization code. + if (isa(D) && D->isInvalidDecl()) + return true; + return false; } diff --git a/clang/test/PCH/cxx-invalid-destructor.cpp b/clang/test/PCH/cxx-invalid-destructor.cpp new file mode 100644 index 0000000000000..fc89cf1f3dfc1 --- /dev/null +++ b/clang/test/PCH/cxx-invalid-destructor.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -x c++ -std=c++11 -emit-pch -o %t %S/cxx-invalid-destructor.h -fallow-pch-with-compiler-errors +// RUN: %clang_cc1 -x c++ -std=c++11 -include-pch %t %S/cxx-invalid-destructor.cpp -fsyntax-only -fno-validate-pch + +Foo f; diff --git a/clang/test/PCH/cxx-invalid-destructor.h b/clang/test/PCH/cxx-invalid-destructor.h new file mode 100644 index 0000000000000..59095a37c203e --- /dev/null +++ b/clang/test/PCH/cxx-invalid-destructor.h @@ -0,0 +1,7 @@ +struct Base { + ~Base(); +}; + +struct Foo : public Base { + ~Base(); +}; From 5b08e498cd35b05a937d6afd5cc20bde90822a29 Mon Sep 17 00:00:00 2001 From: Adam Balogh Date: Thu, 27 Aug 2020 08:01:43 -0700 Subject: [PATCH 039/109] [analyzer] NFC: Store the pointee/referenced type for dynamic type tracking. The successfulness of a dynamic cast depends only on the C++ class, not the pointer or reference. Thus if *A is a *B, then &A is a &B, const *A is a const *B etc. This patch changes DynamicCastInfo to store and check the cast between the unqualified pointed/referenced types. It also removes e.g. SubstTemplateTypeParmType from both the pointer and the pointed type. 
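For example, with the Shape/Circle hierarchy and the llvm.h mock used by the
analyzer tests (a sketch of the analyzer's view; the function itself is only
for illustration):

  void f(const Shape *S) {
    const Circle *C = dyn_cast_or_null<Circle>(S); // records one entry: Shape -> Circle
    if (!C)
      return;
    const Shape &R = *S;
    (void)dyn_cast<Circle>(R); // the reference/const variant now maps to the same
                               // Shape -> Circle entry, so it is known to succeed
  }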
Differential Revision: https://reviews.llvm.org/D85752 (cherry picked from commit 5a9e7789396e7618c1407aafc329e00584437a2f) --- .../StaticAnalyzer/Checkers/CastValueChecker.cpp | 2 +- clang/lib/StaticAnalyzer/Core/DynamicType.cpp | 13 +++++++++++++ clang/test/Analysis/cast-value-state-dump.cpp | 4 ++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp index 1ef70b650414e..3d1721f048756 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp @@ -106,7 +106,7 @@ static const NoteTag *getNoteTag(CheckerContext &C, QualType CastToTy, const Expr *Object, bool CastSucceeds, bool IsKnownCast) { std::string CastToName = - CastInfo ? CastInfo->to()->getPointeeCXXRecordDecl()->getNameAsString() + CastInfo ? CastInfo->to()->getAsCXXRecordDecl()->getNameAsString() : CastToTy->getPointeeCXXRecordDecl()->getNameAsString(); Object = Object->IgnoreParenImpCasts(); diff --git a/clang/lib/StaticAnalyzer/Core/DynamicType.cpp b/clang/lib/StaticAnalyzer/Core/DynamicType.cpp index e9b64fd79614d..9ed915aafcab0 100644 --- a/clang/lib/StaticAnalyzer/Core/DynamicType.cpp +++ b/clang/lib/StaticAnalyzer/Core/DynamicType.cpp @@ -65,6 +65,13 @@ const DynamicTypeInfo *getRawDynamicTypeInfo(ProgramStateRef State, return State->get(MR); } +static void unbox(QualType &Ty) { + // FIXME: Why are we being fed references to pointers in the first place? + while (Ty->isReferenceType() || Ty->isPointerType()) + Ty = Ty->getPointeeType(); + Ty = Ty.getCanonicalType().getUnqualifiedType(); +} + const DynamicCastInfo *getDynamicCastInfo(ProgramStateRef State, const MemRegion *MR, QualType CastFromTy, @@ -73,6 +80,9 @@ const DynamicCastInfo *getDynamicCastInfo(ProgramStateRef State, if (!Lookup) return nullptr; + unbox(CastFromTy); + unbox(CastToTy); + for (const DynamicCastInfo &Cast : *Lookup) if (Cast.equals(CastFromTy, CastToTy)) return &Cast; @@ -112,6 +122,9 @@ ProgramStateRef setDynamicTypeAndCastInfo(ProgramStateRef State, State = State->set(MR, CastToTy); } + unbox(CastFromTy); + unbox(CastToTy); + DynamicCastInfo::CastResult ResultKind = CastSucceeds ? DynamicCastInfo::CastResult::Success : DynamicCastInfo::CastResult::Failure; diff --git a/clang/test/Analysis/cast-value-state-dump.cpp b/clang/test/Analysis/cast-value-state-dump.cpp index 3dffb78767cf4..3e6a40cf1319b 100644 --- a/clang/test/Analysis/cast-value-state-dump.cpp +++ b/clang/test/Analysis/cast-value-state-dump.cpp @@ -35,8 +35,8 @@ void evalNonNullParamNonNullReturn(const Shape *S) { // CHECK-NEXT: ], // CHECK-NEXT: "dynamic_casts": [ // CHECK: { "region": "SymRegion{reg_$0}", "casts": [ - // CHECK-NEXT: { "from": "const struct clang::Shape *", "to": "const class clang::Circle *", "kind": "success" }, - // CHECK-NEXT: { "from": "const struct clang::Shape *", "to": "const class clang::Square *", "kind": "fail" } + // CHECK-NEXT: { "from": "struct clang::Shape", "to": "class clang::Circle", "kind": "success" }, + // CHECK-NEXT: { "from": "struct clang::Shape", "to": "class clang::Square", "kind": "fail" } // CHECK-NEXT: ] } (void)(1 / !C); From 22bce848a0b27b5ce7ef1e086054522a39c70651 Mon Sep 17 00:00:00 2001 From: Adam Balogh Date: Thu, 27 Aug 2020 08:06:10 -0700 Subject: [PATCH 040/109] [analyzer] pr47037: CastValueChecker: Support for the new variadic isa<>. llvm::isa<>() and llvm::isa_and_not_null<>() template functions recently became variadic. 
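In the variadic form a single call tests for several classes at once, e.g. with
the Shape hierarchy from the analyzer tests (the helper names below are only
for illustration):

  bool isKnownPolygon(const Shape *S) {
    return isa<Triangle, Rectangle, Hexagon>(S);             // S must be non-null
  }
  bool isNonNullPolygon(const Shape *S) {
    return isa_and_nonnull<Triangle, Rectangle, Hexagon>(S); // false if S is null
  }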
Unfortunately this causes crashes in case of isa_and_not_null<>() and incorrect behavior in isa<>(). This patch fixes this issue. Differential Revision: https://reviews.llvm.org/D85728 (cherry picked from commit 4448affede5100658530aea8793ae7a7bc05a110) --- .../Checkers/CastValueChecker.cpp | 136 ++++++++++++++---- clang/test/Analysis/Inputs/llvm.h | 18 ++- clang/test/Analysis/cast-value-logic.cpp | 19 +++ clang/test/Analysis/cast-value-notes.cpp | 80 +++++++++-- 4 files changed, 209 insertions(+), 44 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp index 3d1721f048756..528f68c6c429d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp @@ -135,6 +135,47 @@ static const NoteTag *getNoteTag(CheckerContext &C, /*IsPrunable=*/true); } +static const NoteTag *getNoteTag(CheckerContext &C, + SmallVector CastToTyVec, + const Expr *Object, + bool IsKnownCast) { + Object = Object->IgnoreParenImpCasts(); + + return C.getNoteTag( + [=]() -> std::string { + SmallString<128> Msg; + llvm::raw_svector_ostream Out(Msg); + + if (!IsKnownCast) + Out << "Assuming "; + + if (const auto *DRE = dyn_cast(Object)) { + Out << '\'' << DRE->getDecl()->getNameAsString() << '\''; + } else if (const auto *ME = dyn_cast(Object)) { + Out << (IsKnownCast ? "Field '" : "field '") + << ME->getMemberDecl()->getNameAsString() << '\''; + } else { + Out << (IsKnownCast ? "The object" : "the object"); + } + Out << " is"; + + bool First = true; + for (QualType CastToTy: CastToTyVec) { + std::string CastToName = + CastToTy->getAsCXXRecordDecl() ? + CastToTy->getAsCXXRecordDecl()->getNameAsString() : + CastToTy->getPointeeCXXRecordDecl()->getNameAsString(); + Out << ' ' << ((CastToTyVec.size() == 1) ? "not" : + (First ? "neither" : "nor")) << " a '" << CastToName + << '\''; + First = false; + } + + return std::string(Out.str()); + }, + /*IsPrunable=*/true); +} + //===----------------------------------------------------------------------===// // Main logic to evaluate a cast. 
//===----------------------------------------------------------------------===// @@ -220,40 +261,76 @@ static void addInstanceOfTransition(const CallEvent &Call, bool IsInstanceOf) { const FunctionDecl *FD = Call.getDecl()->getAsFunction(); QualType CastFromTy = Call.parameters()[0]->getType(); - QualType CastToTy = FD->getTemplateSpecializationArgs()->get(0).getAsType(); - if (CastFromTy->isPointerType()) - CastToTy = C.getASTContext().getPointerType(CastToTy); - else if (CastFromTy->isReferenceType()) - CastToTy = alignReferenceTypes(CastToTy, CastFromTy, C.getASTContext()); - else - return; + SmallVector CastToTyVec; + for (unsigned idx = 0; idx < FD->getTemplateSpecializationArgs()->size() - 1; + ++idx) { + TemplateArgument CastToTempArg = + FD->getTemplateSpecializationArgs()->get(idx); + switch (CastToTempArg.getKind()) { + default: + return; + case TemplateArgument::Type: + CastToTyVec.push_back(CastToTempArg.getAsType()); + break; + case TemplateArgument::Pack: + for (TemplateArgument ArgInPack: CastToTempArg.pack_elements()) + CastToTyVec.push_back(ArgInPack.getAsType()); + break; + } + } const MemRegion *MR = DV.getAsRegion(); - const DynamicCastInfo *CastInfo = - getDynamicCastInfo(State, MR, CastFromTy, CastToTy); + if (MR && CastFromTy->isReferenceType()) + MR = State->getSVal(DV.castAs()).getAsRegion(); + + bool Success = false; + bool IsAnyKnown = false; + for (QualType CastToTy: CastToTyVec) { + if (CastFromTy->isPointerType()) + CastToTy = C.getASTContext().getPointerType(CastToTy); + else if (CastFromTy->isReferenceType()) + CastToTy = alignReferenceTypes(CastToTy, CastFromTy, C.getASTContext()); + else + return; - bool CastSucceeds; - if (CastInfo) - CastSucceeds = IsInstanceOf && CastInfo->succeeds(); - else - CastSucceeds = IsInstanceOf || CastFromTy == CastToTy; + const DynamicCastInfo *CastInfo = + getDynamicCastInfo(State, MR, CastFromTy, CastToTy); - if (isInfeasibleCast(CastInfo, CastSucceeds)) { - C.generateSink(State, C.getPredecessor()); - return; + bool CastSucceeds; + if (CastInfo) + CastSucceeds = IsInstanceOf && CastInfo->succeeds(); + else + CastSucceeds = IsInstanceOf || CastFromTy == CastToTy; + + // Store the type and the cast information. + bool IsKnownCast = CastInfo || CastFromTy == CastToTy; + IsAnyKnown = IsAnyKnown || IsKnownCast; + ProgramStateRef NewState = State; + if (!IsKnownCast) + NewState = setDynamicTypeAndCastInfo(State, MR, CastFromTy, CastToTy, + IsInstanceOf); + + if (CastSucceeds) { + Success = true; + C.addTransition( + NewState->BindExpr(Call.getOriginExpr(), C.getLocationContext(), + C.getSValBuilder().makeTruthVal(true)), + getNoteTag(C, CastInfo, CastToTy, Call.getArgExpr(0), true, + IsKnownCast)); + if (IsKnownCast) + return; + } else if (CastInfo && CastInfo->succeeds()) { + C.generateSink(NewState, C.getPredecessor()); + return; + } } - // Store the type and the cast information. 
- bool IsKnownCast = CastInfo || CastFromTy == CastToTy; - if (!IsKnownCast) - State = setDynamicTypeAndCastInfo(State, MR, CastFromTy, CastToTy, - IsInstanceOf); - - C.addTransition( - State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), - C.getSValBuilder().makeTruthVal(CastSucceeds)), - getNoteTag(C, CastInfo, CastToTy, Call.getArgExpr(0), CastSucceeds, - IsKnownCast)); + if (!Success) { + C.addTransition( + State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), + C.getSValBuilder().makeTruthVal(false)), + getNoteTag(C, CastToTyVec, Call.getArgExpr(0), IsAnyKnown)); + } } //===----------------------------------------------------------------------===// @@ -402,8 +479,9 @@ bool CastValueChecker::evalCall(const CallEvent &Call, QualType ParamT = Call.parameters()[0]->getType(); QualType ResultT = Call.getResultType(); if (!(ParamT->isPointerType() && ResultT->isPointerType()) && - !(ParamT->isReferenceType() && ResultT->isReferenceType())) + !(ParamT->isReferenceType() && ResultT->isReferenceType())) { return false; + } DV = Call.getArgSVal(0).getAs(); break; diff --git a/clang/test/Analysis/Inputs/llvm.h b/clang/test/Analysis/Inputs/llvm.h index c9d66ba2374d3..b80567bcb5863 100644 --- a/clang/test/Analysis/Inputs/llvm.h +++ b/clang/test/Analysis/Inputs/llvm.h @@ -19,11 +19,19 @@ const X *dyn_cast_or_null(Y *Value); template const X *dyn_cast_or_null(Y &Value); -template -bool isa(Y Value); - -template -bool isa_and_nonnull(Y Value); +template inline bool isa(const Y &Val); + +template +inline bool isa(const Y &Val) { + return isa(Val) || isa(Val); +} + +template +inline bool isa_and_nonnull(const Y &Val) { + if (!Val) + return false; + return isa(Val); +} template std::unique_ptr cast(std::unique_ptr &&Value); diff --git a/clang/test/Analysis/cast-value-logic.cpp b/clang/test/Analysis/cast-value-logic.cpp index 1411ede92e366..52a94f24fba67 100644 --- a/clang/test/Analysis/cast-value-logic.cpp +++ b/clang/test/Analysis/cast-value-logic.cpp @@ -19,6 +19,8 @@ struct Shape { virtual double area(); }; class Triangle : public Shape {}; +class Rectangle : public Shape {}; +class Hexagon : public Shape {}; class Circle : public Shape { public: ~Circle(); @@ -39,6 +41,23 @@ void test_regions_isa(const Shape *A, const Shape *B) { clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} } +void test_regions_isa_variadic(const Shape *A, const Shape *B) { + if (isa(A) && + !isa(B)) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +} + +void test_regions_isa_and_nonnull(const Shape *A, const Shape *B) { + if (isa_and_nonnull(A) && !isa_and_nonnull(B)) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +} + +void test_regions_isa_and_nonnull_variadic(const Shape *A, const Shape *B) { + if (isa_and_nonnull(A) && + !isa_and_nonnull(B)) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +} + namespace test_cast { void evalLogic(const Shape *S) { const Circle *C = cast(S); diff --git a/clang/test/Analysis/cast-value-notes.cpp b/clang/test/Analysis/cast-value-notes.cpp index eb5d1b3d3fe27..a09586309fb41 100644 --- a/clang/test/Analysis/cast-value-notes.cpp +++ b/clang/test/Analysis/cast-value-notes.cpp @@ -13,6 +13,8 @@ struct Shape { const T *getAs() const; }; class Triangle : public Shape {}; +class Rectangle : public Shape {}; +class Hexagon : public Shape {}; class Circle : public Shape {}; } // namespace clang @@ -27,7 +29,6 @@ void evalReferences(const Shape &S) { } void evalNonNullParamNonNullReturnReference(const Shape &S) 
{ - // Unmodeled cast from reference to pointer. const auto *C = dyn_cast_or_null(S); // expected-note@-1 {{'C' initialized here}} @@ -43,13 +44,37 @@ void evalNonNullParamNonNullReturnReference(const Shape &S) { return; } + if (dyn_cast_or_null(C)) { + // expected-note@-1 {{Assuming 'C' is not a 'Rectangle'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (dyn_cast_or_null(C)) { + // expected-note@-1 {{Assuming 'C' is not a 'Hexagon'}} + // expected-note@-2 {{Taking false branch}} + return; + } + if (isa(C)) { // expected-note@-1 {{'C' is not a 'Triangle'}} // expected-note@-2 {{Taking false branch}} return; } - if (isa(C)) { + if (isa(C)) { + // expected-note@-1 {{'C' is neither a 'Triangle' nor a 'Rectangle'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (isa(C)) { + // expected-note@-1 {{'C' is neither a 'Triangle' nor a 'Rectangle' nor a 'Hexagon'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (isa(C)) { // expected-note@-1 {{'C' is a 'Circle'}} // expected-note@-2 {{Taking true branch}} @@ -65,22 +90,57 @@ void evalNonNullParamNonNullReturn(const Shape *S) { // expected-note@-1 {{'S' is a 'Circle'}} // expected-note@-2 {{'C' initialized here}} - if (!isa(C)) { - // expected-note@-1 {{Assuming 'C' is a 'Triangle'}} + if (!dyn_cast_or_null(C)) { + // expected-note@-1 {{'C' is a 'Circle'}} // expected-note@-2 {{Taking false branch}} return; } - if (!isa(C)) { - // expected-note@-1 {{'C' is a 'Triangle'}} + if (dyn_cast_or_null(C)) { + // expected-note@-1 {{Assuming 'C' is not a 'Triangle'}} // expected-note@-2 {{Taking false branch}} return; } - (void)(1 / !C); - // expected-note@-1 {{'C' is non-null}} - // expected-note@-2 {{Division by zero}} - // expected-warning@-3 {{Division by zero}} + if (dyn_cast_or_null(C)) { + // expected-note@-1 {{Assuming 'C' is not a 'Rectangle'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (dyn_cast_or_null(C)) { + // expected-note@-1 {{Assuming 'C' is not a 'Hexagon'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (isa(C)) { + // expected-note@-1 {{'C' is not a 'Triangle'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (isa(C)) { + // expected-note@-1 {{'C' is neither a 'Triangle' nor a 'Rectangle'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (isa(C)) { + // expected-note@-1 {{'C' is neither a 'Triangle' nor a 'Rectangle' nor a 'Hexagon'}} + // expected-note@-2 {{Taking false branch}} + return; + } + + if (isa(C)) { + // expected-note@-1 {{'C' is a 'Circle'}} + // expected-note@-2 {{Taking true branch}} + + (void)(1 / !C); + // expected-note@-1 {{'C' is non-null}} + // expected-note@-2 {{Division by zero}} + // expected-warning@-3 {{Division by zero}} + } } void evalNonNullParamNullReturn(const Shape *S) { From 97ac9e82002d6b12831ca2c78f739cca65a4fa05 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Thu, 27 Aug 2020 17:17:38 -0400 Subject: [PATCH 041/109] [SSP] Restore setting the visibility of __guard_local to hidden for better code generation. 
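With this change the OpenBSD stack protector guard is again declared along the
lines of

  @__guard_local = external hidden global i8*

and the hidden visibility lets the backend assume the symbol is not preemptible,
so it can be referenced directly rather than through GOT-style indirection.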
Patch by: Philip Guenther (cherry picked from commit d870e363263835bec96c83f51b20e64722cad742) --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 2c94c2c62e5f0..42c1fa8af0e6b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1827,7 +1827,10 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const { if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); PointerType *PtrTy = Type::getInt8PtrTy(M.getContext()); - return M.getOrInsertGlobal("__guard_local", PtrTy); + Constant *C = M.getOrInsertGlobal("__guard_local", PtrTy); + if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(C)) + G->setVisibility(GlobalValue::HiddenVisibility); + return C; } return nullptr; } From 2c6a593b5e186a686fdaf6b6082b0dbcae29c265 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 29 Aug 2020 08:51:19 -0700 Subject: [PATCH 042/109] ReleaseNotes: add lld/ELF notes Differential Revision: https://reviews.llvm.org/D86579 --- lld/docs/ReleaseNotes.rst | 133 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 6 deletions(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 513ad37e278e2..466a7f7073549 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -24,22 +24,143 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- -* New ``--time-trace`` option records a time trace file that can be viewed in +* ``--lto-emit-asm`` is added to emit assembly output for debugging purposes. + (`D77231 `_) +* ``--lto-whole-program-visibility`` is added to support LTO whole-program devirtualization. + (`D71913 `_) +* ``--print-archive-stats=`` is added to print the number of members and the number of fetched members for each archive. + The feature is similar to GNU gold's ``--print-symbol-counts=``. + (`D78983 `_) +* ``--shuffle-sections=`` is added to introduce randomization in the output to help reduce measurement bias and detect static initialization order fiasco. + (`D74791 `_) + (`D74887 `_) +* ``--time-trace`` is added. It records a time trace file that can be viewed in chrome://tracing. The file can be specified with ``--time-trace-file``. Trace granularity can be specified with ``--time-trace-granularity``. (`D71060 `_) -* For ARM architectures the default max page size was increased to 64k. - This increases compatibility with systems where a non standard page - size was configured. This also is inline with GNU ld defaults. - (`D77330 `_) -* ... +* ``--thinlto-single-module`` is added to compile a subset of modules in ThinLTO for debugging purposes. + (`D80406 `_) +* ``--unique`` is added to create separate output sections for orphan sections. + (`D75536 `_) +* ``--warn-backrefs`` has been improved to emulate GNU ld's archive semantics. + If a link passes with warnings from ``--warn-backrefs``, it almost assuredly + means that the link will fail with GNU ld, or the symbol will get different + resolutions in GNU ld and LLD. ``--warn-backrefs-exclude=`` is added to + exclude known issues. + (`D77522 `_) + (`D77630 `_) + (`D77512 `_) +* ``--no-relax`` is accepted but ignored. The Linux kernel's RISC-V port uses this option. + (`D81359 `_) +* ``--rosegment`` (default) is added to complement ``--no-rosegment``. + GNU gold from 2.35 onwards supports both options.
+* ``--threads=N`` is added. The default uses all threads. + (`D76885 `_) +* ``--wrap`` has better compatibility with GNU ld. +* ``-z dead-reloc-in-nonalloc==`` is added to resolve an absolute relocation + referencing a discarded symbol. + (`D83264 `_) +* Changed tombstone values to (``.debug_ranges``/``.debug_loc``) 1 and (other ``.debug_*``) 0. + A tombstone value is the computed value of a relocation referencing a discarded symbol (``--gc-sections``, ICF or ``/DISCARD/``). + (`D84825 `_) In the future many .debug_* may switch to 0xffffffff/0xffffffffffffffff as the tombstone value. +* ``-z keep-text-section-prefix`` moves ``.text.unknown.*`` input sections to ``.text.unknown``. +* ``-z rel`` and ``-z rela`` are added to select the REL/RELA format for dynamic relocations. + The default is target specific and typically matches the form used in relocatable objects. +* ``-z start-stop-visibility={default,protected,internal,hidden}`` is added. + GNU ld/gold from 2.35 onwards support this option. + (`D55682 `_) +* When ``-r`` or ``--emit-relocs`` is specified, the GNU ld compatible + ``--discard-all`` and ``--discard-locals`` semantics are implemented. + (`D77807 `_) +* ``--emit-relocs --strip-debug`` can now be used together. + (`D74375 `_) +* ``--gdb-index`` supports DWARF v5. + (`D79061 `_) + (`D85579 `_) +* ``-r`` allows SHT_X86_64_UNWIND to be merged into SHT_PROGBITS. + This allows clang/GCC produced object files to be mixed together. + (`D85785 `_) +* Better linker script support related to output section alignments and LMA regions. + (`D74286 `_) + (`D74297 `_) + (`D75724 `_) + (`D81986 `_) +* In an input section description, the filename can be specified in double quotes. + ``archive:file`` syntax is added. + (`D72517 `_) + (`D75100 `_) +* Linker script specified empty ``(.init|.preinit|.fini)_array`` are allowed with RELRO. + (`D76915 `_) +* ``INSERT AFTER`` and ``INSERT BEFORE`` work for orphan sections now. + (`D74375 `_) +* ``INPUT_SECTION_FLAGS`` is supported in linker scripts. + (`D72745 `_) +* ``DF_1_PIE`` is set for position-independent executables. + (`D80872 `_) +* For a symbol assignment ``alias = aliasee;``, ``alias`` inherits the ``aliasee``'s symbol type. + (`D86263 `_) +* ``SHT_GNU_verneed`` in shared objects are parsed, and versioned undefined symbols in shared objects are respected. + (`D80059 `_) +* SHF_LINK_ORDER and non-SHF_LINK_ORDER sections can be mixed as long as the SHF_LINK_ORDER components are contiguous. + (`D77007 `_) +* An out-of-range relocation diagnostic mentions the referenced symbol now. + (`D73518 `_) +* AArch64: ``R_AARCH64_PLT32`` is supported. + (`D81184 `_) +* ARM: SBREL type relocations are supported. + (`D74375 `_) +* ARM: ``R_ARM_ALU_PC_G0``, ``R_ARM_LDR_PC_G0``, ``R_ARM_THUMB_PC8`` and ``R_ARM_THUMB_PC12`` are supported. + (`D75349 `_) + (`D77200 `_) +* ARM: various improvements to .ARM.exidx: ``/DISCARD/`` support for a subset, out-of-range handling, support for non-monotonic section order. + (`PR44824 `_) +* AVR: many relocation types are supported. + (`D78741 `_) +* Hexagon: General Dynamic and some other relocation types are supported. +* PPC: Canonical PLT and range extension thunks with addends are supported. + (`D73399 `_) + (`D73424 `_) + (`D75394 `_) +* PPC and PPC64: copy relocations. + (`D73255 `_) +* PPC64: ``_savegpr[01]_{14..31}`` and ``_restgpr[01]_{14..31}`` can be synthesized. + (`D79977 `_) +* PPC64: ``R_PPC64_GOT_PCREL34`` and ``R_PPC64_REL24_NOTOC`` are supported. r2 save stub is supported.
+ (`D81948 `_) + (`D82950 `_) + (`D82816 `_) +* RISC-V: ``R_RISCV_IRELATIVE`` is supported. + (`D74022 `_) +* RISC-V: ``R_RISCV_ALIGN`` is reported as an error because GNU ld style linker relaxation is not supported. + (`D71820 `_) +* SPARCv9: more relocation types are supported. + (`D77672 `_) Breaking changes ---------------- * One-dash form of some long option (``--thinlto-*``, ``--lto-*``, ``--shuffle-sections=``) are no longer supported. + (`D79371 `_) * ``--export-dynamic-symbol`` no longer implies ``-u``. + The new behavior matches GNU ld from binutils 2.35 onwards. + (`D80487 `_) +* ARM: the default max page size was increased from 4096 to 65536. + This increases compatibility with systems where a non-standard page + size was configured. This also is in line with GNU ld defaults. + (`D77330 `_) +* ARM: for non-STT_FUNC symbols, Thumb interworking thunks are not added and BL/BLX are not substituted. + (`D73474 `_) + (`D73542 `_) +* AArch64: ``--force-bti`` is renamed to ``-z force-bti``. ``--pac-plt`` is renamed to ``-z pac-plt``. + This change is compatible with GNU ld. +* A readonly ``PT_LOAD`` is created in the presence of a ``SECTIONS`` command. + The new behavior is consistent with the longstanding behavior in the absence of a ``SECTIONS`` command. +* Orphan section names like ``.rodata.foo`` and ``.text.foo`` are not grouped into ``.rodata`` and ``.text`` in the presence of a ``SECTIONS`` command. + The new behavior matches GNU ld. + (`D75225 `_) +* ``--no-threads`` is removed. Use ``--threads=1`` instead. ``--threads`` (no-op) is removed. COFF Improvements ----------------- From db16ab428bbae15b5c157fd518574baecf63f8dc Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Tue, 18 Aug 2020 15:05:51 +0530 Subject: [PATCH 043/109] [Flang] Move markdown files(.MD) from documentation/ to docs/ Summary: Other LLVM sub-projects use docs/ folder for documentation files. Follow LLVM project policy. Modify `documentation/` references in sources to `docs/`. This patch doesn't modify files to reStructuredText(.rst) file format.
Reviewed By: DavidTruby, sscalpone Differential Revision: https://reviews.llvm.org/D85884 (cherry picked from commit eaff200429a3dcf36eebfae39d2e859d6815285e) --- flang/README.md | 20 +++++++++---------- .../ArrayComposition.md | 2 +- .../BijectiveInternalNameUniquing.md | 0 flang/{documentation => docs}/C++17.md | 2 +- flang/{documentation => docs}/C++style.md | 2 +- flang/{documentation => docs}/Calls.md | 2 +- flang/{documentation => docs}/Character.md | 2 +- .../ControlFlowGraph.md | 2 +- flang/{documentation => docs}/Directives.md | 2 +- flang/{documentation => docs}/Extensions.md | 2 +- .../FortranForCProgrammers.md | 2 +- flang/{documentation => docs}/FortranIR.md | 2 +- .../IORuntimeInternals.md | 2 +- .../ImplementingASemanticCheck.md | 2 +- flang/{documentation => docs}/Intrinsics.md | 2 +- .../LabelResolution.md | 2 +- flang/{documentation => docs}/ModFiles.md | 2 +- .../OpenMP-4.5-grammar.txt | 2 +- .../OpenMP-semantics.md | 2 +- .../OptionComparison.md | 2 +- flang/{documentation => docs}/Overview.md | 2 +- .../ParserCombinators.md | 2 +- flang/{documentation => docs}/Parsing.md | 2 +- .../{documentation => docs}/Preprocessing.md | 2 +- .../PullRequestChecklist.md | 2 +- .../RuntimeDescriptor.md | 2 +- flang/{documentation => docs}/Semantics.md | 2 +- .../{documentation => docs}/f2018-grammar.txt | 2 +- .../{documentation => docs}/flang-c-style.el | 2 +- flang/lib/Evaluate/intrinsics.cpp | 2 +- 30 files changed, 38 insertions(+), 38 deletions(-) rename flang/{documentation => docs}/ArrayComposition.md (99%) rename flang/{documentation => docs}/BijectiveInternalNameUniquing.md (100%) rename flang/{documentation => docs}/C++17.md (99%) rename flang/{documentation => docs}/C++style.md (99%) rename flang/{documentation => docs}/Calls.md (99%) rename flang/{documentation => docs}/Character.md (99%) rename flang/{documentation => docs}/ControlFlowGraph.md (99%) rename flang/{documentation => docs}/Directives.md (92%) rename flang/{documentation => docs}/Extensions.md (99%) rename flang/{documentation => docs}/FortranForCProgrammers.md (99%) rename flang/{documentation => docs}/FortranIR.md (99%) rename flang/{documentation => docs}/IORuntimeInternals.md (99%) rename flang/{documentation => docs}/ImplementingASemanticCheck.md (99%) rename flang/{documentation => docs}/Intrinsics.md (99%) rename flang/{documentation => docs}/LabelResolution.md (99%) rename flang/{documentation => docs}/ModFiles.md (99%) rename flang/{documentation => docs}/OpenMP-4.5-grammar.txt (99%) rename flang/{documentation => docs}/OpenMP-semantics.md (99%) rename flang/{documentation => docs}/OptionComparison.md (99%) rename flang/{documentation => docs}/Overview.md (98%) rename flang/{documentation => docs}/ParserCombinators.md (99%) rename flang/{documentation => docs}/Parsing.md (99%) rename flang/{documentation => docs}/Preprocessing.md (99%) rename flang/{documentation => docs}/PullRequestChecklist.md (98%) rename flang/{documentation => docs}/RuntimeDescriptor.md (99%) rename flang/{documentation => docs}/Semantics.md (99%) rename flang/{documentation => docs}/f2018-grammar.txt (99%) rename flang/{documentation => docs}/flang-c-style.el (92%) diff --git a/flang/README.md b/flang/README.md index f7797ed55bd3e..44573ae4b9b6b 100644 --- a/flang/README.md +++ b/flang/README.md @@ -8,30 +8,30 @@ F18 was subsequently accepted into the LLVM project and rechristened as Flang. ## Getting Started -Read more about flang in the [documentation directory](documentation). 
-Start with the [compiler overview](documentation/Overview.md). +Read more about flang in the [docs directory](docs). +Start with the [compiler overview](docs/Overview.md). To better understand Fortran as a language and the specific grammar accepted by flang, -read [Fortran For C Programmers](documentation/FortranForCProgrammers.md) +read [Fortran For C Programmers](docs/FortranForCProgrammers.md) and -flang's specifications of the [Fortran grammar](documentation/f2018-grammar.txt) +flang's specifications of the [Fortran grammar](docs/f2018-grammar.txt) and -the [OpenMP grammar](documentation/OpenMP-4.5-grammar.txt). +the [OpenMP grammar](docs/OpenMP-4.5-grammar.txt). Treatment of language extensions is covered -in [this document](documentation/Extensions.md). +in [this document](docs/Extensions.md). To understand the compilers handling of intrinsics, -see the [discussion of intrinsics](documentation/Intrinsics.md). +see the [discussion of intrinsics](docs/Intrinsics.md). To understand how a flang program communicates with libraries at runtime, -see the discussion of [runtime descriptors](documentation/RuntimeDescriptor.md). +see the discussion of [runtime descriptors](docs/RuntimeDescriptor.md). If you're interested in contributing to the compiler, -read the [style guide](documentation/C++style.md) +read the [style guide](docs/C++style.md) and -also review [how flang uses modern C++ features](documentation/C++17.md). +also review [how flang uses modern C++ features](docs/C++17.md). ## Supported C++ compilers diff --git a/flang/documentation/ArrayComposition.md b/flang/docs/ArrayComposition.md similarity index 99% rename from flang/documentation/ArrayComposition.md rename to flang/docs/ArrayComposition.md index 099909c5ef0d0..0f30af39f9e4b 100644 --- a/flang/documentation/ArrayComposition.md +++ b/flang/docs/ArrayComposition.md @@ -1,4 +1,4 @@ - - This note attempts to describe the motivation for and design of an implementation of Fortran 90 (and later) array expression evaluation that minimizes the use of dynamically allocated temporary storage for diff --git a/flang/docs/C++17.md b/flang/docs/C++17.md index 87d5fc01f0922..ea8395cfdedc7 100644 --- a/flang/docs/C++17.md +++ b/flang/docs/C++17.md @@ -1,11 +1,3 @@ - - ## C++14/17 features used in f18 The C++ dialect used in this project constitutes a subset of the diff --git a/flang/docs/C++style.md b/flang/docs/C++style.md index 4ab95393d758a..77e0a04638238 100644 --- a/flang/docs/C++style.md +++ b/flang/docs/C++style.md @@ -1,11 +1,3 @@ - - ## In brief: * Use *clang-format* from llvm 7 diff --git a/flang/docs/Calls.md b/flang/docs/Calls.md index d70bc910d73db..8a4d65820d19f 100644 --- a/flang/docs/Calls.md +++ b/flang/docs/Calls.md @@ -1,11 +1,3 @@ - - ## Procedure reference implementation protocol Fortran function and subroutine references are complicated. 
diff --git a/flang/docs/Character.md b/flang/docs/Character.md index 700db864f2dac..f66b144389450 100644 --- a/flang/docs/Character.md +++ b/flang/docs/Character.md @@ -1,11 +1,3 @@ - - ## Implementation of `CHARACTER` types in f18 ### Kinds and Character Sets diff --git a/flang/docs/ControlFlowGraph.md b/flang/docs/ControlFlowGraph.md index b2b549845ebb6..7d1e514a87adb 100644 --- a/flang/docs/ControlFlowGraph.md +++ b/flang/docs/ControlFlowGraph.md @@ -1,11 +1,3 @@ - - ## Concept After a Fortran subprogram has been parsed, its names resolved, and all its semantic constraints successfully checked, the parse tree of its diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index c2e93c5f3de2e..554dc4608dd43 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -1,11 +1,3 @@ - - Compiler directives supported by F18 ==================================== diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 9010b770cca6d..86a4f04de57f1 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -1,11 +1,3 @@ - - As a general principle, this compiler will accept by default and without complaint many legacy features, extensions to the standard language, and features that have been deleted from the standard, diff --git a/flang/docs/FortranForCProgrammers.md b/flang/docs/FortranForCProgrammers.md index 103def2a92ce6..542034f3ea833 100644 --- a/flang/docs/FortranForCProgrammers.md +++ b/flang/docs/FortranForCProgrammers.md @@ -1,11 +1,3 @@ - - Fortran For C Programmers ========================= diff --git a/flang/docs/FortranIR.md b/flang/docs/FortranIR.md index 5d83aaa8e34cf..83193ff27a359 100644 --- a/flang/docs/FortranIR.md +++ b/flang/docs/FortranIR.md @@ -1,11 +1,3 @@ - - # Design: Fortran IR ## Introduction diff --git a/flang/docs/IORuntimeInternals.md b/flang/docs/IORuntimeInternals.md index b4f3092a014ec..8ff464ee9c8f7 100644 --- a/flang/docs/IORuntimeInternals.md +++ b/flang/docs/IORuntimeInternals.md @@ -1,11 +1,3 @@ - - Fortran I/O Runtime Library Internal Design =========================================== diff --git a/flang/docs/ImplementingASemanticCheck.md b/flang/docs/ImplementingASemanticCheck.md index 3bb16915cb880..2406f5bc2a58c 100644 --- a/flang/docs/ImplementingASemanticCheck.md +++ b/flang/docs/ImplementingASemanticCheck.md @@ -1,11 +1,3 @@ - -# Introduction I recently added a semantic check to the f18 compiler front end. This document describes my thought process and the resulting implementation. 
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index 7be0bf3e4a9ca..6f4dec4678233 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -1,11 +1,3 @@ - - # A categorization of standard (2018) and extended Fortran intrinsic procedures This note attempts to group the intrinsic procedures of Fortran into categories diff --git a/flang/docs/LabelResolution.md b/flang/docs/LabelResolution.md index e837b4fa6aece..2dfa5a30bb3ca 100644 --- a/flang/docs/LabelResolution.md +++ b/flang/docs/LabelResolution.md @@ -1,11 +1,3 @@ - - # Semantics: Resolving Labels and Construct Names ## Overview diff --git a/flang/docs/ModFiles.md b/flang/docs/ModFiles.md index 483341bdd0f47..367cd4cd54f7c 100644 --- a/flang/docs/ModFiles.md +++ b/flang/docs/ModFiles.md @@ -1,11 +1,3 @@ - - # Module Files Module files hold information from a module that is necessary to compile diff --git a/flang/docs/OpenMP-semantics.md b/flang/docs/OpenMP-semantics.md index 4e2a81739cf81..22a3ca5614ebc 100644 --- a/flang/docs/OpenMP-semantics.md +++ b/flang/docs/OpenMP-semantics.md @@ -1,11 +1,3 @@ - - # OpenMP Semantic Analysis ## OpenMP for F18 diff --git a/flang/docs/OptionComparison.md b/flang/docs/OptionComparison.md index db5932411cc1e..5c04450a7bb34 100644 --- a/flang/docs/OptionComparison.md +++ b/flang/docs/OptionComparison.md @@ -1,11 +1,3 @@ - - # Compiler options This document catalogs the options processed by F18's peers/competitors. Much of the document is taken up by a set of tables that list the options categorized into different topics. Some of the table headings link to more information about the contents of the tables. For example, the table on **Standards conformance** options links to [notes on Standards conformance](#standards). diff --git a/flang/docs/Overview.md b/flang/docs/Overview.md index 75a8cd1c4cab0..807efda2ed9a3 100644 --- a/flang/docs/Overview.md +++ b/flang/docs/Overview.md @@ -1,11 +1,3 @@ - - # Overview of Compiler Phases Each phase produces either correct output or fatal errors. diff --git a/flang/docs/ParserCombinators.md b/flang/docs/ParserCombinators.md index 4f3dc6fd07ae6..757684dcfda60 100644 --- a/flang/docs/ParserCombinators.md +++ b/flang/docs/ParserCombinators.md @@ -1,11 +1,3 @@ - - ## Concept The Fortran language recognizer here can be classified as an LL recursive descent parser. It is composed from a *parser combinator* library that diff --git a/flang/docs/Parsing.md b/flang/docs/Parsing.md index fad9a4d57278c..54a4fd752f6c1 100644 --- a/flang/docs/Parsing.md +++ b/flang/docs/Parsing.md @@ -1,11 +1,3 @@ - - The F18 Parser ============== This program source code implements a parser for the Fortran programming diff --git a/flang/docs/Preprocessing.md b/flang/docs/Preprocessing.md index 7f6f3951cfd16..9b4d905177b7f 100644 --- a/flang/docs/Preprocessing.md +++ b/flang/docs/Preprocessing.md @@ -1,11 +1,3 @@ - - Fortran Preprocessing ===================== diff --git a/flang/docs/PullRequestChecklist.md b/flang/docs/PullRequestChecklist.md index 12a67be374a20..17b6d64923f58 100644 --- a/flang/docs/PullRequestChecklist.md +++ b/flang/docs/PullRequestChecklist.md @@ -1,11 +1,3 @@ - - # Pull request checklist Please review the following items before submitting a pull request. This list can also be used when reviewing pull requests. 
diff --git a/flang/docs/RuntimeDescriptor.md b/flang/docs/RuntimeDescriptor.md index d819517fa9795..a8eff33f65211 100644 --- a/flang/docs/RuntimeDescriptor.md +++ b/flang/docs/RuntimeDescriptor.md @@ -1,11 +1,3 @@ - - ## Concept The properties that characterize data values and objects in Fortran programs must sometimes be materialized when the program runs. diff --git a/flang/docs/Semantics.md b/flang/docs/Semantics.md index 6ea0b292de69f..f879671b4f4ed 100644 --- a/flang/docs/Semantics.md +++ b/flang/docs/Semantics.md @@ -1,11 +1,3 @@ - - # Semantic Analysis The semantic analysis pass determines if a syntactically correct Fortran From 2526d8c43499fc5dd6135556ab16ae20d280ddca Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 29 Aug 2020 10:55:55 +0200 Subject: [PATCH 050/109] [InstSimplify] Protect against more poison in SimplifyWithOpReplaced (PR47322) Replace the check for poison-producing instructions in SimplifyWithOpReplaced() with the generic helper canCreatePoison() that properly handles poisonous shifts and thus avoids the problem from PR47322. This additionally fixes a bug in IIQ.UseInstrInfo=false mode, which previously could have caused this code to ignore poison flags. Setting UseInstrInfo=false should reduce the possible optimizations, not increase them. This is not a full solution to the problem, as poison could be introduced more indirectly. This is just a minimal, easy to backport fix. Differential Revision: https://reviews.llvm.org/D86834 (cherry picked from commit a5be86fde5de2c253aa19704bf4e4854f1936f8c) --- llvm/lib/Analysis/InstructionSimplify.cpp | 5 +---- llvm/test/Transforms/InstSimplify/select.ll | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index d3bdf9d6aafd0..9423ff9e3a66c 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3837,10 +3837,7 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // TODO: This is an unusual limitation because better analysis results in // worse simplification. InstCombine can do this fold more generally // by dropping the flags. Remove this fold to save compile-time? 
- if (isa(B)) - if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B)) - return nullptr; - if (isa(B) && Q.IIQ.isExact(B)) + if (canCreatePoison(I)) return nullptr; if (MaxRecurse) { diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index b1264138a15ea..05fa46ca3f49c 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -919,3 +919,19 @@ define <2 x i32> @all_constant_true_undef_false_constexpr_vec() { %s = select i1 ptrtoint (<2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i1), <2 x i32> undef, <2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i32)> ret <2 x i32> %s } + +define i32 @pr47322_more_poisonous_replacement(i32 %arg) { +; CHECK-LABEL: @pr47322_more_poisonous_replacement( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0 +; CHECK-NEXT: [[TRAILING:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 immarg true) +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[ARG]], [[TRAILING]] +; CHECK-NEXT: [[R1_SROA_0_1:%.*]] = select i1 [[CMP]], i32 0, i32 [[SHIFTED]] +; CHECK-NEXT: ret i32 [[R1_SROA_0_1]] +; + %cmp = icmp eq i32 %arg, 0 + %trailing = call i32 @llvm.cttz.i32(i32 %arg, i1 immarg true) + %shifted = lshr i32 %arg, %trailing + %r1.sroa.0.1 = select i1 %cmp, i32 0, i32 %shifted + ret i32 %r1.sroa.0.1 +} +declare i32 @llvm.cttz.i32(i32, i1 immarg) From 7569e8c696288cd9c9409936b4fe9b846d0bd0b7 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Wed, 29 Jul 2020 16:39:27 +0000 Subject: [PATCH 051/109] [PowerPC] Set v1i128 to expand for SETCC to avoid crash Summary: PPC only supports the instruction selection for v16i8, v8i16, v4i32, v2i64, v4f32 and v2f64 for ISD::SETCC, don't support the v1i128, so v1i128 for ISD::SETCC will crash. This patch is to set v1i128 to expand to avoid crash. 
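For reference, IR of roughly this shape was enough to trigger the crash (a sketch; the regression test added below uses a constant second operand instead):

define <1 x i64> @setcc_v1i128(<1 x i128> %a, <1 x i128> %b) {
entry:
  ; Before this change the v1i128 SETCC node reached instruction selection,
  ; which has no pattern for it; with Expand, legalization breaks it down
  ; into scalar compares the backend already handles.
  %cmp = icmp ult <1 x i128> %a, %b
  %ext = zext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %ext
}

Compiling this with llc for a powerpc64le triple (for example with -mcpu=pwr8) exercises the affected path.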
Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D84238 (cherry picked from commit 802c043078ad653aca131648a130b59f041df0b5) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 + llvm/test/CodeGen/PowerPC/setcc-vector.ll | 49 +++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/setcc-vector.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5c1a4cb16568c..2f50c52c90a11 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -920,6 +920,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SUB, MVT::v2i64, Expand); } + setOperationAction(ISD::SETCC, MVT::v1i128, Expand); + setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); setOperationAction(ISD::STORE, MVT::v2i64, Promote); diff --git a/llvm/test/CodeGen/PowerPC/setcc-vector.ll b/llvm/test/CodeGen/PowerPC/setcc-vector.ll new file mode 100644 index 0000000000000..5917ccabf84ed --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/setcc-vector.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck -check-prefixes=CHECK-PWR9 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck -check-prefixes=CHECK-PWR8 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck -check-prefixes=CHECK-PWR7 %s + +define <1 x i64> @setcc_v1i128(<1 x i128> %a) { +; CHECK-PWR9-LABEL: setcc_v1i128: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: mfvsrld r3, vs34 +; CHECK-PWR9-NEXT: cmpldi r3, 35708 +; CHECK-PWR9-NEXT: mfvsrd r3, vs34 +; CHECK-PWR9-NEXT: cmpdi cr1, r3, 0 +; CHECK-PWR9-NEXT: li r3, 1 +; CHECK-PWR9-NEXT: crnand 4*cr5+lt, 4*cr1+eq, lt +; CHECK-PWR9-NEXT: isel r3, 0, r3, 4*cr5+lt +; CHECK-PWR9-NEXT: blr +; +; CHECK-PWR8-LABEL: setcc_v1i128: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: xxswapd vs0, vs34 +; CHECK-PWR8-NEXT: mfvsrd r3, vs34 +; CHECK-PWR8-NEXT: cmpdi r3, 0 +; CHECK-PWR8-NEXT: li r3, 1 +; CHECK-PWR8-NEXT: mffprd r4, f0 +; CHECK-PWR8-NEXT: cmpldi cr1, r4, 35708 +; CHECK-PWR8-NEXT: crnand 4*cr5+lt, eq, 4*cr1+lt +; CHECK-PWR8-NEXT: isel r3, 0, r3, 4*cr5+lt +; CHECK-PWR8-NEXT: blr +; +; CHECK-PWR7-LABEL: setcc_v1i128: +; CHECK-PWR7: # %bb.0: # %entry +; CHECK-PWR7-NEXT: li r5, 0 +; CHECK-PWR7-NEXT: cntlzd r3, r3 +; CHECK-PWR7-NEXT: ori r5, r5, 35708 +; CHECK-PWR7-NEXT: rldicl r3, r3, 58, 63 +; CHECK-PWR7-NEXT: subc r5, r4, r5 +; CHECK-PWR7-NEXT: subfe r4, r4, r4 +; CHECK-PWR7-NEXT: neg r4, r4 +; CHECK-PWR7-NEXT: and r3, r3, r4 +; CHECK-PWR7-NEXT: blr +entry: + %0 = icmp ult <1 x i128> %a, + %1 = zext <1 x i1> %0 to <1 x i64> + ret <1 x i64> %1 +} + From f5df584a5077e726ad851ccfe8496deda3e5ef07 Mon Sep 17 00:00:00 2001 From: Camille Coti Date: Tue, 25 Aug 2020 09:27:20 +0100 Subject: [PATCH 052/109] [flang] Version information in flang/f18 Fixed some version information in flang/f18: - fixed the behavior of the -v switch: this flag enables verbosity with used with arguments, but just displays the version when used alone (related to this bug: https://bugs.llvm.org/show_bug.cgi?id=46017) - added __FLANG, __FLANG_MAJOR__, __FLANG_MINOR__ 
and __FLANG_PATCHLEVEL__ (similar to their __F18* counterparts) for compatibility purpose Reviewed By: AlexisPerry, richard.barton.arm, tskeith Differential Revision: https://reviews.llvm.org/D84334 (cherry picked from commit b11c52781635bd871abd6d932cfd5dcd6f311903) --- flang/test/Driver/version_test.f90 | 7 ++++-- .../Preprocessing/compiler_defined_macros.F90 | 11 ++++++++ flang/tools/f18/CMakeLists.txt | 7 ++++-- flang/tools/f18/f18.cpp | 25 ++++++++++++++++--- flang/tools/f18/f18_version.h.in | 9 +++++++ 5 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 flang/test/Preprocessing/compiler_defined_macros.F90 create mode 100644 flang/tools/f18/f18_version.h.in diff --git a/flang/test/Driver/version_test.f90 b/flang/test/Driver/version_test.f90 index 08ea35ba49ea6..79be3617cf4b4 100644 --- a/flang/test/Driver/version_test.f90 +++ b/flang/test/Driver/version_test.f90 @@ -1,7 +1,10 @@ ! Check that lit configuration works by checking the compiler version -! RUN: %f18 -V 2>&1 | FileCheck -check-prefix=VERSION %s ! VERSION-NOT:{{![[:space:]]}} ! VERSION:{{[[:space:]]}} -! VERSION-SAME:f18 compiler (under development) +! VERSION-SAME:f18 compiler (under development), version {{[1-9][0-9]*.[0-9]*.[0-9]*}} ! VERSION-EMPTY: + +! RUN: %f18 -V 2>&1 | FileCheck -check-prefix=VERSION %s +! RUN: %f18 -v 2>&1 | FileCheck -check-prefix=VERSION %s +! RUN: %f18 --version 2>&1 | FileCheck -check-prefix=VERSION %s diff --git a/flang/test/Preprocessing/compiler_defined_macros.F90 b/flang/test/Preprocessing/compiler_defined_macros.F90 new file mode 100644 index 0000000000000..ba20f6d396224 --- /dev/null +++ b/flang/test/Preprocessing/compiler_defined_macros.F90 @@ -0,0 +1,11 @@ +! Check that the macros that give the version number are set properly + +!CHECK: flang_major = {{[1-9][0-9]*$}} +!CHECK: flang_minor = {{[0-9]+$}} +!CHECK: flang_patchlevel = {{[0-9]+$}} +!RUN: %f18 -E %s | FileCheck --ignore-case %s + + +integer, parameter :: flang_major = __flang_major__ +integer, parameter :: flang_minor = __flang_minor__ +integer, parameter :: flang_patchlevel = __flang_patchlevel__ diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 46c38fa43a2e5..f3af6e8312fe7 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -28,8 +28,10 @@ set(MODULES ) set(include ${FLANG_BINARY_DIR}/include/flang) - -set(include ${FLANG_BINARY_DIR}/include/flang) +target_include_directories(f18 + PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} +) # Create module files directly from the top-level module source directory foreach(filename ${MODULES}) @@ -64,5 +66,6 @@ file(COPY ${CMAKE_BINARY_DIR}/tools/flang/bin/flang DESTINATION ${CMAKE_BINARY_D # The flang script to be installed needs a different path to the headers. 
set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_INSTALL_PREFIX}/include/flang) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${FLANG_BINARY_DIR}/bin/flang-install.sh @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/f18_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/f18_version.h @ONLY) install(PROGRAMS ${FLANG_BINARY_DIR}/bin/flang-install.sh DESTINATION bin RENAME flang PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE) diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index 03c0f7afe810e..23b104ee520c8 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -38,6 +38,8 @@ #include #include +#include "f18_version.h" + static std::list argList(int argc, char *const argv[]) { std::list result; for (int j = 0; j < argc; ++j) { @@ -390,6 +392,13 @@ void Link(std::vector &liblist, std::vector &objects, } } +int printVersion() { + llvm::errs() << "\nf18 compiler (under development), version " + << __FLANG_MAJOR__ << "." << __FLANG_MINOR__ << "." + << __FLANG_PATCHLEVEL__ << "\n"; + return exitStatus; +} + int main(int argc, char *const argv[]) { atexit(CleanUpAtExit); @@ -411,6 +420,11 @@ int main(int argc, char *const argv[]) { options.predefinitions.emplace_back("__F18_MAJOR__", "1"); options.predefinitions.emplace_back("__F18_MINOR__", "1"); options.predefinitions.emplace_back("__F18_PATCHLEVEL__", "1"); + options.predefinitions.emplace_back("__flang__", __FLANG__); + options.predefinitions.emplace_back("__flang_major__", __FLANG_MAJOR__); + options.predefinitions.emplace_back("__flang_minor__", __FLANG_MINOR__); + options.predefinitions.emplace_back( + "__flang_patchlevel__", __FLANG_PATCHLEVEL__); #if __x86_64__ options.predefinitions.emplace_back("__x86_64__", "1"); #endif @@ -651,13 +665,16 @@ int main(int argc, char *const argv[]) { << "Unrecognised options are passed through to the external compiler\n" << "set by F18_FC (see defaults).\n"; return exitStatus; - } else if (arg == "-V") { - llvm::errs() << "\nf18 compiler (under development)\n"; - return exitStatus; + } else if (arg == "-V" || arg == "--version") { + return printVersion(); } else { driver.F18_FCArgs.push_back(arg); if (arg == "-v") { - driver.verbose = true; + if (args.size() > 1) { + driver.verbose = true; + } else { + return printVersion(); + } } else if (arg == "-I") { driver.F18_FCArgs.push_back(args.front()); driver.searchDirectories.push_back(args.front()); diff --git a/flang/tools/f18/f18_version.h.in b/flang/tools/f18/f18_version.h.in new file mode 100644 index 0000000000000..0c8d5227cd001 --- /dev/null +++ b/flang/tools/f18/f18_version.h.in @@ -0,0 +1,9 @@ +#ifndef _F18_H_ +#define _F18_H_ + +#define __FLANG__ "1" +#define __FLANG_MAJOR__ "@LLVM_VERSION_MAJOR@" +#define __FLANG_MINOR__ "@LLVM_VERSION_MINOR@" +#define __FLANG_PATCHLEVEL__ "@LLVM_VERSION_PATCH@" + +#endif // _F18_H_ From 7030fc50d93e5b08bde9743fb54f24c4a44a8e4a Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Sat, 5 Sep 2020 13:26:44 +0100 Subject: [PATCH 053/109] ReleaseNotes: Add RISC-V updates --- clang/docs/ReleaseNotes.rst | 16 ++++++++++++ llvm/docs/ReleaseNotes.rst | 50 +++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9d0ab935063fd..ba0e15deb3894 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -262,6 +262,13 @@ New Compiler Flags and 256TB (needs -mcmodel=large). This allows large/many thread local variables or a compact/fast code in an executable. 
+- -menable-experimental-extension` can be used to enable experimental or + unratified RISC-V extensions, allowing them to be targeted by specifying the + extension name and precise version number in the `-march` string. For these + experimental extensions, there is no expectation of ongoing support - the + compiler support will continue to change until the specification is + finalised. + Deprecated Compiler Flags ------------------------- @@ -296,6 +303,10 @@ Modified Compiler Flags ``char8_t`` as the character type of ``u8`` literals. This restores the Clang 8 behavior that regressed in Clang 9 and 10. - -print-targets has been added to print the registered targets. +- -mcpu is now supported for RISC-V, and recognises the generic-rv32, + rocket-rv32, sifive-e31, generic-rv64, rocket-rv64, and sifive-u54 target + CPUs. + New Pragmas in Clang -------------------- @@ -416,6 +427,11 @@ Changes related to C++ for OpenCL ABI Changes in Clang -------------------- +- For RISC-V, an ABI bug was fixed when passing complex single-precision + floats in RV64 with the hard float ABI. The bug could only be triggered for + function calls that exhaust the available FPRs. + + OpenMP Support in Clang ----------------------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 8171f9d990c99..cbc8c0859c7b8 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -178,6 +178,56 @@ Changes to the PowerPC Target During this release ... +Changes to the RISC-V Target +---------------------------- + +New features: +* After consultation through an RFC, the RISC-V backend now accepts patches for + proposed instruction set extensions that have not yet been ratified. For these + experimental extensions, there is no expectation of ongoing support - the + compiler support will continue to change until the specification is finalised. + In line with this policy, MC layer and code generation support was added for + version 0.92 of the proposed Bit Manipulation Extension and MC layer support + was added for version 0.8 of the proposed RISC-V Vector instruction set + extension. As these extensions are not yet ratified, compiler support will + continue to change to match the specifications until they are finalised. +* ELF attribute sections are now created, encoding information such as the ISA + string. +* Support for saving/restoring callee-saved registers via libcalls (a code + size optimisation). +* llvm-objdump will now print branch targets as part of disassembly. + +Improvements: +* If an immediate can be generated using a pair of `addi` instructions, that + pair will be selected rather than materialising the immediate into a + separate register with an `lui` and `addi` pair. +* Multiplication by a constant was optimised. +* `addi` instructions are now folded into the offset of a load/store instruction + even if the load/store itself has a non-zero offset, when it is safe to do + so. +* Additional target hooks were implemented to minimise generation of + unnecessary control flow instruction. +* The RISC-V backend's load/store peephole optimisation pass now supports + constant pools, improving code generation for floating point constants. +* Debug scratch register names `dscratch0` and `dscratch1` are now recognised in + addition to the legacy `dscratch` register name. +* Codegen for checking isnan was improved, removing a redundant `and`. +* The `dret` instruction is now supported by the MC layer. 
+* `.option pic` and `.option nopic` are now supported in assembly and `.reloc` + was extended to support arbitrary relocation types. +* Scheduling info metadata was improved. +* The `jump` pseudo instruction is now supported. + +Bug fixes: +* A failure to insert indirect branches in position independent code + was fixed. +* The calculated expanded size of atomic pseudo operations was fixed, avoiding + "fixup value out of range" errors during branch relaxation for some inputs. +* The `mcountinhibit` CSR is now recognised. +* The correct libcall is now emitted for converting a float/double to a 32-bit + signed or unsigned integer on RV64 targets lacking the F or D extensions. + + Changes to the X86 Target ------------------------- From 919f9c291508217c697220b87a33406b9b685202 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 2 Sep 2020 18:56:12 +0200 Subject: [PATCH 054/109] Eliminate the sizing template parameter N from CoalescingBitVector Since the parameter is not used anywhere, and the default size of 16 apparently causes PR47359, remove it. This ensures that IntervalMap will automatically determine the optimal size, using its NodeSizer struct. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D87044 (cherry picked from commit f26fc568402f84a94557cbe86e7aac8319d61387) --- llvm/include/llvm/ADT/CoalescingBitVector.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h index f8c8fec0ec9e7..0a7dcfe226315 100644 --- a/llvm/include/llvm/ADT/CoalescingBitVector.h +++ b/llvm/include/llvm/ADT/CoalescingBitVector.h @@ -34,15 +34,14 @@ namespace llvm { /// performance for non-sequential find() operations. /// /// \tparam IndexT - The type of the index into the bitvector. -/// \tparam N - The first N coalesced intervals of set bits are stored in-place. -template class CoalescingBitVector { +template class CoalescingBitVector { static_assert(std::is_unsigned::value, "Index must be an unsigned integer."); - using ThisT = CoalescingBitVector; + using ThisT = CoalescingBitVector; /// An interval map for closed integer ranges. The mapped values are unused. - using MapT = IntervalMap; + using MapT = IntervalMap; using UnderlyingIterator = typename MapT::const_iterator; From 8399522c96a94bfb7c1cbf4df2bed0b3d826fbf6 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Sun, 6 Sep 2020 15:42:21 -0700 Subject: [PATCH 055/109] [WebAssembly] Fix incorrect assumption of simple value types Fixes PR47375, in which an assertion was triggering because WebAssemblyTargetLowering::isVectorLoadExtDesirable was improperly assuming the use of simple value types. 
Differential Revision: https://reviews.llvm.org/D87110 (cherry picked from commit caee15a0ed52471bd329d01dc253ec9be3936c6d) --- .../WebAssembly/WebAssemblyISelLowering.cpp | 4 +-- llvm/test/CodeGen/WebAssembly/pr47375.ll | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/pr47375.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a9b9eceb41304..925636c823219 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -601,8 +601,8 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, } bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { - MVT ExtT = ExtVal.getSimpleValueType(); - MVT MemT = cast(ExtVal->getOperand(0))->getSimpleValueType(0); + EVT ExtT = ExtVal.getValueType(); + EVT MemT = cast(ExtVal->getOperand(0))->getValueType(0); return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || (ExtT == MVT::v4i32 && MemT == MVT::v4i16) || (ExtT == MVT::v2i64 && MemT == MVT::v2i32); diff --git a/llvm/test/CodeGen/WebAssembly/pr47375.ll b/llvm/test/CodeGen/WebAssembly/pr47375.ll new file mode 100644 index 0000000000000..4c04631f26b11 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/pr47375.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; Regression test for pr47375, in which an assertion was triggering +; because WebAssemblyTargetLowering::isVectorLoadExtDesirable was +; improperly assuming the use of simple value types. + +define void @sext_vec() { +; CHECK-LABEL: sext_vec: +; CHECK: .functype sext_vec () -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.load8_u 0 +; CHECK-NEXT: local.set 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.store8 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i32.shl +; CHECK-NEXT: i32.or +; CHECK-NEXT: i32.const 7175 +; CHECK-NEXT: i32.and +; CHECK-NEXT: i32.store16 0 +; CHECK-NEXT: # fallthrough-return + %L1 = load <2 x i3>, <2 x i3>* undef, align 2 + %zext = zext <2 x i3> %L1 to <2 x i10> + store <2 x i10> %zext, <2 x i10>* undef, align 4 + ret void +} From 56a7fe31adbb352804e4d568ec8b65cdc749a83f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 2 Sep 2020 12:21:06 -0700 Subject: [PATCH 056/109] [GCDAProfiling] Suppress -Wprio-ctor-dtor for GCC>=9 and remove unused write_string/length_of_string The `__attribute__((destructor(100)))` diagnostic does not have a warning option in GCC 8 (before r264853) and thus cannot be suppressed. 
(cherry picked from commit 1cfde143e82aeb47cffba436ba7b5302d8e14193) --- compiler-rt/lib/profile/GCDAProfiling.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index 57d8dec423cc0..82369357e9861 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -210,22 +210,6 @@ static void write_64bit_value(uint64_t i) { write_32bit_value(hi); } -static uint32_t length_of_string(const char *s) { - return (strlen(s) / 4) + 1; -} - -// Remove when we support libgcov 9 current_working_directory. -#if !defined(_MSC_VER) && defined(__clang__) -__attribute__((unused)) -#endif -static void -write_string(const char *s) { - uint32_t len = length_of_string(s); - write_32bit_value(len); - write_bytes(s, strlen(s)); - write_bytes("\0\0\0\0", 4 - (strlen(s) % 4)); -} - static uint32_t read_32bit_value() { uint32_t val; @@ -632,6 +616,9 @@ void llvm_writeout_files(void) { // __attribute__((destructor)) and destructors whose priorities are greater than // 100 run before this function and can thus be tracked. The priority is // compatible with GCC 7 onwards. +#if __GNUC__ >= 9 +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif __attribute__((destructor(100))) #endif static void llvm_writeout_and_clear(void) { From 6b98995a44b2ebc94804ab552ed497c6860b6df3 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Wed, 2 Sep 2020 15:30:19 -0500 Subject: [PATCH 057/109] [PowerPC] Do not legalize vector FDIV without VSX Quite a while ago, we legalized these nodes as we added custom handling for reciprocal estimates in the back end. We have since moved to target-independent combines but neglected to turn off legalization. As a result, we can now get selection failures on non-VSX subtargets as evidenced in the listed PR. Fixes: https://bugs.llvm.org/show_bug.cgi?id=47373 (cherry picked from commit 27714075848e7f05a297317ad28ad2570d8e5a43) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 +- llvm/test/CodeGen/PowerPC/pr47373.ll | 180 ++++++++++++++++++++ 2 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr47373.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2f50c52c90a11..2d0b171152495 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -799,7 +799,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); - if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { + if (Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -1297,12 +1297,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::SELECT_CC); } - // Use reciprocal estimates. 
- if (TM.Options.UnsafeFPMath) { - setTargetDAGCombine(ISD::FDIV); - setTargetDAGCombine(ISD::FSQRT); - } - if (Subtarget.hasP9Altivec()) { setTargetDAGCombine(ISD::ABS); setTargetDAGCombine(ISD::VSELECT); diff --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll new file mode 100644 index 0000000000000..559f4f9a8b4ae --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr47373.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 \ +; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s +@a = local_unnamed_addr global float* null, align 8 + +; Function Attrs: nounwind +define void @d() local_unnamed_addr #0 { +; CHECK-LABEL: d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -208(r1) +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: std r30, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r29, 0(r3) +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: cmpwi r30, 1 +; CHECK-NEXT: blt cr0, .LBB0_9 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: cmplwi r30, 4 +; CHECK-NEXT: clrldi r4, r30, 32 +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: blt cr0, .LBB0_7 +; CHECK-NEXT: # %bb.2: # %vector.memcheck +; CHECK-NEXT: rldic r6, r30, 2, 30 +; CHECK-NEXT: add r7, r3, r6 +; CHECK-NEXT: cmpld r29, r7 +; CHECK-NEXT: add r6, r29, r6 +; CHECK-NEXT: bc 4, lt, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %vector.memcheck +; CHECK-NEXT: cmpld r3, r6 +; CHECK-NEXT: bc 12, lt, .LBB0_7 +; CHECK-NEXT: .LBB0_4: # %vector.ph +; CHECK-NEXT: rlwinm r5, r4, 0, 0, 29 +; CHECK-NEXT: li r7, 15 +; CHECK-NEXT: addi r6, r5, -4 +; CHECK-NEXT: addi r8, r1, 144 +; CHECK-NEXT: rldicl r6, r6, 62, 2 +; CHECK-NEXT: addi r9, r1, 128 +; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: addi r10, r1, 160 +; CHECK-NEXT: mtctr r6 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: addi r11, r1, 112 +; CHECK-NEXT: .LBB0_5: # %vector.body +; CHECK-NEXT: # +; CHECK-NEXT: add r12, r3, r6 +; CHECK-NEXT: lvx v3, r3, r6 +; CHECK-NEXT: lvx v5, r12, r7 +; CHECK-NEXT: add r12, r29, r6 +; CHECK-NEXT: lvsl v2, r3, r6 +; CHECK-NEXT: vperm v2, v3, v5, v2 +; CHECK-NEXT: lvx v3, r29, r6 +; CHECK-NEXT: lvx v5, r12, r7 +; CHECK-NEXT: lvsl v4, r29, r6 +; CHECK-NEXT: stvx v2, 0, r8 +; CHECK-NEXT: vperm v2, v3, v5, v4 +; CHECK-NEXT: stvx v2, 0, r9 +; CHECK-NEXT: lfs f0, 156(r1) +; CHECK-NEXT: lfs f1, 140(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 136(r1) +; CHECK-NEXT: stfs f0, 172(r1) +; CHECK-NEXT: lfs f0, 152(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 132(r1) +; CHECK-NEXT: stfs f0, 168(r1) +; CHECK-NEXT: lfs f0, 148(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 128(r1) +; CHECK-NEXT: stfs f0, 164(r1) +; CHECK-NEXT: lfs f0, 144(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: stfs f0, 160(r1) +; CHECK-NEXT: lvx v2, 0, r10 +; CHECK-NEXT: stvx v2, 0, r11 +; CHECK-NEXT: ld r0, 112(r1) +; CHECK-NEXT: stdx r0, r29, r6 +; CHECK-NEXT: addi r6, r6, 16 +; CHECK-NEXT: ld r0, 120(r1) +; CHECK-NEXT: std r0, 8(r12) +; CHECK-NEXT: bdnz .LBB0_5 +; CHECK-NEXT: # %bb.6: # %middle.block +; CHECK-NEXT: cmpld r5, r4 +; CHECK-NEXT: beq cr0, .LBB0_9 +; CHECK-NEXT: .LBB0_7: # %for.body.preheader18 +; CHECK-NEXT: sldi r6, r5, 2 +; CHECK-NEXT: sub r5, r4, r5 +; CHECK-NEXT: addi r6, r6, -4 +; CHECK-NEXT: add r3, 
r3, r6 +; CHECK-NEXT: add r4, r29, r6 +; CHECK-NEXT: mtctr r5 +; CHECK-NEXT: .LBB0_8: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: lfsu f0, 4(r4) +; CHECK-NEXT: lfsu f1, 4(r3) +; CHECK-NEXT: fdivs f0, f0, f1 +; CHECK-NEXT: stfs f0, 0(r4) +; CHECK-NEXT: bdnz .LBB0_8 +; CHECK-NEXT: .LBB0_9: # %for.end +; CHECK-NEXT: ld r30, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 208 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load float*, float** @a, align 8 + %call = call signext i32 bitcast (i32 (...)* @c to i32 ()*)() #2 + %call1 = call float* bitcast (float* (...)* @b to float* ()*)() #2 + %cmp11 = icmp sgt i32 %call, 0 + br i1 %cmp11, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %call to i64 + %min.iters.check = icmp ult i32 %call, 4 + br i1 %min.iters.check, label %for.body.preheader18, label %vector.memcheck + +vector.memcheck: ; preds = %for.body.preheader + %scevgep = getelementptr float, float* %0, i64 %wide.trip.count + %scevgep15 = getelementptr float, float* %call1, i64 %wide.trip.count + %bound0 = icmp ult float* %0, %scevgep15 + %bound1 = icmp ult float* %call1, %scevgep + %found.conflict = and i1 %bound0, %bound1 + br i1 %found.conflict, label %for.body.preheader18, label %vector.ph + +vector.ph: ; preds = %vector.memcheck + %n.vec = and i64 %wide.trip.count, 4294967292 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %1 = getelementptr inbounds float, float* %call1, i64 %index + %2 = bitcast float* %1 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %2, align 4 + %3 = getelementptr inbounds float, float* %0, i64 %index + %4 = bitcast float* %3 to <4 x float>* + %wide.load17 = load <4 x float>, <4 x float>* %4, align 4 + %5 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load + %6 = bitcast float* %3 to <4 x float>* + store <4 x float> %5, <4 x float>* %6, align 4 + %index.next = add i64 %index, 4 + %7 = icmp eq i64 %index.next, %n.vec + br i1 %7, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %n.vec, %wide.trip.count + br i1 %cmp.n, label %for.end, label %for.body.preheader18 + +for.body.preheader18: ; preds = %middle.block, %vector.memcheck, %for.body.preheader + %indvars.iv.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] + br label %for.body + +for.body: ; preds = %for.body.preheader18, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader18 ] + %arrayidx = getelementptr inbounds float, float* %call1, i64 %indvars.iv + %8 = load float, float* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds float, float* %0, i64 %indvars.iv + %9 = load float, float* %arrayidx3, align 4 + %div = fdiv reassoc nsz arcp afn float %9, %8 + store float %div, float* %arrayidx3, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %middle.block, %entry + ret void +} + +declare signext i32 @c(...) local_unnamed_addr #1 + +declare float* @b(...) 
local_unnamed_addr #1 + +attributes #0 = { nounwind } From b8fe222400586223c4fac4e98a480ee34cace780 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Wed, 2 Sep 2020 17:04:35 -0500 Subject: [PATCH 058/109] [PowerPC] Fix broken kill flag after MI peephole The test case in https://bugs.llvm.org/show_bug.cgi?id=47373 exposed two bugs in the PPC back end. The first one was fixed in commit 27714075848e7f05a297317ad28ad2570d8e5a43 but the test case had to be added without -verify-machineinstrs due to the second bug. This commit fixes the use-after-kill that is left behind by the PPC MI peephole optimization. (cherry picked from commit 69289cc10ffd1de4d3bf05d33948e6b21b6e68db) --- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 2 ++ .../PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir | 2 +- llvm/test/CodeGen/PowerPC/mi-peephole.mir | 2 +- llvm/test/CodeGen/PowerPC/pr47373.ll | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index d2aba6bd6e8de..227c863685ae9 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -1555,6 +1555,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); MI.getOperand(2).setImm(NewSH); MI.getOperand(3).setImm(NewMB); + MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill()); + SrcMI->getOperand(1).setIsKill(false); LLVM_DEBUG(dbgs() << "To: "); LLVM_DEBUG(MI.dump()); diff --git a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir index 7c14e7750df90..2f7a85a111ebb 100644 --- a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir +++ b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir @@ -51,4 +51,4 @@ body: | # # CHECK-PASS-NOT: %2:g8rc = RLDICL killed %1, 0, 32 # CHECK-PASS-NOT: %3:g8rc = RLDICR %2, 2, 61 -# CHECK-PASS: %3:g8rc = RLDIC %1, 2, 30 +# CHECK-PASS: %3:g8rc = RLDIC killed %1, 2, 30 diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole.mir b/llvm/test/CodeGen/PowerPC/mi-peephole.mir index 8bf72461d5453..c7f41cd0bc4c9 100644 --- a/llvm/test/CodeGen/PowerPC/mi-peephole.mir +++ b/llvm/test/CodeGen/PowerPC/mi-peephole.mir @@ -31,7 +31,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: %1:g8rc = COPY $x4 ; CHECK: %0:g8rc = COPY $x3 - ; CHECK: %3:g8rc = RLDIC %1, 2, 30 + ; CHECK: %3:g8rc = RLDIC killed %1, 2, 30 ; CHECK: $x3 = COPY %3 ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 ... diff --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll index 559f4f9a8b4ae..d09a5fe8fb0b6 100644 --- a/llvm/test/CodeGen/PowerPC/pr47373.ll +++ b/llvm/test/CodeGen/PowerPC/pr47373.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 \ +; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \ ; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s @a = local_unnamed_addr global float* null, align 8 From ba6a10d87f57b78303555028f92add22918b3bcb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 1 Sep 2020 12:14:32 -0700 Subject: [PATCH 059/109] [MachineCopyPropagation] In isNopCopy, check the destination registers match in addition to the source registers. Previously if the source match we asserted that the destination matched. 
But GPR <-> mask register copies on X86 can violate this since we use the same K-registers for multiple sizes. Fixes this ISPC issue https://github.com/ispc/ispc/issues/1851 Differential Revision: https://reviews.llvm.org/D86507 (cherry picked from commit 4783e2c9c603ed6aeacc76bb1177056a9d307bd1) --- llvm/lib/CodeGen/MachineCopyPropagation.cpp | 4 +- llvm/test/CodeGen/X86/machine-cp-mask-reg.mir | 59 +++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/machine-cp-mask-reg.mir diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 70d6dcc2e3e29..4c4839ca65229 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -336,10 +336,8 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, unsigned Def, const TargetRegisterInfo *TRI) { Register PreviousSrc = PreviousCopy.getOperand(1).getReg(); Register PreviousDef = PreviousCopy.getOperand(0).getReg(); - if (Src == PreviousSrc) { - assert(Def == PreviousDef); + if (Src == PreviousSrc && Def == PreviousDef) return true; - } if (!TRI->isSubRegister(PreviousSrc, Src)) return false; unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src); diff --git a/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir new file mode 100644 index 0000000000000..86a077e64764f --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir @@ -0,0 +1,59 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -run-pass=machine-cp -o - | FileCheck %s + +# machine-cp previously asserted trying to determine if the k0->eax copy below +# could be combined with the k0->rax copy. + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + + define i8 @foo(<64 x i8> %x, i64* %y, i64 %z) #0 { + %a = icmp eq <64 x i8> %x, zeroinitializer + %b = bitcast <64 x i1> %a to i64 + %c = add i64 %b, %z + store i64 %c, i64* %y, align 8 + %d = extractelement <64 x i1> %a, i32 0 + %e = zext i1 %d to i8 + ret i8 %e + } + + attributes #0 = { "target-cpu"="skx" } + +... 
+--- +name: foo +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$zmm0' } + - { reg: '$rdi' } + - { reg: '$rsi' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi, $zmm0 + + ; CHECK-LABEL: name: foo + ; CHECK: liveins: $rdi, $rsi, $zmm0 + ; CHECK: renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0 + ; CHECK: renamable $rax = COPY renamable $k0 + ; CHECK: renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags + ; CHECK: MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y) + ; CHECK: renamable $eax = COPY killed renamable $k0 + ; CHECK: renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax + ; CHECK: $al = KILL renamable $al, implicit killed $eax + ; CHECK: RET 0, $al + renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0 + renamable $rax = COPY renamable $k0 + renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags + MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y) + renamable $eax = COPY killed renamable $k0 + renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax + $al = KILL renamable $al, implicit killed $eax + RET 0, $al + +... From 0d8feb542b99bd9f3b92b7be422e8f0d86b93870 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 4 Sep 2020 21:44:37 -0400 Subject: [PATCH 060/109] [PowerPC] Provide vec_cmpne on pre-Power9 architectures in altivec.h These overloads are listed in appendix A of the ELFv2 ABI specification without a requirement for ISA 3.0. So these need to be available on all Altivec-capable architectures. The implementation in altivec.h erroneously had them guarded for Power9 due to the availability of the VCMPNE[BHW] instructions. However these need to be implemented in terms of the VCMPEQ[BHW] instructions on older architectures. 
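As an illustration, a small use of one of these overloads (a sketch) that now compiles for any Altivec-capable target instead of requiring Power9:

#include <altivec.h>

// All-ones lanes where the inputs differ. With -mcpu=power9 this can use
// vcmpnew directly; on older subtargets it is now expanded to vcmpequw
// followed by a complement, as the updated test checks.
vector bool int lanes_differ(vector signed int a, vector signed int b) {
  return vec_cmpne(a, b);
}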
Fixes: https://bugs.llvm.org/show_bug.cgi?id=47423 --- clang/lib/Headers/altivec.h | 104 +++++++++++++++++----- clang/test/CodeGen/builtins-ppc-altivec.c | 79 ++++++++++++++++ 2 files changed, 159 insertions(+), 24 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index ac5f43836316e..c4b90cc3f87ce 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -1766,36 +1766,12 @@ vec_cmpne(vector unsigned int __a, vector unsigned int __b) { (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector bool long long __a, vector bool long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector signed long long __a, vector signed long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector float __a, vector float __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector double __a, vector double __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - /* vec_cmpnez */ static __inline__ vector bool char __ATTRS_o_ai @@ -1900,6 +1876,86 @@ vec_parity_lsbb(vector signed long long __a) { return __builtin_altivec_vprtybd(__a); } +#else +/* vec_cmpne */ + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector bool char __a, vector bool char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector signed char __a, vector signed char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector unsigned char __a, vector unsigned char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector bool short __a, vector bool short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector signed short __a, vector signed short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector unsigned short __a, vector unsigned short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector bool int __a, vector bool int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector signed int __a, vector signed int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector unsigned int __a, vector unsigned int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector float __a, vector float __b) { + return ~(vec_cmpeq(__a, __b)); +} +#endif + +#ifdef __POWER8_VECTOR__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector bool long long __a, vector bool long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long 
long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector signed long long __a, vector signed long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} +#endif + +#ifdef __VSX__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector double __a, vector double __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} #endif /* vec_cmpgt */ diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c index d53011b37d413..dc93e7340597e 100644 --- a/clang/test/CodeGen/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/builtins-ppc-altivec.c @@ -1029,6 +1029,85 @@ void test2() { // CHECK: @llvm.ppc.altivec.vcmpeqfp // CHECK-LE: @llvm.ppc.altivec.vcmpeqfp + /* vec_cmpne */ + res_vbc = vec_cmpne(vsc, vsc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vuc, vuc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vbc, vbc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vbc, vbc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbs = vec_cmpne(vs, vs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vus, vus); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vbs, vbs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vbs, vbs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbi = vec_cmpne(vi, vi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vui, vui); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vbi, vbi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vbi, vbi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vf, vf); +// CHECK: @llvm.ppc.altivec.vcmpeqfp +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpeqfp +// CHECK-LE: xor + /* vec_cmpge */ res_vbc = vec_cmpge(vsc, vsc); // CHECK: @llvm.ppc.altivec.vcmpgtsb From 96b8fd70d1572d3d38abce208e855c49f9eeac1d Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sat, 5 Sep 2020 14:23:54 +0200 Subject: [PATCH 061/109] Set InvalidDecl directly when deserializing a Decl When parsing a C++17 binding declaration, we first create the BindingDecls in Sema::ActOnDecompositionDeclarator, and then build the DecompositionDecl in Sema::ActOnVariableDeclarator, so the 
contained BindingDecls are never null. But when deserializing, we read the DecompositionDecl with all properties before filling in the Bindings. Among other things, reading a declaration reads whether it's invalid, then calling setInvalidDecl which assumes that all bindings of the DecompositionDecl are available, but that isn't the case. Deserialization should just set all properties directly without invoking subsequent functions, so we just set the flag without using the setter. Fixes PR34960. Reviewed By: rsmith Differential Revision: https://reviews.llvm.org/D86207 (cherry picked from commit 16975a638df3cda95c677055120b23e689d96dcd) --- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/test/PCH/cxx1z-decomposition.cpp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 117eb598bd5eb..c0bf240464f79 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -583,7 +583,7 @@ void ASTDeclReader::VisitDecl(Decl *D) { Reader.getContext()); } D->setLocation(ThisDeclLoc); - D->setInvalidDecl(Record.readInt()); + D->InvalidDecl = Record.readInt(); if (Record.readInt()) { // hasAttrs AttrVec Attrs; Record.readAttributes(Attrs); diff --git a/clang/test/PCH/cxx1z-decomposition.cpp b/clang/test/PCH/cxx1z-decomposition.cpp index 2f817b4280ded..914ce80c550d1 100644 --- a/clang/test/PCH/cxx1z-decomposition.cpp +++ b/clang/test/PCH/cxx1z-decomposition.cpp @@ -2,11 +2,11 @@ // RUN: %clang_cc1 -pedantic -std=c++1z -include %s -verify %s // // With PCH: -// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch %s -o %t -// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -verify %s +// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fallow-pch-with-compiler-errors %s -o %t +// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -fallow-pch-with-compiler-errors -verify %s -// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fpch-instantiate-templates %s -o %t -// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -verify %s +// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fallow-pch-with-compiler-errors -fpch-instantiate-templates %s -o %t +// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -fallow-pch-with-compiler-errors -verify %s #ifndef HEADER #define HEADER @@ -22,6 +22,8 @@ constexpr int foo(Q &&q) { return a * 10 + b; } +auto [noinit]; // expected-error{{decomposition declaration '[noinit]' requires an initializer}} + #else int arr[2]; From 7d4d7a7bf1e8d99b80da66afde7df81b05f77538 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Sun, 6 Sep 2020 15:54:24 -0400 Subject: [PATCH 062/109] [compiler-rt] Implement __clear_cache() on OpenBSD/arm (cherry picked from commit 8542dab909f895a8b6812428bb5e1acf7ea15305) --- compiler-rt/lib/builtins/clear_cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index 72e02e613de50..29e31f55d4998 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -33,7 +33,7 @@ uintptr_t GetCurrentProcess(void); #include #endif -#if defined(__OpenBSD__) && defined(__mips__) +#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__)) // clang-format off #include #include @@ -58,7 +58,7 @@ void __clear_cache(void *start, void *end) { #elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__)) FlushInstructionCache(GetCurrentProcess(), start, end - 
start); #elif defined(__arm__) && !defined(__APPLE__) -#if defined(__FreeBSD__) || defined(__NetBSD__) +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) struct arm_sync_icache_args arg; arg.addr = (uintptr_t)start; From 9e6b164239d767bdbc3248006f21dff6e8a41d90 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Mon, 7 Sep 2020 02:27:11 -0400 Subject: [PATCH 063/109] [Sparc] Select the UltraSPARC instruction set with the external assembler Select the UltraSPARC instruction set with the external assembler on Linux / FreeBSD / OpenBSD, matches GCC. (cherry picked from commit 70523ecfaca692bf5d0192e466c34ae7514624ea) --- clang/lib/Driver/ToolChains/Arch/Sparc.cpp | 9 ++++++++- clang/test/Driver/freebsd.c | 2 +- clang/test/Driver/linux-as.c | 4 ++-- clang/test/Driver/openbsd.c | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index 043b7f257c01d..70ba8eb2a7d0d 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -21,12 +21,19 @@ using namespace llvm::opt; const char *sparc::getSparcAsmModeForCPU(StringRef Name, const llvm::Triple &Triple) { if (Triple.getArch() == llvm::Triple::sparcv9) { + const char *DefV9CPU; + + if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD()) + DefV9CPU = "-Av9a"; + else + DefV9CPU = "-Av9"; + return llvm::StringSwitch(Name) .Case("niagara", "-Av9b") .Case("niagara2", "-Av9b") .Case("niagara3", "-Av9d") .Case("niagara4", "-Av9d") - .Default("-Av9"); + .Default(DefV9CPU); } else { return llvm::StringSwitch(Name) .Case("v8", "-Av8") diff --git a/clang/test/Driver/freebsd.c b/clang/test/Driver/freebsd.c index 5eb00ce65d711..2d276952691cf 100644 --- a/clang/test/Driver/freebsd.c +++ b/clang/test/Driver/freebsd.c @@ -168,7 +168,7 @@ // RUN: %clang -mcpu=ultrasparc -target sparc64-unknown-freebsd8 %s -### -no-integrated-as 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SPARC-CPU %s // CHECK-SPARC-CPU: cc1{{.*}}" "-target-cpu" "ultrasparc" -// CHECK-SPARC-CPU: as{{.*}}" "-Av9 +// CHECK-SPARC-CPU: as{{.*}}" "-Av9a // Check that -G flags are passed to the linker for mips // RUN: %clang -target mips-unknown-freebsd %s -### -G0 2>&1 \ diff --git a/clang/test/Driver/linux-as.c b/clang/test/Driver/linux-as.c index 77ac05f30942c..0959bd7ba0a11 100644 --- a/clang/test/Driver/linux-as.c +++ b/clang/test/Driver/linux-as.c @@ -168,7 +168,7 @@ // RUN: | FileCheck -check-prefix=CHECK-SPARCV9 %s // CHECK-SPARCV9: as // CHECK-SPARCV9: -64 -// CHECK-SPARCV9: -Av9 +// CHECK-SPARCV9: -Av9a // CHECK-SPARCV9-NOT: -KPIC // CHECK-SPARCV9: -o // @@ -177,7 +177,7 @@ // RUN: | FileCheck -check-prefix=CHECK-SPARCV9PIC %s // CHECK-SPARCV9PIC: as // CHECK-SPARCV9PIC: -64 -// CHECK-SPARCV9PIC: -Av9 +// CHECK-SPARCV9PIC: -Av9a // CHECK-SPARCV9PIC: -KPIC // CHECK-SPARCV9PIC: -o // diff --git a/clang/test/Driver/openbsd.c b/clang/test/Driver/openbsd.c index 203b4b4a2ff0f..ae1aa64416907 100644 --- a/clang/test/Driver/openbsd.c +++ b/clang/test/Driver/openbsd.c @@ -70,7 +70,7 @@ // RUN: | FileCheck -check-prefix=CHECK-MIPS64EL-PIC %s // CHECK-AMD64-M32: as{{.*}}" "--32" // CHECK-POWERPC: as{{.*}}" "-mppc" "-many" -// CHECK-SPARC64: as{{.*}}" "-64" "-Av9" +// CHECK-SPARC64: as{{.*}}" "-64" "-Av9a" // CHECK-MIPS64: as{{.*}}" "-mabi" "64" "-EB" // CHECK-MIPS64-PIC: as{{.*}}" "-mabi" "64" "-EB" "-KPIC" // CHECK-MIPS64EL: as{{.*}}" "-mabi" "64" "-EL" From e7e6335763cafe06988a6c06ed50af0b4ec28d8b Mon Sep 17 00:00:00 
2001 From: Juneyoung Lee Date: Tue, 8 Sep 2020 11:40:24 +0900 Subject: [PATCH 064/109] ReleaseNotes: Add updates in LangRef related with undef/poison --- llvm/docs/ReleaseNotes.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index cbc8c0859c7b8..0d5e0137bbc4d 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -101,6 +101,18 @@ Changes to the LLVM IR where ``uint64_t`` was used to denote the size in bits of a IR type we have partially migrated the codebase to using ``llvm::TypeSize``. +* Branching on ``undef``/``poison`` is undefined behavior. It is needed for + correctly analyzing value ranges based on branch conditions. This is + consistent with MSan's behavior as well. + +* ``memset``/``memcpy``/``memmove`` can take ``undef``/``poison`` pointer(s) + if the size to fill is zero. + +* Passing ``undef``/``poison`` to a standard I/O library function call + (`printf`/`fputc`/...) is undefined behavior. The new ``noundef`` attribute + is attached to the functions' arguments. The full list is available at + ``llvm::inferLibFuncAttributes``. + Changes to building LLVM ------------------------ @@ -305,6 +317,10 @@ Changes to the Go bindings Changes to the DAG infrastructure --------------------------------- +* A SelDag-level freeze instruction has landed. It is simply lowered as a copy + operation to MachineIR, but to make it fully correct either IMPLICIT_DEF + should be fixed or the equivalent FREEZE operation should be added to + MachineIR. Changes to the Debug Info --------------------------------- From 6d762fdaa5c42e9f74cca48481e6f55c4472c0d3 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 4 Sep 2020 13:54:21 -0400 Subject: [PATCH 065/109] [PowerPC] Allow const pointers for load builtins in altivec.h The load builtins in altivec.h do not have const in the signature for the pointer parameter. This prevents using them for loading from constant pointers. A notable case for such a use is Eigen. This patch simply adds the missing const. 
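As an illustrative sketch only (the file and function names below are
assumptions for this example, not taken from the patch), a load through a
pointer-to-const then compiles as expected:

$ cat xl_const.c
#include <altivec.h>

// Read four floats from read-only data. Before this change the vec_xl
// overloads only accepted 'float *', so calling them with a
// 'const float *' argument did not compile cleanly.
vector float load4(const float *p) {
  return vec_xl(0, p);
}
$ clang --target=powerpc64le-unknown-linux-gnu -maltivec -mcpu=pwr8 -c xl_const.c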
Fixes: https://bugs.llvm.org/show_bug.cgi?id=47408 (cherry picked from commit 54205f0bd2377503b818d7f62cc4ed63ef5b1e94) --- clang/lib/Headers/altivec.h | 74 ++++----- clang/test/CodeGen/builtins-ppc-altivec.c | 171 ++++++++++---------- clang/test/CodeGen/builtins-ppc-p10vector.c | 10 +- clang/test/CodeGen/builtins-ppc-xl-xst.c | 165 +++++++++++-------- 4 files changed, 230 insertions(+), 190 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index c4b90cc3f87ce..c00c5561428a6 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -2758,67 +2758,67 @@ vec_insert_exp(vector unsigned int __a, vector unsigned int __b) { } #if defined(__powerpc64__) -static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(signed char *__a, +static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(const signed char *__a, size_t __b) { return (vector signed char)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_len(unsigned char *__a, size_t __b) { +vec_xl_len(const unsigned char *__a, size_t __b) { return (vector unsigned char)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(signed short *__a, +static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(const signed short *__a, size_t __b) { return (vector signed short)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_len(unsigned short *__a, size_t __b) { +vec_xl_len(const unsigned short *__a, size_t __b) { return (vector unsigned short)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(signed int *__a, +static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(const signed int *__a, size_t __b) { return (vector signed int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(unsigned int *__a, +static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(const unsigned int *__a, size_t __b) { return (vector unsigned int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector float __ATTRS_o_ai vec_xl_len(float *__a, size_t __b) { +static __inline__ vector float __ATTRS_o_ai vec_xl_len(const float *__a, size_t __b) { return (vector float)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_len(signed __int128 *__a, size_t __b) { +vec_xl_len(const signed __int128 *__a, size_t __b) { return (vector signed __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_len(unsigned __int128 *__a, size_t __b) { +vec_xl_len(const unsigned __int128 *__a, size_t __b) { return (vector unsigned __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_len(signed long long *__a, size_t __b) { +vec_xl_len(const signed long long *__a, size_t __b) { return (vector signed long long)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_len(unsigned long long *__a, size_t __b) { +vec_xl_len(const unsigned long long *__a, size_t __b) { return (vector unsigned long long)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector double __ATTRS_o_ai vec_xl_len(double *__a, +static __inline__ vector double __ATTRS_o_ai vec_xl_len(const double *__a, size_t __b) { return (vector double)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector 
unsigned char __ATTRS_o_ai -vec_xl_len_r(unsigned char *__a, size_t __b) { +vec_xl_len_r(const unsigned char *__a, size_t __b) { vector unsigned char __res = (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56)); #ifdef __LITTLE_ENDIAN__ @@ -16409,41 +16409,41 @@ typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1))); typedef vector float unaligned_vec_float __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset, - signed char *__ptr) { + const signed char *__ptr) { return *(unaligned_vec_schar *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned char -vec_xl(signed long long __offset, unsigned char *__ptr) { +vec_xl(signed long long __offset, const unsigned char *__ptr) { return *(unaligned_vec_uchar*)(__ptr + __offset); } static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset, - signed short *__ptr) { + const signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sshort *)__addr; } static inline __ATTRS_o_ai vector unsigned short -vec_xl(signed long long __offset, unsigned short *__ptr) { +vec_xl(signed long long __offset, const unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ushort *)__addr; } static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset, - signed int *__ptr) { + const signed int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sint *)__addr; } static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset, - unsigned int *__ptr) { + const unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_uint *)__addr; } static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset, - float *__ptr) { + const float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_float *)__addr; } @@ -16454,19 +16454,19 @@ typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1))); typedef vector double unaligned_vec_double __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed long long -vec_xl(signed long long __offset, signed long long *__ptr) { +vec_xl(signed long long __offset, const signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sll *)__addr; } static inline __ATTRS_o_ai vector unsigned long long -vec_xl(signed long long __offset, unsigned long long *__ptr) { +vec_xl(signed long long __offset, const unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ull *)__addr; } static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset, - double *__ptr) { + const double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_double *)__addr; } @@ -16477,13 +16477,13 @@ typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1))); typedef vector unsigned __int128 unaligned_vec_ui128 __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 -vec_xl(signed long long __offset, signed __int128 *__ptr) { +vec_xl(signed long long __offset, const signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_si128 *)__addr; } static inline __ATTRS_o_ai vector unsigned __int128 -vec_xl(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl(signed long 
long __offset, const unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ui128 *)__addr; } @@ -16493,71 +16493,71 @@ vec_xl(signed long long __offset, unsigned __int128 *__ptr) { #ifdef __LITTLE_ENDIAN__ static __inline__ vector signed char __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed char *__ptr) { +vec_xl_be(signed long long __offset, const signed char *__ptr) { vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned char *__ptr) { +vec_xl_be(signed long long __offset, const unsigned char *__ptr) { vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector signed short __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed short *__ptr) { +vec_xl_be(signed long long __offset, const signed short *__ptr) { vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned short *__ptr) { +vec_xl_be(signed long long __offset, const unsigned short *__ptr) { vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector signed int __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed int *__ptr) { +vec_xl_be(signed long long __offset, const signed int *__ptr) { return (vector signed int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector unsigned int __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned int *__ptr) { +vec_xl_be(signed long long __offset, const unsigned int *__ptr) { return (vector unsigned int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector float __ATTRS_o_ai -vec_xl_be(signed long long __offset, float *__ptr) { +vec_xl_be(signed long long __offset, const float *__ptr) { return (vector float)__builtin_vsx_lxvw4x_be(__offset, __ptr); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed long long *__ptr) { +vec_xl_be(signed long long __offset, const signed long long *__ptr) { return (vector signed long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned long long *__ptr) { +vec_xl_be(signed long long __offset, const unsigned long long *__ptr) { return (vector unsigned long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector double __ATTRS_o_ai -vec_xl_be(signed long long __offset, double *__ptr) { +vec_xl_be(signed long long __offset, const double *__ptr) { return (vector double)__builtin_vsx_lxvd2x_be(__offset, __ptr); } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed __int128 *__ptr) { +vec_xl_be(signed long long __offset, const signed __int128 *__ptr) { return vec_xl(__offset, __ptr); } static __inline__ vector unsigned __int128 __ATTRS_o_ai 
-vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl_be(signed long long __offset, const unsigned __int128 *__ptr) { return vec_xl(__offset, __ptr); } #endif diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c index dc93e7340597e..06f70a9019039 100644 --- a/clang/test/CodeGen/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/builtins-ppc-altivec.c @@ -38,6 +38,13 @@ vector float res_vf; // CHECK-NOALTIVEC: error: unknown type name 'vector' // CHECK-NOALTIVEC-NOT: '(error)' +const signed char *param_sc_ld; +const unsigned char *param_uc_ld; +const short *param_s_ld; +const unsigned short *param_us_ld; +const int *param_i_ld; +const unsigned int *param_ui_ld; +const float *param_f_ld; signed char param_sc; unsigned char param_uc; @@ -1392,7 +1399,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vsc = vec_ld(0, ¶m_sc); + res_vsc = vec_ld(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1400,7 +1407,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vuc = vec_ld(0, ¶m_uc); + res_vuc = vec_ld(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1412,7 +1419,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vs = vec_ld(0, ¶m_s); + res_vs = vec_ld(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1420,7 +1427,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vus = vec_ld(0, ¶m_us); + res_vus = vec_ld(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1436,7 +1443,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vi = vec_ld(0, ¶m_i); + res_vi = vec_ld(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1444,7 +1451,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vui = vec_ld(0, ¶m_ui); + res_vui = vec_ld(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1456,7 +1463,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vf = vec_ld(0, ¶m_f); + res_vf = vec_ld(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1464,7 +1471,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vsc = vec_lvx(0, ¶m_sc); + res_vsc = vec_lvx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1472,7 +1479,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vuc = vec_lvx(0, ¶m_uc); + res_vuc = vec_lvx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1484,7 +1491,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vs = vec_lvx(0, ¶m_s); + res_vs = vec_lvx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1492,7 +1499,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vus = vec_lvx(0, ¶m_us); + res_vus = vec_lvx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1508,7 +1515,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vi = vec_lvx(0, ¶m_i); + res_vi = 
vec_lvx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1516,7 +1523,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vui = vec_lvx(0, ¶m_ui); + res_vui = vec_lvx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1528,64 +1535,64 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vf = vec_lvx(0, ¶m_f); + res_vf = vec_lvx(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx /* vec_lde */ - res_vsc = vec_lde(0, ¶m_sc); + res_vsc = vec_lde(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vuc = vec_lde(0, ¶m_uc); + res_vuc = vec_lde(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vs = vec_lde(0, ¶m_s); + res_vs = vec_lde(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vus = vec_lde(0, ¶m_us); + res_vus = vec_lde(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vi = vec_lde(0, ¶m_i); + res_vi = vec_lde(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vui = vec_lde(0, ¶m_ui); + res_vui = vec_lde(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vf = vec_lde(0, ¶m_f); + res_vf = vec_lde(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vsc = vec_lvebx(0, ¶m_sc); + res_vsc = vec_lvebx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vuc = vec_lvebx(0, ¶m_uc); + res_vuc = vec_lvebx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vs = vec_lvehx(0, ¶m_s); + res_vs = vec_lvehx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vus = vec_lvehx(0, ¶m_us); + res_vus = vec_lvehx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vi = vec_lvewx(0, ¶m_i); + res_vi = vec_lvewx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vui = vec_lvewx(0, ¶m_ui); + res_vui = vec_lvewx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vf = vec_lvewx(0, ¶m_f); + res_vf = vec_lvewx(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx @@ -1594,7 +1601,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vsc = vec_ldl(0, ¶m_sc); + res_vsc = vec_ldl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1602,7 +1609,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vuc = vec_ldl(0, ¶m_uc); + res_vuc = vec_ldl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1614,7 +1621,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vs = vec_ldl(0, ¶m_s); + res_vs = vec_ldl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1622,7 +1629,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vus = vec_ldl(0, ¶m_us); + res_vus = vec_ldl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1638,7 +1645,7 @@ void test6() { // CHECK: 
@llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vi = vec_ldl(0, ¶m_i); + res_vi = vec_ldl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1646,7 +1653,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vui = vec_ldl(0, ¶m_ui); + res_vui = vec_ldl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1658,7 +1665,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vf = vec_ldl(0, ¶m_f); + res_vf = vec_ldl(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1666,7 +1673,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vsc = vec_lvxl(0, ¶m_sc); + res_vsc = vec_lvxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1678,7 +1685,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vuc = vec_lvxl(0, ¶m_uc); + res_vuc = vec_lvxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1686,7 +1693,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vs = vec_lvxl(0, ¶m_s); + res_vs = vec_lvxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1694,7 +1701,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vus = vec_lvxl(0, ¶m_us); + res_vus = vec_lvxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1710,7 +1717,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vi = vec_lvxl(0, ¶m_i); + res_vi = vec_lvxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1718,7 +1725,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vui = vec_lvxl(0, ¶m_ui); + res_vui = vec_lvxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1730,7 +1737,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vf = vec_lvxl(0, ¶m_f); + res_vf = vec_lvxl(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1744,12 +1751,12 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vlogefp /* vec_lvsl */ - res_vuc = vec_lvsl(0, ¶m_i); + res_vuc = vec_lvsl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.lvsl /* vec_lvsr */ - res_vuc = vec_lvsr(0, ¶m_i); + res_vuc = vec_lvsr(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvsr // CHECK-LE: @llvm.ppc.altivec.lvsr @@ -6108,7 +6115,7 @@ void test6() { // CHECK-LE: insertelement <4 x float> /* vec_lvlx */ - res_vsc = vec_lvlx(0, ¶m_sc); + res_vsc = vec_lvlx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6128,7 +6135,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvlx(0, ¶m_uc); + res_vuc = vec_lvlx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6158,7 +6165,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvlx(0, ¶m_s); + res_vs = vec_lvlx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: 
@llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6178,7 +6185,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvlx(0, ¶m_us); + res_vus = vec_lvlx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6218,7 +6225,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvlx(0, ¶m_i); + res_vi = vec_lvlx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6238,7 +6245,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvlx(0, ¶m_ui); + res_vui = vec_lvlx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6279,7 +6286,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvlxl */ - res_vsc = vec_lvlxl(0, ¶m_sc); + res_vsc = vec_lvlxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6299,7 +6306,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvlxl(0, ¶m_uc); + res_vuc = vec_lvlxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6329,7 +6336,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvlxl(0, ¶m_s); + res_vs = vec_lvlxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6349,7 +6356,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvlxl(0, ¶m_us); + res_vus = vec_lvlxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6389,7 +6396,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvlxl(0, ¶m_i); + res_vi = vec_lvlxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6409,7 +6416,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvlxl(0, ¶m_ui); + res_vui = vec_lvlxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6450,7 +6457,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvrx */ - res_vsc = vec_lvrx(0, ¶m_sc); + res_vsc = vec_lvrx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6470,7 +6477,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvrx(0, ¶m_uc); + res_vuc = vec_lvrx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6500,7 +6507,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvrx(0, ¶m_s); + res_vs = vec_lvrx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6520,7 +6527,7 @@ void 
test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvrx(0, ¶m_us); + res_vus = vec_lvrx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6560,7 +6567,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvrx(0, ¶m_i); + res_vi = vec_lvrx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6580,7 +6587,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvrx(0, ¶m_ui); + res_vui = vec_lvrx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6621,7 +6628,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvrxl */ - res_vsc = vec_lvrxl(0, ¶m_sc); + res_vsc = vec_lvrxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6641,7 +6648,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvrxl(0, ¶m_uc); + res_vuc = vec_lvrxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6671,7 +6678,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvrxl(0, ¶m_s); + res_vs = vec_lvrxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6691,7 +6698,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvrxl(0, ¶m_us); + res_vus = vec_lvrxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6731,7 +6738,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvrxl(0, ¶m_i); + res_vi = vec_lvrxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6751,7 +6758,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvrxl(0, ¶m_ui); + res_vui = vec_lvrxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -9433,31 +9440,31 @@ void test8() { void test9() { // CHECK-LABEL: define void @test9 // CHECK-LE-LABEL: define void @test9 - res_vsc = vec_xl(param_sll, ¶m_sc); + res_vsc = vec_xl(param_sll, param_sc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 - res_vuc = vec_xl(param_sll, ¶m_uc); + res_vuc = vec_xl(param_sll, param_uc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 - res_vs = vec_xl(param_sll, ¶m_s); + res_vs = vec_xl(param_sll, param_s_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 - res_vus = vec_xl(param_sll, ¶m_us); + res_vus = vec_xl(param_sll, param_us_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 - res_vi = vec_xl(param_sll, ¶m_i); + 
res_vi = vec_xl(param_sll, param_i_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 - res_vui = vec_xl(param_sll, ¶m_ui); + res_vui = vec_xl(param_sll, param_ui_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 - res_vf = vec_xl(param_sll, ¶m_f); + res_vf = vec_xl(param_sll, param_f_ld); // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 } @@ -9499,35 +9506,35 @@ void test10() { void test11() { // CHECK-LABEL: define void @test11 // CHECK-LE-LABEL: define void @test11 - res_vsc = vec_xl_be(param_sll, ¶m_sc); + res_vsc = vec_xl_be(param_sll, param_sc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> - res_vuc = vec_xl_be(param_sll, ¶m_uc); + res_vuc = vec_xl_be(param_sll, param_uc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> - res_vs = vec_xl_be(param_sll, ¶m_s); + res_vs = vec_xl_be(param_sll, param_s_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> - res_vus = vec_xl_be(param_sll, ¶m_us); + res_vus = vec_xl_be(param_sll, param_us_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> - res_vi = vec_xl_be(param_sll, ¶m_i); + res_vi = vec_xl_be(param_sll, param_i_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) - res_vui = vec_xl_be(param_sll, ¶m_ui); + res_vui = vec_xl_be(param_sll, param_ui_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) - res_vf = vec_xl_be(param_sll, ¶m_f); + res_vf = vec_xl_be(param_sll, param_f_ld); // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) } diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index c51c24f259862..3512bfc4cf33b 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -24,10 +24,18 @@ vector unsigned long long vulla, vullb, vullc; vector unsigned __int128 vui128a, vui128b, vui128c; vector float vfa, vfb; vector double vda, vdb; -unsigned int uia, uib; +signed int *iap; +unsigned int uia, uib, *uiap; +signed char *cap; unsigned char uca; +const unsigned char *ucap; +const signed short *sap; unsigned short usa; +const unsigned short *usap; +const signed long long *llap; +signed long long llb; unsigned long long ulla; +const unsigned long long *ullap; vector unsigned long long test_vpdepd(void) { // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64> diff --git a/clang/test/CodeGen/builtins-ppc-xl-xst.c b/clang/test/CodeGen/builtins-ppc-xl-xst.c index 8ad45376e9779..226e9d8aff4e6 100644 --- 
a/clang/test/CodeGen/builtins-ppc-xl-xst.c +++ b/clang/test/CodeGen/builtins-ppc-xl-xst.c @@ -17,10 +17,12 @@ // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i16*, align 8 // CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 @@ -35,7 +37,7 @@ // CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8 // CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 @@ -50,9 +52,9 @@ // CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test1(vector signed short *c, signed short *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test1(vector signed short *c, signed short *st, const signed short *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test2( @@ -65,10 +67,12 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i16*, align 8 // CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 @@ -83,7 +87,7 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8 // CHECK-NEXT: store <8 x i16> [[TMP9]], 
<8 x i16>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 @@ -98,9 +102,10 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test2(vector unsigned short *c, unsigned short *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test2(vector unsigned short *c, unsigned short *st, + const unsigned short *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test3( @@ -113,10 +118,12 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 @@ -131,7 +138,7 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 @@ -146,9 +153,9 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test3(vector signed int *c, signed int *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test3(vector signed int *c, signed int *st, const signed int *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test4( @@ -161,10 +168,12 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** 
[[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 @@ -179,7 +188,7 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 @@ -194,9 +203,9 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test4(vector unsigned int *c, unsigned int *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test4(vector unsigned int *c, unsigned int *st, const unsigned int *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test5( @@ -209,10 +218,12 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i64*, align 8 // CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 @@ -227,7 +238,7 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], align 8 // CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8 @@ -242,9 +253,10 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test5(vector signed long long *c, signed long long *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test5(vector signed long long *c, signed long long *st, + const signed long long *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test6( @@ -257,10 +269,12 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: 
[[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i64*, align 8 // CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 @@ -275,7 +289,7 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], align 8 // CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8 @@ -290,9 +304,10 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test6(vector unsigned long long *c, unsigned long long *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test6(vector unsigned long long *c, unsigned long long *st, + const unsigned long long *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test7( @@ -305,10 +320,12 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x float>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: store <4 x float>* [[C:%.*]], <4 x float>** [[C_ADDR]], align 8 -// CHECK-NEXT: store float* [[PTR:%.*]], float** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load float*, float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store float* [[ST:%.*]], float** [[ST_ADDR]], align 8 +// CHECK-NEXT: store float* [[LD:%.*]], float** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load float*, float** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store float* [[TMP0]], float** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[__PTR_ADDR_I]], align 8 @@ -323,7 +340,7 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: store <4 x float> [[TMP6]], <4 x float>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>*, <4 x float>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load float*, float** 
[[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load float*, float** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store float* [[TMP10]], float** [[__PTR_ADDR_I2]], align 8 @@ -338,9 +355,9 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test7(vector float *c, float *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test7(vector float *c, float *st, const float *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test8( @@ -353,10 +370,12 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x double>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca double*, align 8 // CHECK-NEXT: store <2 x double>* [[C:%.*]], <2 x double>** [[C_ADDR]], align 8 -// CHECK-NEXT: store double* [[PTR:%.*]], double** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store double* [[ST:%.*]], double** [[ST_ADDR]], align 8 +// CHECK-NEXT: store double* [[LD:%.*]], double** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store double* [[TMP0]], double** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[__PTR_ADDR_I]], align 8 @@ -371,7 +390,7 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>*, <2 x double>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[ST_ADDR]], align 8 // CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store double* [[TMP10]], double** [[__PTR_ADDR_I2]], align 8 @@ -386,9 +405,9 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test8(vector double *c, double *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test8(vector double *c, double *st, const double *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } #ifdef __POWER8_VECTOR__ @@ -402,10 +421,12 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 -// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[ST_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[LD_ADDR:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 -// CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 -// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 
+// CHECK-P8-NEXT: store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[LD:%.*]], i128** [[LD_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8 // CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-P8-NEXT: store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8 // CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 @@ -420,7 +441,7 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 // CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 // CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 -// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8 // CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 // CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 @@ -435,9 +456,10 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 // CHECK-P8-NEXT: ret void // -void test9(vector signed __int128 *c, signed __int128 *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test9(vector signed __int128 *c, signed __int128 *st, + const signed __int128 *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-P8-LABEL: @test10( @@ -450,10 +472,12 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 -// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[ST_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[LD_ADDR:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 -// CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 -// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[LD:%.*]], i128** [[LD_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8 // CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-P8-NEXT: store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8 // CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 @@ -468,7 +492,7 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 // CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 // CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 -// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8 // CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 // CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 @@ -483,8 +507,9 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 // CHECK-P8-NEXT: ret 
void // -void test10(vector unsigned __int128 *c, unsigned __int128 *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test10(vector unsigned __int128 *c, unsigned __int128 *st, + const unsigned __int128 *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } #endif From 2ffe0eed51af296a4cf6be73c1b514c91e722114 Mon Sep 17 00:00:00 2001 From: Serge Guelton Date: Thu, 25 Jun 2020 05:57:01 -0400 Subject: [PATCH 066/109] Provide anchor for compiler extensions This patch is cherry-picked from 04b0a4e22e3b4549f9d241f8a9f37eebecb62a31, and amended to prevent an undefined reference to `llvm::EnableABIBreakingChecks' (cherry picked from commit 38778e1087b2825e91b07ce4570c70815b49dcdc) --- llvm/lib/Extensions/Extensions.cpp | 15 +++++++++++++++ llvm/lib/Extensions/LLVMBuild.txt | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Extensions/Extensions.cpp b/llvm/lib/Extensions/Extensions.cpp index e69de29bb2d1d..2fe537f91876a 100644 --- a/llvm/lib/Extensions/Extensions.cpp +++ b/llvm/lib/Extensions/Extensions.cpp @@ -0,0 +1,15 @@ +#include "llvm/Passes/PassPlugin.h" +#define HANDLE_EXTENSION(Ext) \ + llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + + +namespace llvm { + namespace details { + void extensions_anchor() { +#define HANDLE_EXTENSION(Ext) \ + static auto Ext = get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + } + } +} diff --git a/llvm/lib/Extensions/LLVMBuild.txt b/llvm/lib/Extensions/LLVMBuild.txt index 2005830a4dd7a..7a98c8f680513 100644 --- a/llvm/lib/Extensions/LLVMBuild.txt +++ b/llvm/lib/Extensions/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = Extensions parent = Libraries -required_libraries = +required_libraries = Support From 42ee33ca2bee10faedb6a02031c88bd6f70193f0 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Tue, 8 Sep 2020 16:39:11 +0100 Subject: [PATCH 067/109] [clang-format] Handle shifts within conditions In some situations shifts can be treated as a template and are thus formatted as one. Doing a couple of extra checks to ensure that the condition doesn't contain a template, and is in fact a bit shift, should solve this problem. This is a fix for [[ https://bugs.llvm.org/show_bug.cgi?id=46969 | bug 46969 ]] Reviewed By: MyDeveloperDay Patch By: Saldivarcher Differential Revision: https://reviews.llvm.org/D86581 (cherry picked from commit c81dd3d159ab03d46e4280c458d3c29e56648218) --- clang/lib/Format/TokenAnnotator.cpp | 20 +++++++++++++------- clang/unittests/Format/FormatTest.cpp | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 7f8e351265127..914c05f72aec2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -56,6 +56,13 @@ static bool isLambdaParameterList(const FormatToken *Left) { Left->Previous->MatchingParen->is(TT_LambdaLSquare); } +/// Returns \c true if the token is followed by a boolean condition, \c false +/// otherwise. +static bool isKeywordWithCondition(const FormatToken &Tok) { + return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, + tok::kw_constexpr, tok::kw_catch); +} + /// A parser that gathers additional information about tokens.
/// /// The \c TokenAnnotator tries to match parenthesis and square brakets and @@ -108,6 +115,12 @@ class AnnotatingParser { while (CurrentToken) { if (CurrentToken->is(tok::greater)) { + // Try to do a better job at looking for ">>" within the condition of + // a statement. + if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) && + Left->ParentBracket != tok::less && + isKeywordWithCondition(*Line.First)) + return false; Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // In TT_Proto, we must distignuish between: @@ -2733,13 +2746,6 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { Right.ParameterCount > 0); } -/// Returns \c true if the token is followed by a boolean condition, \c false -/// otherwise. -static bool isKeywordWithCondition(const FormatToken &Tok) { - return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, - tok::kw_constexpr, tok::kw_catch); -} - bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 6ac3ffbffd1c8..17d302f0b659d 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -7508,6 +7508,21 @@ TEST_F(FormatTest, UnderstandsTemplateParameters) { verifyFormat("static_assert(is_convertible::value, \"AAA\");"); verifyFormat("Constructor(A... a) : a_(X{std::forward(a)}...) {}"); verifyFormat("< < < < < < < < < < < < < < < < < < < < < < < < < < < < < <"); + verifyFormat("some_templated_type"); +} + +TEST_F(FormatTest, UnderstandsShiftOperators) { + verifyFormat("if (i < x >> 1)"); + verifyFormat("while (i < x >> 1)"); + verifyFormat("for (unsigned i = 0; i < i; ++i, v = v >> 1)"); + verifyFormat("for (unsigned i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat( + "for (std::vector::iterator i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat("Foo.call>()"); + verifyFormat("if (Foo.call>() == 0)"); + verifyFormat("for (std::vector>::iterator i = 0; i < x >> 1; " + "++i, v = v >> 1)"); + verifyFormat("if (w>, 1>::t)"); } TEST_F(FormatTest, BitshiftOperatorWidth) { From 6f1dbbc17c03206040eeaaee71e5db961f2cac30 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 8 Sep 2020 10:49:32 -0700 Subject: [PATCH 068/109] [X86] SSE4_A should only imply SSE3 not SSSE3 in the frontend. SSE4_1 and SSE4_2 do imply SSSE3. So I guess I got confused when switching the code to being table based in D83273.
Fixes PR47464 (cherry picked from commit e6bb4c8e7b3e27f214c9665763a2dd09aa96a5ac) --- clang/test/Preprocessor/predefined-arch-macros.c | 2 ++ llvm/lib/Support/X86TargetParser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index abab9274ffbb2..4dc9a800956ee 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2386,6 +2386,7 @@ // CHECK_AMDFAM10_M32: #define __SSE4A__ 1 // CHECK_AMDFAM10_M32: #define __SSE_MATH__ 1 // CHECK_AMDFAM10_M32: #define __SSE__ 1 +// CHECK_AMDFAM10_M32-NOT: #define __SSSE3__ 1 // CHECK_AMDFAM10_M32: #define __amdfam10 1 // CHECK_AMDFAM10_M32: #define __amdfam10__ 1 // CHECK_AMDFAM10_M32: #define __i386 1 @@ -2408,6 +2409,7 @@ // CHECK_AMDFAM10_M64: #define __SSE4A__ 1 // CHECK_AMDFAM10_M64: #define __SSE_MATH__ 1 // CHECK_AMDFAM10_M64: #define __SSE__ 1 +// CHECK_AMDFAM10_M64-NOT: #define __SSSE3__ 1 // CHECK_AMDFAM10_M64: #define __amd64 1 // CHECK_AMDFAM10_M64: #define __amd64__ 1 // CHECK_AMDFAM10_M64: #define __amdfam10 1 diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index c629f872df121..4c2d4efbfca8d 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -522,7 +522,7 @@ static constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {}; static constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {}; // SSE4_A->FMA4->XOP chain. -static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSSE3; +static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3; static constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A; static constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4; From d1cdc6da27a5937c239791c056eb2a754d7f4747 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Tue, 8 Sep 2020 21:21:14 -0400 Subject: [PATCH 069/109] [PowerPC] Set setMaxAtomicSizeInBitsSupported appropriately for 32-bit PowerPC in PPCTargetLowering Reviewed By: nemanjai Differential Revision: https://reviews.llvm.org/D86165 (cherry picked from commit 88b368a1c47bca536f03041f7464235b94ea98a1) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 + llvm/test/CodeGen/PowerPC/atomics-indexed.ll | 140 ++++-- llvm/test/CodeGen/PowerPC/atomics.ll | 437 ++++++++++++++++--- 3 files changed, 503 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 2d0b171152495..f54f1673526dd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1260,6 +1260,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } + if (!isPPC64) + setMaxAtomicSizeInBitsSupported(32); + setStackPointerRegisterToSaveRestore(isPPC64 ? 
PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: diff --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll index b4790adfd9088..cf7225a5fc200 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 ; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction). ; This is already checked for in Atomics-64.ll @@ -8,9 +9,25 @@ ; Indexed version of loads define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: load_x_i8_seq_cst -; CHECK: sync -; CHECK: lbzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i8_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 1 +; PPC32-NEXT: sync +; PPC32-NEXT: ori r4, r4, 24464 +; PPC32-NEXT: lbzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i8_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 1 +; PPC64-NEXT: sync +; PPC64-NEXT: ori r4, r4, 24464 +; PPC64-NEXT: lbzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -20,8 +37,23 @@ define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { ret i8 %val } define i16 @load_x_i16_acquire([100000 x i16]* %mem) { -; CHECK-LABEL: load_x_i16_acquire -; CHECK: lhzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i16_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 2 +; PPC32-NEXT: ori r4, r4, 48928 +; PPC32-NEXT: lhzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i16_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 2 +; PPC64-NEXT: ori r4, r4, 48928 +; PPC64-NEXT: lhzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -31,19 +63,39 @@ define i16 @load_x_i16_acquire([100000 x i16]* %mem) { ret i16 %val } define i32 @load_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: load_x_i32_monotonic -; CHECK: lwzx -; CHECK-NOT: sync +; CHECK-LABEL: load_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: lwzx r3, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 %val = load atomic i32, i32* %ptr monotonic, align 4 ret i32 %val } define i64 @load_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: load_x_i64_unordered -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ldx -; CHECK-NOT: sync +; PPC32-LABEL: load_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: ldx r3, r3, r4 +; PPC64-NEXT: blr %ptr = 
getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 %val = load atomic i64, i64* %ptr unordered, align 8 ret i64 %val @@ -51,35 +103,69 @@ define i64 @load_x_i64_unordered([100000 x i64]* %mem) { ; Indexed version of stores define void @store_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: store_x_i8_seq_cst -; CHECK: sync -; CHECK: stbx +; CHECK-LABEL: store_x_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 1 +; CHECK-NEXT: ori r4, r4, 24464 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: sync +; CHECK-NEXT: stbx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000 store atomic i8 42, i8* %ptr seq_cst, align 1 ret void } define void @store_x_i16_release([100000 x i16]* %mem) { -; CHECK-LABEL: store_x_i16_release -; CHECK: lwsync -; CHECK: sthx +; CHECK-LABEL: store_x_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 2 +; CHECK-NEXT: ori r4, r4, 48928 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: sthx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000 store atomic i16 42, i16* %ptr release, align 2 ret void } define void @store_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: store_x_i32_monotonic -; CHECK-NOT: sync -; CHECK: stwx +; CHECK-LABEL: store_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: stwx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 store atomic i32 42, i32* %ptr monotonic, align 4 ret void } define void @store_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: store_x_i64_unordered -; CHECK-NOT: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: stdx +; PPC32-LABEL: store_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 0 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: li r5, 42 +; PPC64-NEXT: stdx r5, r3, r4 +; PPC64-NEXT: blr %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 store atomic i64 42, i64* %ptr unordered, align 8 ret void diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll index c964218cb60bf..008cd4c7157c1 100644 --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 ; This is already checked for in Atomics-64.ll ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64 @@ -9,22 +10,35 @@ ; We first check loads, for all sizes from i8 to i64. ; We also vary orderings to check for barriers. 
define i8 @load_i8_unordered(i8* %mem) { -; CHECK-LABEL: load_i8_unordered -; CHECK: lbz -; CHECK-NOT: sync +; CHECK-LABEL: load_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: lbz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i8, i8* %mem unordered, align 1 ret i8 %val } define i16 @load_i16_monotonic(i16* %mem) { -; CHECK-LABEL: load_i16_monotonic -; CHECK: lhz -; CHECK-NOT: sync +; CHECK-LABEL: load_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i16, i16* %mem monotonic, align 2 ret i16 %val } define i32 @load_i32_acquire(i32* %mem) { -; CHECK-LABEL: load_i32_acquire -; CHECK: lwz [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i32_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lwz r3, 0(r3) +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i32_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lwz r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i32, i32* %mem acquire, align 4 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -33,11 +47,28 @@ define i32 @load_i32_acquire(i32* %mem) { ret i32 %val } define i64 @load_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: load_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ld [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 5 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: sync +; PPC64-NEXT: ld r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i64, i64* %mem seq_cst, align 8 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -48,95 +79,401 @@ define i64 @load_i64_seq_cst(i64* %mem) { ; Stores define void @store_i8_unordered(i8* %mem) { -; CHECK-LABEL: store_i8_unordered -; CHECK-NOT: sync -; CHECK: stb +; CHECK-LABEL: store_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: stb r4, 0(r3) +; CHECK-NEXT: blr store atomic i8 42, i8* %mem unordered, align 1 ret void } define void @store_i16_monotonic(i16* %mem) { -; CHECK-LABEL: store_i16_monotonic -; CHECK-NOT: sync -; CHECK: sth +; CHECK-LABEL: store_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr store atomic i16 42, i16* %mem monotonic, align 2 ret void } define void @store_i32_release(i32* %mem) { -; CHECK-LABEL: store_i32_release -; CHECK: lwsync -; CHECK: stw +; CHECK-LABEL: store_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr store atomic i32 42, i32* %mem release, align 4 ret void } define void @store_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: store_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: std +; PPC32-LABEL: store_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 5 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 
+; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r4, 42 +; PPC64-NEXT: sync +; PPC64-NEXT: std r4, 0(r3) +; PPC64-NEXT: blr store atomic i64 42, i64* %mem seq_cst, align 8 ret void } ; Atomic CmpXchg define i8 @cas_strong_i8_sc_sc(i8* %mem) { -; CHECK-LABEL: cas_strong_i8_sc_sc -; CHECK: sync +; PPC32-LABEL: cas_strong_i8_sc_sc: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: li r7, 255 +; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r8, 24 +; PPC32-NEXT: slw r5, r5, r3 +; PPC32-NEXT: slw r8, r6, r3 +; PPC32-NEXT: slw r6, r7, r3 +; PPC32-NEXT: and r7, r5, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB8_1: +; PPC32-NEXT: lwarx r9, 0, r4 +; PPC32-NEXT: and r5, r9, r6 +; PPC32-NEXT: cmpw r5, r8 +; PPC32-NEXT: bne cr0, .LBB8_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r6 +; PPC32-NEXT: or r9, r9, r7 +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: bne cr0, .LBB8_1 +; PPC32-NEXT: b .LBB8_4 +; PPC32-NEXT: .LBB8_3: +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: .LBB8_4: +; PPC32-NEXT: srw r3, r5, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_strong_i8_sc_sc: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: li r7, 255 +; PPC64-NEXT: rldicr r4, r3, 0, 61 +; PPC64-NEXT: xori r3, r8, 24 +; PPC64-NEXT: slw r5, r5, r3 +; PPC64-NEXT: slw r8, r6, r3 +; PPC64-NEXT: slw r6, r7, r3 +; PPC64-NEXT: and r7, r5, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB8_1: +; PPC64-NEXT: lwarx r9, 0, r4 +; PPC64-NEXT: and r5, r9, r6 +; PPC64-NEXT: cmpw r5, r8 +; PPC64-NEXT: bne cr0, .LBB8_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r6 +; PPC64-NEXT: or r9, r9, r7 +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: bne cr0, .LBB8_1 +; PPC64-NEXT: b .LBB8_4 +; PPC64-NEXT: .LBB8_3: +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: .LBB8_4: +; PPC64-NEXT: srw r3, r5, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst -; CHECK: lwsync %loaded = extractvalue { i8, i1} %val, 0 ret i8 %loaded } define i16 @cas_weak_i16_acquire_acquire(i16* %mem) { -; CHECK-LABEL: cas_weak_i16_acquire_acquire -;CHECK-NOT: sync +; PPC32-LABEL: cas_weak_i16_acquire_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: ori r7, r6, 65535 +; PPC32-NEXT: xori r4, r4, 16 +; PPC32-NEXT: slw r8, r5, r4 +; PPC32-NEXT: slw r9, r6, r4 +; PPC32-NEXT: slw r5, r7, r4 +; PPC32-NEXT: rlwinm r3, r3, 0, 0, 29 +; PPC32-NEXT: and r6, r8, r5 +; PPC32-NEXT: and r8, r9, r5 +; PPC32-NEXT: .LBB9_1: +; PPC32-NEXT: lwarx r9, 0, r3 +; PPC32-NEXT: and r7, r9, r5 +; PPC32-NEXT: cmpw r7, r8 +; PPC32-NEXT: bne cr0, .LBB9_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r5 +; PPC32-NEXT: or r9, r9, r6 +; PPC32-NEXT: stwcx. r9, 0, r3 +; PPC32-NEXT: bne cr0, .LBB9_1 +; PPC32-NEXT: b .LBB9_4 +; PPC32-NEXT: .LBB9_3: +; PPC32-NEXT: stwcx. 
r9, 0, r3 +; PPC32-NEXT: .LBB9_4: +; PPC32-NEXT: srw r3, r7, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i16_acquire_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: ori r7, r6, 65535 +; PPC64-NEXT: xori r4, r4, 16 +; PPC64-NEXT: slw r8, r5, r4 +; PPC64-NEXT: slw r9, r6, r4 +; PPC64-NEXT: slw r5, r7, r4 +; PPC64-NEXT: rldicr r3, r3, 0, 61 +; PPC64-NEXT: and r6, r8, r5 +; PPC64-NEXT: and r8, r9, r5 +; PPC64-NEXT: .LBB9_1: +; PPC64-NEXT: lwarx r9, 0, r3 +; PPC64-NEXT: and r7, r9, r5 +; PPC64-NEXT: cmpw r7, r8 +; PPC64-NEXT: bne cr0, .LBB9_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r5 +; PPC64-NEXT: or r9, r9, r6 +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: bne cr0, .LBB9_1 +; PPC64-NEXT: b .LBB9_4 +; PPC64-NEXT: .LBB9_3: +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: .LBB9_4: +; PPC64-NEXT: srw r3, r7, r4 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire -; CHECK: lwsync %loaded = extractvalue { i16, i1} %val, 0 ret i16 %loaded } define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) { -; CHECK-LABEL: cas_strong_i32_acqrel_acquire -; CHECK: lwsync +; CHECK-LABEL: cas_strong_i32_acqrel_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB10_1: +; CHECK-NEXT: lwarx r4, 0, r3 +; CHECK-NEXT: cmpw r6, r4 +; CHECK-NEXT: bne cr0, .LBB10_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: stwcx. r5, 0, r3 +; CHECK-NEXT: bne cr0, .LBB10_1 +; CHECK-NEXT: b .LBB10_4 +; CHECK-NEXT: .LBB10_3: +; CHECK-NEXT: stwcx. r4, 0, r3 +; CHECK-NEXT: .LBB10_4: +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire -; CHECK: lwsync %loaded = extractvalue { i32, i1} %val, 0 ret i32 %loaded } define i64 @cas_weak_i64_release_monotonic(i64* %mem) { -; CHECK-LABEL: cas_weak_i64_release_monotonic -; CHECK: lwsync +; PPC32-LABEL: cas_weak_i64_release_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: stw r4, 12(r1) +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: stw r4, 8(r1) +; PPC32-NEXT: addi r4, r1, 8 +; PPC32-NEXT: li r6, 1 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: li r8, 0 +; PPC32-NEXT: bl __atomic_compare_exchange_8 +; PPC32-NEXT: lwz r4, 12(r1) +; PPC32-NEXT: lwz r3, 8(r1) +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i64_release_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB11_1: +; PPC64-NEXT: ldarx r4, 0, r3 +; PPC64-NEXT: cmpd r6, r4 +; PPC64-NEXT: bne cr0, .LBB11_4 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: stdcx. r5, 0, r3 +; PPC64-NEXT: bne cr0, .LBB11_1 +; PPC64-NEXT: # %bb.3: +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr +; PPC64-NEXT: .LBB11_4: +; PPC64-NEXT: stdcx. 
r4, 0, r3 +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic -; CHECK-NOT: [sync ] %loaded = extractvalue { i64, i1} %val, 0 ret i64 %loaded } ; AtomicRMW define i8 @add_i8_monotonic(i8* %mem, i8 %operand) { -; CHECK-LABEL: add_i8_monotonic -; CHECK-NOT: sync +; PPC32-LABEL: add_i8_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC32-NEXT: li r6, 255 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r7, 24 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: .LBB12_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: add r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB12_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: blr +; +; PPC64-LABEL: add_i8_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC64-NEXT: li r6, 255 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: xori r3, r7, 24 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: .LBB12_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: add r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB12_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: blr %val = atomicrmw add i8* %mem, i8 %operand monotonic ret i8 %val } define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) { -; CHECK-LABEL: xor_i16_seq_cst -; CHECK: sync +; PPC32-LABEL: xor_i16_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: ori r6, r6, 65535 +; PPC32-NEXT: xori r3, r7, 16 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB13_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: xor r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB13_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: xor_i16_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: ori r6, r6, 65535 +; PPC64-NEXT: xori r3, r7, 16 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB13_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: xor r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB13_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = atomicrmw xor i16* %mem, i16 %operand seq_cst -; CHECK: lwsync ret i16 %val } define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) { -; CHECK-LABEL: xchg_i32_acq_rel -; CHECK: lwsync +; CHECK-LABEL: xchg_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB14_1: +; CHECK-NEXT: lwarx r5, 0, r3 +; CHECK-NEXT: stwcx. 
r4, 0, r3 +; CHECK-NEXT: bne cr0, .LBB14_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: mr r3, r5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel -; CHECK: lwsync ret i32 %val } define i64 @and_i64_release(i64* %mem, i64 %operand) { -; CHECK-LABEL: and_i64_release -; CHECK: lwsync +; PPC32-LABEL: and_i64_release: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: bl __atomic_fetch_and_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: and_i64_release: +; PPC64: # %bb.0: +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB15_1: +; PPC64-NEXT: ldarx r5, 0, r3 +; PPC64-NEXT: and r6, r4, r5 +; PPC64-NEXT: stdcx. r6, 0, r3 +; PPC64-NEXT: bne cr0, .LBB15_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: mr r3, r5 +; PPC64-NEXT: blr %val = atomicrmw and i64* %mem, i64 %operand release -; CHECK-NOT: [sync ] ret i64 %val } From 8ae3293030d9691ebe0006b79cec1b06bb8015cf Mon Sep 17 00:00:00 2001 From: Sourabh Singh Tomar Date: Thu, 10 Sep 2020 23:04:37 +0530 Subject: [PATCH 070/109] Revert D86875 "[Flang][NFC] Remove license comments from files in docs/ folder." This reverts commit f787c9a90c69f, this was causing some build issues. (cherry picked from commit 932aae77e92b08e63c0225b6eb37dfa80b310313) --- flang/docs/ArrayComposition.md | 8 ++++++++ flang/docs/C++17.md | 8 ++++++++ flang/docs/C++style.md | 8 ++++++++ flang/docs/Calls.md | 8 ++++++++ flang/docs/Character.md | 8 ++++++++ flang/docs/ControlFlowGraph.md | 8 ++++++++ flang/docs/Directives.md | 8 ++++++++ flang/docs/Extensions.md | 8 ++++++++ flang/docs/FortranForCProgrammers.md | 8 ++++++++ flang/docs/FortranIR.md | 8 ++++++++ flang/docs/IORuntimeInternals.md | 8 ++++++++ flang/docs/ImplementingASemanticCheck.md | 8 ++++++++ flang/docs/Intrinsics.md | 8 ++++++++ flang/docs/LabelResolution.md | 8 ++++++++ flang/docs/ModFiles.md | 8 ++++++++ flang/docs/OpenMP-semantics.md | 8 ++++++++ flang/docs/OptionComparison.md | 8 ++++++++ flang/docs/Overview.md | 8 ++++++++ flang/docs/ParserCombinators.md | 8 ++++++++ flang/docs/Parsing.md | 8 ++++++++ flang/docs/Preprocessing.md | 8 ++++++++ flang/docs/PullRequestChecklist.md | 8 ++++++++ flang/docs/RuntimeDescriptor.md | 8 ++++++++ flang/docs/Semantics.md | 8 ++++++++ 24 files changed, 192 insertions(+) diff --git a/flang/docs/ArrayComposition.md b/flang/docs/ArrayComposition.md index 18194caadf09c..0f30af39f9e4b 100644 --- a/flang/docs/ArrayComposition.md +++ b/flang/docs/ArrayComposition.md @@ -1,3 +1,11 @@ + + This note attempts to describe the motivation for and design of an implementation of Fortran 90 (and later) array expression evaluation that minimizes the use of dynamically allocated temporary storage for diff --git a/flang/docs/C++17.md b/flang/docs/C++17.md index ea8395cfdedc7..87d5fc01f0922 100644 --- a/flang/docs/C++17.md +++ b/flang/docs/C++17.md @@ -1,3 +1,11 @@ + + ## C++14/17 features used in f18 The C++ dialect used in this project constitutes a subset of the diff --git a/flang/docs/C++style.md b/flang/docs/C++style.md index 77e0a04638238..4ab95393d758a 100644 --- a/flang/docs/C++style.md +++ b/flang/docs/C++style.md @@ -1,3 +1,11 @@ + + ## In brief: * Use *clang-format* from llvm 7 diff --git a/flang/docs/Calls.md b/flang/docs/Calls.md index 8a4d65820d19f..d70bc910d73db 100644 --- a/flang/docs/Calls.md +++ b/flang/docs/Calls.md @@ 
-1,3 +1,11 @@ + + ## Procedure reference implementation protocol Fortran function and subroutine references are complicated. diff --git a/flang/docs/Character.md b/flang/docs/Character.md index f66b144389450..700db864f2dac 100644 --- a/flang/docs/Character.md +++ b/flang/docs/Character.md @@ -1,3 +1,11 @@ + + ## Implementation of `CHARACTER` types in f18 ### Kinds and Character Sets diff --git a/flang/docs/ControlFlowGraph.md b/flang/docs/ControlFlowGraph.md index 7d1e514a87adb..b2b549845ebb6 100644 --- a/flang/docs/ControlFlowGraph.md +++ b/flang/docs/ControlFlowGraph.md @@ -1,3 +1,11 @@ + + ## Concept After a Fortran subprogram has been parsed, its names resolved, and all its semantic constraints successfully checked, the parse tree of its diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index 554dc4608dd43..c2e93c5f3de2e 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -1,3 +1,11 @@ + + Compiler directives supported by F18 ==================================== diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 86a4f04de57f1..9010b770cca6d 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -1,3 +1,11 @@ + + As a general principle, this compiler will accept by default and without complaint many legacy features, extensions to the standard language, and features that have been deleted from the standard, diff --git a/flang/docs/FortranForCProgrammers.md b/flang/docs/FortranForCProgrammers.md index 542034f3ea833..103def2a92ce6 100644 --- a/flang/docs/FortranForCProgrammers.md +++ b/flang/docs/FortranForCProgrammers.md @@ -1,3 +1,11 @@ + + Fortran For C Programmers ========================= diff --git a/flang/docs/FortranIR.md b/flang/docs/FortranIR.md index 83193ff27a359..5d83aaa8e34cf 100644 --- a/flang/docs/FortranIR.md +++ b/flang/docs/FortranIR.md @@ -1,3 +1,11 @@ + + # Design: Fortran IR ## Introduction diff --git a/flang/docs/IORuntimeInternals.md b/flang/docs/IORuntimeInternals.md index 8ff464ee9c8f7..b4f3092a014ec 100644 --- a/flang/docs/IORuntimeInternals.md +++ b/flang/docs/IORuntimeInternals.md @@ -1,3 +1,11 @@ + + Fortran I/O Runtime Library Internal Design =========================================== diff --git a/flang/docs/ImplementingASemanticCheck.md b/flang/docs/ImplementingASemanticCheck.md index 2406f5bc2a58c..3bb16915cb880 100644 --- a/flang/docs/ImplementingASemanticCheck.md +++ b/flang/docs/ImplementingASemanticCheck.md @@ -1,3 +1,11 @@ + +# Introduction I recently added a semantic check to the f18 compiler front end. This document describes my thought process and the resulting implementation. 
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index 6f4dec4678233..7be0bf3e4a9ca 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -1,3 +1,11 @@ + + # A categorization of standard (2018) and extended Fortran intrinsic procedures This note attempts to group the intrinsic procedures of Fortran into categories diff --git a/flang/docs/LabelResolution.md b/flang/docs/LabelResolution.md index 2dfa5a30bb3ca..e837b4fa6aece 100644 --- a/flang/docs/LabelResolution.md +++ b/flang/docs/LabelResolution.md @@ -1,3 +1,11 @@ + + # Semantics: Resolving Labels and Construct Names ## Overview diff --git a/flang/docs/ModFiles.md b/flang/docs/ModFiles.md index 367cd4cd54f7c..483341bdd0f47 100644 --- a/flang/docs/ModFiles.md +++ b/flang/docs/ModFiles.md @@ -1,3 +1,11 @@ + + # Module Files Module files hold information from a module that is necessary to compile diff --git a/flang/docs/OpenMP-semantics.md b/flang/docs/OpenMP-semantics.md index 22a3ca5614ebc..4e2a81739cf81 100644 --- a/flang/docs/OpenMP-semantics.md +++ b/flang/docs/OpenMP-semantics.md @@ -1,3 +1,11 @@ + + # OpenMP Semantic Analysis ## OpenMP for F18 diff --git a/flang/docs/OptionComparison.md b/flang/docs/OptionComparison.md index 5c04450a7bb34..db5932411cc1e 100644 --- a/flang/docs/OptionComparison.md +++ b/flang/docs/OptionComparison.md @@ -1,3 +1,11 @@ + + # Compiler options This document catalogs the options processed by F18's peers/competitors. Much of the document is taken up by a set of tables that list the options categorized into different topics. Some of the table headings link to more information about the contents of the tables. For example, the table on **Standards conformance** options links to [notes on Standards conformance](#standards). diff --git a/flang/docs/Overview.md b/flang/docs/Overview.md index 807efda2ed9a3..75a8cd1c4cab0 100644 --- a/flang/docs/Overview.md +++ b/flang/docs/Overview.md @@ -1,3 +1,11 @@ + + # Overview of Compiler Phases Each phase produces either correct output or fatal errors. diff --git a/flang/docs/ParserCombinators.md b/flang/docs/ParserCombinators.md index 757684dcfda60..4f3dc6fd07ae6 100644 --- a/flang/docs/ParserCombinators.md +++ b/flang/docs/ParserCombinators.md @@ -1,3 +1,11 @@ + + ## Concept The Fortran language recognizer here can be classified as an LL recursive descent parser. It is composed from a *parser combinator* library that diff --git a/flang/docs/Parsing.md b/flang/docs/Parsing.md index 54a4fd752f6c1..fad9a4d57278c 100644 --- a/flang/docs/Parsing.md +++ b/flang/docs/Parsing.md @@ -1,3 +1,11 @@ + + The F18 Parser ============== This program source code implements a parser for the Fortran programming diff --git a/flang/docs/Preprocessing.md b/flang/docs/Preprocessing.md index 9b4d905177b7f..7f6f3951cfd16 100644 --- a/flang/docs/Preprocessing.md +++ b/flang/docs/Preprocessing.md @@ -1,3 +1,11 @@ + + Fortran Preprocessing ===================== diff --git a/flang/docs/PullRequestChecklist.md b/flang/docs/PullRequestChecklist.md index 17b6d64923f58..12a67be374a20 100644 --- a/flang/docs/PullRequestChecklist.md +++ b/flang/docs/PullRequestChecklist.md @@ -1,3 +1,11 @@ + + # Pull request checklist Please review the following items before submitting a pull request. This list can also be used when reviewing pull requests. 
diff --git a/flang/docs/RuntimeDescriptor.md b/flang/docs/RuntimeDescriptor.md index a8eff33f65211..d819517fa9795 100644 --- a/flang/docs/RuntimeDescriptor.md +++ b/flang/docs/RuntimeDescriptor.md @@ -1,3 +1,11 @@ + + ## Concept The properties that characterize data values and objects in Fortran programs must sometimes be materialized when the program runs. diff --git a/flang/docs/Semantics.md b/flang/docs/Semantics.md index f879671b4f4ed..6ea0b292de69f 100644 --- a/flang/docs/Semantics.md +++ b/flang/docs/Semantics.md @@ -1,3 +1,11 @@ + + # Semantic Analysis The semantic analysis pass determines if a syntactically correct Fortran From 9ad89a8cc3c7de0f0b5a306932671a4c78644d03 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Thu, 3 Sep 2020 11:44:03 +0100 Subject: [PATCH 071/109] [flang] Convert release notes to markdown Switch ReleaseNotes from .rst to .md to match the other docs. At the same time, fix the version number for master. --- flang/docs/ReleaseNotes.md | 87 +++++++++++++++++++++++++++++++++ flang/docs/ReleaseNotes.rst | 96 ------------------------------------- 2 files changed, 87 insertions(+), 96 deletions(-) create mode 100644 flang/docs/ReleaseNotes.md delete mode 100644 flang/docs/ReleaseNotes.rst diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md new file mode 100644 index 0000000000000..b891ab904a04c --- /dev/null +++ b/flang/docs/ReleaseNotes.md @@ -0,0 +1,87 @@ +# Flang 11.0.0 (In-Progress) Release Notes + +> **warning** +> +> These are in-progress notes for the upcoming LLVM 11.0.0 release. +> Release notes for previous releases can be found on [the Download +> Page](https://releases.llvm.org/download.html). + +## Introduction + +This document contains the release notes for the Flang Fortran frontend, +part of the LLVM Compiler Infrastructure, release 11.0.0. Here we +describe the status of Flang in some detail, including major +improvements from the previous release and new feature work. For the +general LLVM release notes, see [the LLVM +documentation](https://llvm.org/docs/ReleaseNotes.html). All LLVM +releases may be downloaded from the [LLVM releases web +site](https://llvm.org/releases/). + +Note that if you are reading this file from a Git checkout, this +document applies to the *next* release, not the current one. To see the +release notes for a specific release, please see the [releases +page](https://llvm.org/releases/). + +## Known Issues + +These are issues that couldn't be fixed before the release. See the bug +reports for the latest status. + + * ... + +## Introducing Flang + +Flang is LLVM's Fortran front end and is new for the LLVM 11 release. + +Flang is still a work in progress for this release and is included for +experimentation and feedback. + +Flang is able to parse a comprehensive subset of the Fortran language +and check it for correctness. Flang is not yet able to generate LLVM IR +for the source code and thus is unable to compile a running binary. + +Flang is able to unparse the input source code into a canonical form and +emit it to allow testing. Flang can also invoke an external Fortran +compiler on this canonical input. + +Flang's parser has comprehensive support for: + * Fortran 2018 + * OpenMP 4.5 + * OpenACC 3.0 + +Interested users are invited to try to compile their Fortran codes with +flang in and report any issues in parsing or semantic checking in +[bugzilla](https://bugs.llvm.org/enter_bug.cgi?product=flang). + +### Major missing features + + * Flang is not supported on Windows platforms. 
+ +## Using Flang + +Usage: `flang hello.f90 -o hello.bin` + +By default, Flang will parse the Fortran file `hello.f90` then unparse it to a +canonical Fortran source file. Flang will then invoke an external +Fortran compiler to compile this source file and link it, placing the +resulting executable in `hello.bin`. + +To specify the external Fortran compiler, set the `F18_FC` environment +variable to the name of the compiler binary and ensure that it is on your +`PATH`. The default value for `F18_FC` is `gfortran`. + +When invoked with no source input, Flang will wait for input on stdin. +When invoked in this way, Flang performs the same actions as if +called with `-fdebug-measure-parse-tree -funparse` and does not invoke +`F18_FC`. + +For a full list of options that Flang supports, run `flang --help`. + +## Additional Information + +Flang's documentation is located in the `flang/docs/` directory in the +LLVM monorepo. + +If you have any questions or comments about Flang, please feel free to +contact us via the [mailing +list](https://lists.llvm.org/mailman/listinfo/flang-dev). diff --git a/flang/docs/ReleaseNotes.rst b/flang/docs/ReleaseNotes.rst deleted file mode 100644 index bbc7377412d63..0000000000000 --- a/flang/docs/ReleaseNotes.rst +++ /dev/null @@ -1,96 +0,0 @@ -======================================== -Flang 11.0.0 (In-Progress) Release Notes -======================================== - -.. contents:: - :local: - :depth: 2 - -.. warning:: - - These are in-progress notes for the upcoming LLVM 11.0.0 release. - Release notes for previous releases can be found on - `the Download Page `_. - -Introduction -============ - -This document contains the release notes for the Flang Fortran -frontend, part of the LLVM Compiler Infrastructure, release 11.0.0. Here we -describe the status of Flang in some detail, including major -improvements from the previous release and new feature work. For the -general LLVM release notes, see `the LLVM -documentation `_. All LLVM -releases may be downloaded from the `LLVM releases web -site `_. - -Note that if you are reading this file from a Git checkout, this document -applies to the *next* release, not -the current one. To see the release notes for a specific release, please -see the `releases page `_. - -Known Issues -============ - -These are issues that couldn't be fixed before the release. See the bug reports for the latest status. - -- ... - -Introducing Flang -================= - -Flang is LLVM's Fortran front end and is new for the LLVM 11 release. - -Flang is still a work in progress for this release and is included for -experimentation and feedback. - -Flang status ------------- - -Flang is able to parse a comprehensive subset of the Fortran language -and check it for correctness. Flang is not yet able to generate LLVM IR for -the source code and thus is unable to compile a running binary. - -Flang is able to unparse the input source code into a canonical form and emit -it to allow testing. Flang can also invoke an external Fortran compiler on this -canonical input. - -Flang's parser has comprehensive support for: -- Fortran 2018 -- OpenMP 4.5 -- OpenACC 3.0 - -Major missing features ----------------------- - -- Flang is not supported on Windows platforms. - -Using Flang -=========== - -Usage: ``flang hello.f90 -o hello.bin`` - -Flang will parse the Fortran file ``hello.f90`` then unparse it to a canonical -Fortran source file. 
Flang will then invoke an external Fortran compiler to -compile this source file and link it, placing the resulting executable -in ``hello.bin``. - -To specify the external Fortran compiler, set the ``F18_FC`` environment -variable to the name of the compiler binary and ensure it is on your ``PATH``. -The default value for ``F18_FC`` is ``gfortran``. - -When invoked with no source input, Flang will wait for input on standard in. -When invoked in this way, Flang performs the same actions as if called with -``-fdebug-measure-parse-tree -funparse`` and does not invoke ``F18_FC``. - -For a full list of options that Flang supports, run ``flang --help``. - -Additional Information -====================== - -Flang's documentation is located in the ``flang/docs/`` directory in -the LLVM monorepo. - -If you have any questions or comments about Flang, please feel free to -contact us via the `mailing -list `_. From d024df40a192407eb54c5f8c17a7eb3b49e3f6da Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 10 Sep 2020 11:44:12 +0200 Subject: [PATCH 072/109] [compiler-rt] [netbsd] Reintroduce __sanitizer_protoent Partial revert of https://reviews.llvm.org/D82424 (cherry picked from commit f51e55e09eefbbc57fdd802f5f17e34749ba03ec) --- .../lib/sanitizer_common/sanitizer_platform_limits_netbsd.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h index ae54a8cf105ee..d80280d9bf8c8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h @@ -129,6 +129,12 @@ struct __sanitizer_shmid_ds { void *_shm_internal; }; +struct __sanitizer_protoent { + char *p_name; + char **p_aliases; + int p_proto; +}; + struct __sanitizer_netent { char *n_name; char **n_aliases; From 29d700a8132088ee6320702b601c0479a710a3ec Mon Sep 17 00:00:00 2001 From: Alok Kumar Sharma Date: Fri, 11 Sep 2020 11:11:39 +0530 Subject: [PATCH 073/109] [DebugInfo] Fixing CodeView assert related to lowerBound field of DISubrange. This is to fix CodeView build failure https://bugs.llvm.org/show_bug.cgi?id=47287 after DIsSubrange upgrade D80197 Assert condition is now removed and Count is calculated in case LowerBound is absent or zero and Count or UpperBound is constant. If Count is unknown it is later handled as VLA (currently Count is set to zero). Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D87406 (cherry picked from commit e45b0708ae81ace27de53f12b32a80601cb12bf3) --- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 3f053c7a38c77..39069e24e0612 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1592,11 +1592,16 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { assert(Element->getTag() == dwarf::DW_TAG_subrange_type); const DISubrange *Subrange = cast(Element); - assert(!Subrange->getRawLowerBound() && - "codeview doesn't support subranges with lower bounds"); int64_t Count = -1; - if (auto *CI = Subrange->getCount().dyn_cast()) - Count = CI->getSExtValue(); + // Calculate the count if either LowerBound is absent or is zero and + // either of Count or UpperBound are constant. 
+ auto *LI = Subrange->getLowerBound().dyn_cast(); + if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) { + if (auto *CI = Subrange->getCount().dyn_cast()) + Count = CI->getSExtValue(); + else if (auto *UI = Subrange->getUpperBound().dyn_cast()) + Count = UI->getSExtValue() + 1; // LowerBound is zero + } // Forward declarations of arrays without a size and VLAs use a count of -1. // Emit a count of zero in these cases to match what MSVC does for arrays From bff8d98129e8512ce9dcaed04e49c4f32f3a7e71 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Thu, 10 Sep 2020 15:41:36 +0000 Subject: [PATCH 074/109] [clang][aarch64] Fix mangling of bfloat16 neon vectors The AAPCS64 specifies the internal type is used for c++ mangling. For bfloat16 it was defined as `BFloat16` when it should be `Bfloat16`, i.e. lowercase 'f'. For more information, see: https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#appendix-support-for-advanced-simd-extensions Reviewed By: stuij Differential Revision: https://reviews.llvm.org/D87463 (cherry picked from commit cabd60c26b5df34f096cccca5a915bde3b1d8ee1) --- clang/lib/AST/ItaniumMangle.cpp | 2 +- clang/test/CodeGenCXX/mangle-neon-vectors.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index ddfbe9f864991..8b1419074df55 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3248,7 +3248,7 @@ static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) { case BuiltinType::Double: return "Float64"; case BuiltinType::BFloat16: - return "BFloat16"; + return "Bfloat16"; default: llvm_unreachable("Unexpected vector element base type"); } diff --git a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp index 6faf6226efd2e..cb5e40be6a6df 100644 --- a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp +++ b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -triple armv7-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64 +// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-feature +bf16 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64-BF16 typedef float float32_t; typedef double float64_t; @@ -14,6 +15,10 @@ typedef short poly16_t; #endif typedef unsigned __INT64_TYPE__ uint64_t; +#if defined(__ARM_FEATURE_BF16) +typedef __bf16 bfloat16_t; +#endif + typedef __attribute__((neon_vector_type(2))) int int32x2_t; typedef __attribute__((neon_vector_type(4))) int int32x4_t; typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; @@ -28,6 +33,10 @@ typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; +#if defined(__ARM_FEATURE_BF16) +typedef __attribute__((neon_vector_type(4))) __bf16 bfloat16x4_t; +#endif + // CHECK: 16__simd64_int32_t // CHECK-AARCH64: 11__Int32x2_t void f1(int32x2_t v) { } @@ -72,3 +81,8 @@ void f10(poly16x8_t v) {} // CHECK-AARCH64: 13__Float64x2_t void f11(float64x2_t v) { } #endif + +#if defined(__ARM_FEATURE_BF16) +// CHECK-AARCH64-BF16: 14__Bfloat16x4_t +void f12(bfloat16x4_t v) {} 
+#endif From 01be54e257d9f09c6bbc7fe98f8f7449b30b37da Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Fri, 11 Sep 2020 14:17:19 +0100 Subject: [PATCH 075/109] [flang] Add new documentation main page Add a new index page to be the Flang documentation mainpage instead of Overview.md, which jumps straight into the compiler Design. The index file needs to be in .rst format to use the toctree directive to create table of contents. Also use the sphinx_markdown_tables extension to generate html tables form markdown. A number of additional style changes to the existing docs were needed to make this work well: * Convert all headings to the # style, which works better with toctree's titlesonly option. Ensure that there is only one top-level heading per document. * Add a title to documents that don't have one for rendering on the index. * Convert the grammar docs from .txt to .md. for better rendering * Fixed broken link to a section in another document - sphinx does not seem to support anchor links in markdown files. Depends on D87226 Reviewed By: sameeranjoshi Differential Revision: https://reviews.llvm.org/D87242 --- flang/docs/ArrayComposition.md | 31 +++++---- flang/docs/BijectiveInternalNameUniquing.md | 21 +++--- flang/docs/C++17.md | 13 ++-- flang/docs/C++style.md | 9 +++ flang/docs/Calls.md | 7 ++ flang/docs/Character.md | 17 +++-- flang/docs/ControlFlowGraph.md | 7 ++ flang/docs/Directives.md | 5 +- flang/docs/Extensions.md | 27 +++++--- flang/docs/FortranForCProgrammers.md | 68 ++++++++++--------- flang/docs/FortranIR.md | 5 ++ flang/docs/IORuntimeInternals.md | 63 +++++++++-------- flang/docs/ImplementingASemanticCheck.md | 42 +++++++----- flang/docs/Intrinsics.md | 57 +++++++++------- flang/docs/LabelResolution.md | 5 ++ flang/docs/ModFiles.md | 5 ++ ...-4.5-grammar.txt => OpenMP-4.5-grammar.md} | 17 +++-- flang/docs/OpenMP-semantics.md | 5 ++ flang/docs/OptionComparison.md | 15 ++-- flang/docs/Overview.md | 5 ++ flang/docs/ParserCombinators.md | 9 +++ flang/docs/Parsing.md | 33 +++++---- flang/docs/Preprocessing.md | 32 +++++---- flang/docs/PullRequestChecklist.md | 2 +- flang/docs/RuntimeDescriptor.md | 7 ++ flang/docs/Semantics.md | 5 ++ flang/docs/conf.py | 13 +++- .../{f2018-grammar.txt => f2018-grammar.md} | 12 ++-- flang/docs/index.md | 61 +++++++++++++++++ 29 files changed, 399 insertions(+), 199 deletions(-) rename flang/docs/{OpenMP-4.5-grammar.txt => OpenMP-4.5-grammar.md} (97%) rename flang/docs/{f2018-grammar.txt => f2018-grammar.md} (99%) create mode 100644 flang/docs/index.md diff --git a/flang/docs/ArrayComposition.md b/flang/docs/ArrayComposition.md index 0f30af39f9e4b..9e61abe5670f3 100644 --- a/flang/docs/ArrayComposition.md +++ b/flang/docs/ArrayComposition.md @@ -6,6 +6,13 @@ --> +# Array Composition + +```eval_rst +.. contents:: + :local: +``` + This note attempts to describe the motivation for and design of an implementation of Fortran 90 (and later) array expression evaluation that minimizes the use of dynamically allocated temporary storage for @@ -34,8 +41,8 @@ Other Fortran intrinsic functions are technically transformational (e.g., `COMMAND_ARGUMENT_COUNT`) but not of interest for this note. The generic `REDUCE` is also not considered here. -Arrays as functions -=================== +## Arrays as functions + A whole array can be viewed as a function that maps its indices to the values of its elements. Specifically, it is a map from a tuple of integers to its element type. @@ -45,8 +52,8 @@ and the shape of the array delimits the domain of the map. 
`REAL :: A(N,M)` can be seen as a function mapping ordered pairs of integers `(J,K)` with `1<=J<=N` and `1<=J<=M` to real values. -Array expressions as functions -============================== +## Array expressions as functions + The same perspective can be taken of an array expression comprising intrinsic operators and elemental functions. Fortran doesn't allow one to apply subscripts directly to an expression, @@ -83,8 +90,8 @@ side variable as an operand of the right-hand side expression, and any function calls on the right-hand side are elemental or scalar-valued, we can avoid the use of a temporary. -Transformational intrinsic functions as function composition -============================================================ +## Transformational intrinsic functions as function composition + Many of the transformational intrinsic functions listed above can, when their array arguments are viewed as functions over their index tuples, be seen as compositions of those functions with @@ -127,8 +134,8 @@ More completely: * `SPREAD(A,DIM=d,NCOPIES=n)` for compile-time `d` simply applies `A` to a reduced index tuple. -Determination of rank and shape -=============================== +## Determination of rank and shape + An important part of evaluating array expressions without the use of temporary storage is determining the shape of the result prior to, or without, evaluating the elements of the result. @@ -173,8 +180,8 @@ In cases where the analyzed shape is known at compile time, we should be able to have the opportunity to avoid heap allocation in favor of stack storage, if the scope of the variable is local. -Automatic reallocation of allocatables -====================================== +## Automatic reallocation of allocatables + Fortran 2003 introduced the ability to assign non-conforming array expressions to ALLOCATABLE arrays with the implied semantics of reallocation to the new shape. @@ -182,8 +189,8 @@ The implementation of this feature also becomes more straightforward if our implementation of array expressions has decoupled calculation of shapes from the evaluation of the elements of the result. -Rewriting rules -=============== +## Rewriting rules + Let `{...}` denote an ordered tuple of 1-based indices, e.g. `{j,k}`, into the result of an array expression or subexpression. diff --git a/flang/docs/BijectiveInternalNameUniquing.md b/flang/docs/BijectiveInternalNameUniquing.md index b302d389c664f..7a6e8a4f4e644 100644 --- a/flang/docs/BijectiveInternalNameUniquing.md +++ b/flang/docs/BijectiveInternalNameUniquing.md @@ -1,4 +1,9 @@ -## Bijective Internal Name Uniquing +# Bijective Internal Name Uniquing + +```eval_rst +.. contents:: + :local: +``` FIR has a flat namespace. No two objects may have the same name at the module level. (These would be functions, globals, etc.) @@ -13,14 +18,14 @@ Fortran is case insensitive, which allows the compiler to convert the user's identifiers to all lower case. Such a universal conversion implies that all upper case letters are available for use in uniquing. -### Prefix `_Q` +## Prefix `_Q` All uniqued names have the prefix sequence `_Q` to indicate the name has been uniqued. (Q is chosen because it is a [low frequency letter](http://pi.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html) in English.) -### Scope Building +## Scope Building Symbols can be scoped by the module, submodule, or procedure that contains that symbol. 
After the `_Q` sigil, names are constructed from outermost to @@ -45,7 +50,7 @@ The uniqued name of `fun` becomes: _QMmodSs1modSs2modFsubPfun ``` -### Common blocks +## Common blocks * A common block name will be prefixed with `B` @@ -69,7 +74,7 @@ The uniqued name in case of `blank common block` becomes: _QB ``` -### Module scope global data +## Module scope global data * A global data entity is prefixed with `E` * A global entity that is constant (parameter) will be prefixed with `EC` @@ -92,7 +97,7 @@ The uniqued name of `pi` becomes: _QMmodECpi ``` -### Procedures/Subprograms +## Procedures/Subprograms * A procedure/subprogram is prefixed with `P` @@ -105,7 +110,7 @@ The uniqued name of `sub` becomes: _QPsub ``` -### Derived types and related +## Derived types and related * A derived type is prefixed with `T` * If a derived type has KIND parameters, they are listed in a consistent @@ -148,7 +153,7 @@ The uniqued name of `yourtype` where `k1=4` and `k2=-6` (at compile-time): type `yourtype` above would be `_QCTyourtypeK4KN6`. The type descriptor for `REAL(4)` would be `_QCrealK4`. -### Compiler generated names +## Compiler generated names Compiler generated names do not have to be mapped back to Fortran. These names will be prefixed with `_QQ` and followed by a unique compiler diff --git a/flang/docs/C++17.md b/flang/docs/C++17.md index 87d5fc01f0922..9e0120d2e4c5e 100644 --- a/flang/docs/C++17.md +++ b/flang/docs/C++17.md @@ -6,7 +6,12 @@ --> -## C++14/17 features used in f18 +# C++14/17 features used in f18 + +```eval_rst +.. contents:: + :local: +``` The C++ dialect used in this project constitutes a subset of the standard C++ programming language and library features. @@ -32,7 +37,7 @@ The most important of these are: (`std::tuple` is actually a C++11 feature, but I include it in this list because it's not particularly well known.) -### Sum types +## Sum types First, some background information to explain the need for sum types in f18. @@ -111,7 +116,7 @@ would be to: functions (or the forbidden `dynamic_cast`) to identify alternatives during analysis -### Product types +## Product types Many productions in the Fortran grammar describe a sequence of various sub-parses. @@ -133,7 +138,7 @@ So we use `std::tuple` for such things. It has also been handy for template metaprogramming that needs to work with lists of types. -### `std::optional` +## `std::optional` This simple little type is used wherever a value might or might not be present. diff --git a/flang/docs/C++style.md b/flang/docs/C++style.md index 4ab95393d758a..fb11e64116141 100644 --- a/flang/docs/C++style.md +++ b/flang/docs/C++style.md @@ -6,6 +6,15 @@ --> +# Flang C++ Style Guide + +```eval_rst +.. contents:: + :local: +``` + +This document captures the style guide rules that are followed in the Flang codebase. + ## In brief: * Use *clang-format* from llvm 7 diff --git a/flang/docs/Calls.md b/flang/docs/Calls.md index d70bc910d73db..440d0bd147c2d 100644 --- a/flang/docs/Calls.md +++ b/flang/docs/Calls.md @@ -6,6 +6,13 @@ --> +# Representation of Fortran function calls + +```eval_rst +.. contents:: + :local: +``` + ## Procedure reference implementation protocol Fortran function and subroutine references are complicated. 
diff --git a/flang/docs/Character.md b/flang/docs/Character.md index 700db864f2dac..603dd8848ba1b 100644 --- a/flang/docs/Character.md +++ b/flang/docs/Character.md @@ -6,9 +6,14 @@ --> -## Implementation of `CHARACTER` types in f18 +# Implementation of `CHARACTER` types in f18 -### Kinds and Character Sets +```eval_rst +.. contents:: + :local: +``` + +## Kinds and Character Sets The f18 compiler and runtime support three kinds of the intrinsic `CHARACTER` type of Fortran 2018. @@ -48,7 +53,7 @@ We might want to support one or more environment variables to change these assumptions, especially for `KIND=1` users of ISO-8859 character sets besides Latin-1. -### Lengths +## Lengths Allocatable `CHARACTER` objects in Fortran may defer the specification of their lengths until the time of their allocation or whole (non-substring) @@ -76,7 +81,7 @@ Fortran substrings are rather like subscript triplets into a hidden "zero" dimension of a scalar `CHARACTER` value, but they cannot have strides. -### Concatenation +## Concatenation Fortran has one `CHARACTER`-valued intrinsic operator, `//`, which concatenates its operands (10.1.5.3). @@ -105,7 +110,7 @@ The result of `//` may be used The f18 compiler has a general (but slow) means of implementing concatenation and a specialized (fast) option to optimize the most common case. -#### General concatenation +### General concatenation In the most general case, the f18 compiler's generated code and runtime support library represent the result as a deferred-length allocatable @@ -130,7 +135,7 @@ When the left-hand side of a `CHARACTER` assignment is a deferred-length allocatable and the right-hand side is a temporary, use of the runtime's `MoveAlloc()` subroutine instead can save an allocation and a copy. -#### Optimized concatenation +### Optimized concatenation Scalar `CHARACTER(KIND=1)` expressions evaluated as the right-hand sides of assignments to independent substrings or whole variables that are not diff --git a/flang/docs/ControlFlowGraph.md b/flang/docs/ControlFlowGraph.md index b2b549845ebb6..dcdecf1b77f65 100644 --- a/flang/docs/ControlFlowGraph.md +++ b/flang/docs/ControlFlowGraph.md @@ -6,6 +6,13 @@ --> +# Control Flow Graph + +```eval_rst +.. contents:: + :local: +``` + ## Concept After a Fortran subprogram has been parsed, its names resolved, and all its semantic constraints successfully checked, the parse tree of its diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index c2e93c5f3de2e..a1a99b674cef2 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -6,8 +6,9 @@ --> -Compiler directives supported by F18 -==================================== +# Compiler directives supported by Flang + +A list of non-standard directives supported by Flang * `!dir$ fixed` and `!dir$ free` select Fortran source forms. Their effect persists to the end of the current source file. diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 9010b770cca6d..e16c55e976739 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -6,6 +6,13 @@ --> +# Fortran Extensions supported by Flang + +```eval_rst +.. 
contents:: + :local: +``` + As a general principle, this compiler will accept by default and without complaint many legacy features, extensions to the standard language, and features that have been deleted from the standard, @@ -16,8 +23,8 @@ Other non-standard features, which do conflict with the current standard specification of the Fortran programming language, are accepted if enabled by command-line options. -Intentional violations of the standard -====================================== +## Intentional violations of the standard + * Scalar `INTEGER` actual argument expressions (not variables!) are converted to the kinds of scalar `INTEGER` dummy arguments when the interface is explicit and the kinds differ. @@ -29,8 +36,8 @@ Intentional violations of the standard so long as they contain no executable code, no internal subprograms, and allocate no storage outside a named `COMMON` block. (C1415) -Extensions, deletions, and legacy features supported by default -=============================================================== +## Extensions, deletions, and legacy features supported by default + * Tabs in source * `<>` as synonym for `.NE.` and `/=` * `$` and `@` as legal characters in names @@ -122,8 +129,8 @@ Extensions, deletions, and legacy features supported by default * DATA statement initialization is allowed for procedure pointers outside structure constructors. -Extensions supported when enabled by options --------------------------------------------- +### Extensions supported when enabled by options + * C-style backslash escape sequences in quoted CHARACTER literals (but not Hollerith) [-fbackslash] * Logical abbreviations `.T.`, `.F.`, `.N.`, `.A.`, `.O.`, and `.X.` @@ -140,8 +147,8 @@ Extensions supported when enabled by options `KIND=` actual argument. We return `INTEGER(KIND=8)` by default in these cases when the `-flarge-sizes` option is enabled. -Extensions and legacy features deliberately not supported ---------------------------------------------------------- +### Extensions and legacy features deliberately not supported + * `.LG.` as synonym for `.NE.` * `REDIMENSION` * Allocatable `COMMON` @@ -184,8 +191,8 @@ Extensions and legacy features deliberately not supported PGI, Intel, and XLF support this in ways that are not numerically equivalent. PGI converts the arguments while Intel and XLF replace the specific by the related generic. -Preprocessing behavior -====================== +## Preprocessing behavior + * The preprocessor is always run, whatever the filename extension may be. * We respect Fortran comments in macro actual arguments (like GNU, Intel, NAG; unlike PGI and XLF) on the principle that macro calls should be treated diff --git a/flang/docs/FortranForCProgrammers.md b/flang/docs/FortranForCProgrammers.md index 103def2a92ce6..572433ab7c154 100644 --- a/flang/docs/FortranForCProgrammers.md +++ b/flang/docs/FortranForCProgrammers.md @@ -6,8 +6,12 @@ --> -Fortran For C Programmers -========================= +# Fortran For C Programmers + +```eval_rst +.. contents:: + :local: +``` This note is limited to essential information about Fortran so that a C or C++ programmer can get started more quickly with the language, @@ -16,8 +20,8 @@ to write or modify Fortran code. Please see other sources to learn about Fortran's rich history, current applications, and modern best practices in new code. 
-Know This At Least ------------------- +## Know This At Least + * There have been many implementations of Fortran, often from competing vendors, and the standard language has been defined by U.S. and international standards organizations. The various editions of @@ -53,8 +57,8 @@ Know This At Least interfaces in compiled "modules", as well as legacy mechanisms for sharing data and interconnecting subprograms. -A Rosetta Stone ---------------- +## A Rosetta Stone + Fortran's language standard and other documentation uses some terminology in particular ways that might be unfamiliar. @@ -81,8 +85,8 @@ in particular ways that might be unfamiliar. | Type-bound procedure | Kind of a C++ member function but not really | | Unformatted | Raw binary | -Data Types ----------- +## Data Types + There are five built-in ("intrinsic") types: `INTEGER`, `REAL`, `COMPLEX`, `LOGICAL`, and `CHARACTER`. They are parameterized with "kind" values, which should be treated as @@ -117,8 +121,8 @@ Last, there are "typeless" binary constants that can be used in a few situations, like static data initialization or immediate conversion, where type is not necessary. -Arrays ------- +## Arrays + Arrays are not types in Fortran. Being an array is a property of an object or function, not of a type. Unlike C, one cannot have an array of arrays or an array of pointers, @@ -133,8 +137,8 @@ And yes, the default lower bound on each dimension is 1, not 0. Expressions can manipulate arrays as multidimensional values, and the compiler will create the necessary loops. -Allocatables ------------- +## Allocatables + Modern Fortran programs use `ALLOCATABLE` data extensively. Such variables and derived type components are allocated dynamically. They are automatically deallocated when they go out of scope, much @@ -147,8 +151,8 @@ and follow up all the references that are made in the documentation from the description of `ALLOCATABLE` to other topics; it's a feature that interacts with much of the rest of the language.) -I/O ---- +## I/O + Fortran's input/output features are built into the syntax of the language, rather than being defined by library interfaces as in C and C++. There are means for raw binary I/O and for "formatted" transfers to @@ -173,8 +177,8 @@ One can also use compiler-generated formatting in "list-directed" I/O, in which the compiler derives reasonable default formats based on data types. -Subprograms ------------ +## Subprograms + Fortran has both `FUNCTION` and `SUBROUTINE` subprograms. They share the same name space, but functions cannot be called as subroutines or vice versa. @@ -188,8 +192,8 @@ their own internal procedures. As is the case with C++ lambda expressions, internal procedures can reference names from their host subprograms. -Modules -------- +## Modules + Modern Fortran has good support for separate compilation and namespace management. The *module* is the basic unit of compilation, although independent @@ -204,8 +208,8 @@ All references to objects in modules are done with direct names or aliases that have been added to the local scope, as Fortran has no means of qualifying references with module names. -Arguments ---------- +## Arguments + Functions and subroutines have "dummy" arguments that are dynamically associated with actual arguments during calls. Essentially, all argument passing in Fortran is by reference, not value. @@ -236,8 +240,8 @@ scope. This is the opposite of the assumptions under which a C or C++ compiler must labor when trying to optimize code with pointers. 
-Overloading ------------ +## Overloading + Fortran supports a form of overloading via its interface feature. By default, an interface is a means for specifying prototypes for a set of subroutines and functions. @@ -250,8 +254,8 @@ A similar feature can be used for generic type-bound procedures. This feature can be used to overload the built-in operators and some I/O statements, too. -Polymorphism ------------- +## Polymorphism + Fortran code can be written to accept data of some derived type or any extension thereof using `CLASS`, deferring the actual type to execution, rather than the usual `TYPE` syntax. @@ -261,8 +265,8 @@ Fortran's `SELECT TYPE` construct is used to distinguish between possible specific types dynamically, when necessary. It's a little like C++17's `std::visit()` on a discriminated union. -Pointers --------- +## Pointers + Pointers are objects in Fortran, not data types. Pointers can point to data, arrays, and subprograms. A pointer can only point to data that has the `TARGET` attribute. @@ -287,8 +291,8 @@ out of scope. A legacy feature, "Cray pointers", implements dynamic base addressing of one variable using an address stored in another. -Preprocessing -------------- +## Preprocessing + There is no standard preprocessing feature, but every real Fortran implementation has some support for passing Fortran source code through a variant of the standard C source preprocessor. @@ -302,8 +306,8 @@ suffix (e.g., "foo.F90") or a compiler command line option. (Since the F18 compiler always runs its built-in preprocessing stage, no special option or filename suffix is required.) -"Object Oriented" Programming ------------------------------ +## "Object Oriented" Programming + Fortran doesn't have member functions (or subroutines) in the sense that C++ does, in which a function has immediate access to the members of a specific instance of a derived type. @@ -325,8 +329,8 @@ There's a lot more that can be said about type-bound procedures (e.g., how they support overloading) but this should be enough to get you started with the most common usage. -Pitfalls --------- +## Pitfalls + Variable initializers, e.g. `INTEGER :: J=123`, are _static_ initializers! They imply that the variable is stored in static storage, not on the stack, and the initialized value lasts only until the variable is assigned. diff --git a/flang/docs/FortranIR.md b/flang/docs/FortranIR.md index 5d83aaa8e34cf..f1f643a1d17da 100644 --- a/flang/docs/FortranIR.md +++ b/flang/docs/FortranIR.md @@ -8,6 +8,11 @@ # Design: Fortran IR +```eval_rst +.. contents:: + :local: +``` + ## Introduction After semantic analysis is complete and it has been determined that the compiler has a legal Fortran program as input, the parse tree will be lowered to an intermediate representation for the purposes of high-level analysis and optimization. In this document, that intermediate representation will be called Fortran IR or FIR. The pass that converts from the parse tree and other data structures of the front-end to FIR will be called the "Burnside bridge". diff --git a/flang/docs/IORuntimeInternals.md b/flang/docs/IORuntimeInternals.md index b4f3092a014ec..2748fcf16fa3c 100644 --- a/flang/docs/IORuntimeInternals.md +++ b/flang/docs/IORuntimeInternals.md @@ -6,8 +6,12 @@ --> -Fortran I/O Runtime Library Internal Design -=========================================== +# Fortran I/O Runtime Library Internal Design + +```eval_rst +.. 
contents:: + :local: +``` This note is meant to be an overview of the design of the *implementation* of the f18 Fortran compiler's runtime support library for I/O statements. @@ -66,8 +70,7 @@ template library of fast conversion algorithms used to interpret floating-point values in Fortran source programs and to emit them to module files. -Overview of Classes -=================== +## Overview of Classes A suite of C++ classes and class templates are composed to construct the Fortran I/O runtime support library. @@ -79,16 +82,16 @@ classes are in the process of being vigorously rearranged and modified; use `grep` or an IDE to discover these classes in the source for now. (Sorry!) -`Terminator` ----------- +### `Terminator` + A general facility for the entire library, `Terminator` latches a source program statement location in terms of an unowned pointer to its source file path name and line number and uses them to construct a fatal error message if needed. It is used for both user program errors and internal runtime library crashes. -`IoErrorHandler` --------------- +### `IoErrorHandler` + When I/O error conditions arise at runtime that the Fortran program might have the privilege to handle itself via `ERR=`, `END=`, or `EOR=` labels and/or by an `IOSTAT=` variable, this subclass of @@ -96,8 +99,8 @@ might have the privilege to handle itself via `ERR=`, `END=`, or It sorts out priorities in the case of multiple errors and determines the final `IOSTAT=` value at the end of an I/O statement. -`MutableModes` ------------- +### `MutableModes` + Fortran's formatted I/O statements are affected by a suite of modes that can be configured by `OPEN` statements, overridden by data transfer I/O statement control lists, and further overridden @@ -108,8 +111,8 @@ order to properly isolate their modifications. The modes in force at the time each data item is processed constitute a member of each `DataEdit`. -`DataEdit` --------- +### `DataEdit` + Represents a single data edit descriptor from a `FORMAT` statement or `FMT=` character value, with some hidden extensions to also support formatting of list-directed transfers. @@ -119,8 +122,8 @@ For simplicity and efficiency, each data edit descriptor is encoded in the `DataEdit` as a simple capitalized character (or two) and some optional field widths. -`FormatControl<>` ---------------- +### `FormatControl<>` + This class template traverses a `FORMAT` statement's contents (or `FMT=` character value) to extract data edit descriptors like `E20.14` to serve each item in an I/O data transfer statement's *io-list*, @@ -142,32 +145,32 @@ output strings or record positionings at the end of the *io-list*. The `DefaultFormatControlCallbacks` structure summarizes the API expected by `FormatControl` from its class template actual arguments. -`OpenFile` --------- +### `OpenFile` + This class encapsulates all (I hope) the operating system interfaces used to interact with the host's filesystems for operations on external units. Asynchronous I/O interfaces are faked for now with synchronous operations and deferred results. -`ConnectionState` ---------------- +### `ConnectionState` + An active connection to an external or internal unit maintains the common parts of its state in this subclass of `ConnectionAttributes`. The base class holds state that should not change during the lifetime of the connection, while the subclass maintains state that may change during I/O statement execution. 
-`InternalDescriptorUnit` ----------------------- +### `InternalDescriptorUnit` + When I/O is being performed from/to a Fortran `CHARACTER` array rather than an external file, this class manages the standard interoperable descriptor used to access its elements as records. It has the necessary interfaces to serve as an actual argument to the `FormatControl` class template. -`FileFrame<>` ------------ +### `FileFrame<>` + This CRTP class template isolates all of the complexity involved between an external unit's `OpenFile` and the buffering requirements imposed by the capabilities of Fortran `FORMAT` control edit @@ -192,8 +195,8 @@ a frame may come up short. As a CRTP class template, `FileFrame` accesses the raw filesystem facilities it needs from `*this`. -`ExternalFileUnit` ----------------- +### `ExternalFileUnit` + This class mixes in `ConnectionState`, `OpenFile`, and `FileFrame` to represent the state of an open (or soon to be opened) external file descriptor as a Fortran @@ -210,8 +213,8 @@ Static member functions `LookUp()`, `LookUpOrCrash()`, and `LookUpOrCreate()` probe the map to convert Fortran `UNIT=` numbers from I/O statements into references to active units. -`IoStatementBase` ---------------- +### `IoStatementBase` + The subclasses of `IoStatementBase` each encapsulate and maintain the state of one active Fortran I/O statement across the several I/O runtime library API function calls it may comprise. @@ -239,8 +242,8 @@ the I/O API supports a means whereby the code generated for the Fortran program may supply stack space to the I/O runtime support library for this purpose. -`IoStatementState` ----------------- +### `IoStatementState` + F18's Fortran I/O runtime support library defines and implements an API that uses a sequence of function calls to implement each Fortran I/O statement. @@ -269,8 +272,8 @@ unit, the library has to treat that (expected to be rare) situation as a weird variation of internal I/O since there's no `ExternalFileUnit` available to hold its `IoStatementBase` subclass or `IoStatementState`. -A Narrative Overview Of `PRINT *, 'HELLO, WORLD'` -================================================= +## A Narrative Overview Of `PRINT *, 'HELLO, WORLD'` + 1. When the compiled Fortran program begins execution at the `main()` entry point exported from its main program, it calls `ProgramStart()` with its arguments and environment. diff --git a/flang/docs/ImplementingASemanticCheck.md b/flang/docs/ImplementingASemanticCheck.md index 3bb16915cb880..35b107e4988eb 100644 --- a/flang/docs/ImplementingASemanticCheck.md +++ b/flang/docs/ImplementingASemanticCheck.md @@ -5,14 +5,20 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception --> -# Introduction +# How to implement a Sematic Check in Flang + +```eval_rst +.. contents:: + :local: +``` + I recently added a semantic check to the f18 compiler front end. This document describes my thought process and the resulting implementation. For more information about the compiler, start with the [compiler overview](Overview.md). -# Problem definition +## Problem definition In the 2018 Fortran standard, section 11.1.7.4.3, paragraph 2, states that: @@ -29,7 +35,7 @@ emit a warning if an active DO variable was passed to a dummy argument with INTENT(INOUT). Previously, I had implemented similar checks for SUBROUTINE calls. -# Creating a test +## Creating a test My first step was to create a test case to cause the problem. I called it testfun.f90 and used it to check the behavior of other Fortran compilers. 
Here's the initial version: @@ -94,14 +100,14 @@ constant 216 in the statement: ```fortran dummyArg = 216 ``` -# Analysis and implementation planning +## Analysis and implementation planning I then considered what I needed to do. I needed to detect situations where an active DO variable was passed to a dummy argument with `INTENT(OUT)` or `INTENT(INOUT)`. Once I detected such a situation, I needed to produce a message that highlighted the erroneous source code. -## Deciding where to add the code to the compiler +### Deciding where to add the code to the compiler This new semantic check would depend on several types of information -- the parse tree, source code location information, symbols, and expressions. Thus I needed to put my new code in a place in the compiler after the parse tree had @@ -151,7 +157,7 @@ Since my semantic check was focused on DO CONCURRENT statements, I added it to the file `lib/Semantics/check-do.cpp` where most of the semantic checking for DO statements already lived. -## Taking advantage of prior work +### Taking advantage of prior work When implementing a similar check for SUBROUTINE calls, I created a utility functions in `lib/Semantics/semantics.cpp` to emit messages if a symbol corresponding to an active DO variable was being potentially modified: @@ -173,7 +179,7 @@ information -- The first and third are needed since they're required to call the utility functions. The second is needed to determine whether to call them. -## Finding the source location +### Finding the source location The source code location information that I'd need for the error message must come from the parse tree. I looked in the file `include/flang/Parser/parse-tree.h` and determined that a `struct Expr` @@ -181,7 +187,7 @@ contained source location information since it had the field `CharBlock source`. Thus, if I visited a `parser::Expr` node, I could get the source location information for the associated expression. -## Determining the `INTENT` +### Determining the `INTENT` I knew that I could find the `INTENT` of the dummy argument associated with the actual argument from the function called `dummyIntent()` in the class `evaluate::ActualArgument` in the file `include/flang/Evaluate/call.h`. So @@ -248,7 +254,7 @@ This combination of the traversal framework and `dummyIntent()` would give me the `INTENT` of all of the dummy arguments in a FUNCTION call. Thus, I would have the second piece of information I needed. -## Determining if the actual argument is a variable +### Determining if the actual argument is a variable I also guessed that I could determine if the `evaluate::ActualArgument` consisted of a variable. @@ -264,9 +270,9 @@ needed -- the source location of the erroneous text, the `INTENT` of the dummy argument, and a symbol that I could use to determine whether the actual argument was an active DO variable. -# Implementation +## Implementation -## Adding a parse tree visitor +### Adding a parse tree visitor I started my implementation by adding a visitor for `parser::Expr` nodes. Since this analysis is part of DO construct checking, I did this in `lib/Semantics/check-do.cpp`. I added a print statement to the visitor to @@ -308,7 +314,7 @@ source position of the associated expression (`CharBlock source`). So I now had one of the three pieces of information needed to detect and report errors. 
-## Collecting the actual arguments +### Collecting the actual arguments To get the `INTENT` of the dummy arguments and the `semantics::Symbol` associated with the actual argument, I needed to find all of the actual arguments embedded in an expression that contained a FUNCTION call. So my next step was to write the @@ -474,7 +480,7 @@ node. So far, so good. -## Finding the `INTENT` of the dummy argument +### Finding the `INTENT` of the dummy argument I now wanted to find the `INTENT` of the dummy argument associated with the arguments in the set. As mentioned earlier, the type `evaluate::ActualArgument` has a member function called `dummyIntent()` @@ -518,7 +524,7 @@ I then modified my test case to convince myself that I was getting the correct So far, so good. -## Finding the symbols for arguments that are variables +### Finding the symbols for arguments that are variables The third and last piece of information I needed was to determine if a variable was being passed as an actual argument. In such cases, I wanted to get the symbol table node (`semantics::Symbol`) for the variable. My starting point was the @@ -638,7 +644,7 @@ Here's the result of running the modified compiler on my Fortran test case: Sweet. -## Emitting the messages +### Emitting the messages At this point, using the source location information from the original `parser::Expr`, I had enough information to plug into the exiting interfaces for emitting messages for active DO variables. I modified the @@ -701,7 +707,7 @@ output: Even sweeter. -# Improving the test case +## Improving the test case At this point, my implementation seemed to be working. But I was concerned about the limitations of my test case. So I augmented it to include arguments other than `INTENT(OUT)` and more complex expressions. Luckily, my @@ -762,7 +768,7 @@ Here's the test I ended up with: end subroutine s ``` -# Submitting the pull request +## Submitting the pull request At this point, my implementation seemed functionally complete, so I stripped out all of the debug statements, ran `clang-format` on it and reviewed it to make sure that the names were clear. Here's what I ended up with: @@ -790,7 +796,7 @@ to make sure that the names were clear. Here's what I ended up with: I then created a pull request to get review comments. -# Responding to pull request comments +## Responding to pull request comments I got feedback suggesting that I use an `if` statement rather than a `case` statement. Another comment reminded me that I should look at the code I'd previously writted to do a similar check for SUBROUTINE calls to see diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index 7be0bf3e4a9ca..f9e47e5893bff 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -8,6 +8,11 @@ # A categorization of standard (2018) and extended Fortran intrinsic procedures +```eval_rst +.. contents:: + :local: +``` + This note attempts to group the intrinsic procedures of Fortran into categories of functions or subroutines with similar interfaces as an aid to comprehension beyond that which might be gained from the standard's @@ -53,14 +58,14 @@ Intrinsic modules are not covered here. may appear within the brackets to preserve the order of arguments (e.g., `COUNT`). -# Elemental intrinsic functions +## Elemental intrinsic functions Pure elemental semantics apply to these functions, to wit: when one or more of the actual arguments are arrays, the arguments must be conformable, and the result is also an array. 
Scalar arguments are expanded when the arguments are not all scalars. -## Elemental intrinsic functions that may have unrestricted specific procedures +### Elemental intrinsic functions that may have unrestricted specific procedures When an elemental intrinsic function is documented here as having an _unrestricted specific name_, that name may be passed as an actual @@ -349,7 +354,7 @@ that is present in `SET`, or zero if none is. `VERIFY` is essentially the opposite: it returns the index of the first (or last) character in `STRING` that is *not* present in `SET`, or zero if all are. -# Transformational intrinsic functions +## Transformational intrinsic functions This category comprises a large collection of intrinsic functions that are collected together because they somehow transform their arguments @@ -372,7 +377,7 @@ Some general rules apply to the transformational intrinsic functions: 1. The type `any` here denotes any intrinsic or derived type. 1. The notation `(..)` denotes an array of any rank (but not an assumed-rank array). -## Logical reduction transformational intrinsic functions +### Logical reduction transformational intrinsic functions ``` ALL(LOGICAL(k) MASK(..) [, DIM ]) -> LOGICAL(k) ANY(LOGICAL(k) MASK(..) [, DIM ]) -> LOGICAL(k) @@ -380,7 +385,7 @@ COUNT(LOGICAL(any) MASK(..) [, DIM, KIND=KIND(0) ]) -> INTEGER(KIND) PARITY(LOGICAL(k) MASK(..) [, DIM ]) -> LOGICAL(k) ``` -## Numeric reduction transformational intrinsic functions +### Numeric reduction transformational intrinsic functions ``` IALL(INTEGER(k) ARRAY(..) [, DIM, MASK ]) -> INTEGER(k) IANY(INTEGER(k) ARRAY(..) [, DIM, MASK ]) -> INTEGER(k) @@ -392,7 +397,7 @@ SUM(numeric ARRAY(..) [, DIM, MASK ]) -> numeric `NORM2` generalizes `HYPOT` by computing `SQRT(SUM(X*X))` while avoiding spurious overflows. -## Extrema reduction transformational intrinsic functions +### Extrema reduction transformational intrinsic functions ``` MAXVAL(relational(k) ARRAY(..) [, DIM, MASK ]) -> relational(k) MINVAL(relational(k) ARRAY(..) [, DIM, MASK ]) -> relational(k) @@ -419,7 +424,7 @@ MAXLOC(relational ARRAY(..) [, DIM, MASK, KIND=KIND(0), BACK=.FALSE. ]) MINLOC(relational ARRAY(..) [, DIM, MASK, KIND=KIND(0), BACK=.FALSE. ]) ``` -## Data rearrangement transformational intrinsic functions +### Data rearrangement transformational intrinsic functions The optional `DIM` argument to these functions must be a scalar integer of any kind, and it takes a default value of 1 when absent. @@ -475,7 +480,7 @@ UNPACK(any VECTOR(n), LOGICAL(any) MASK(..), FIELD) -> type and kind of VECTOR, ``` `FIELD` has same type and kind as `VECTOR` and is conformable with `MASK`. -## Other transformational intrinsic functions +### Other transformational intrinsic functions ``` BESSEL_JN(INTEGER(n1) N1, INTEGER(n2) N2, REAL(k) X) -> REAL(k) vector (MAX(N2-N1+1,0)) BESSEL_YN(INTEGER(n1) N1, INTEGER(n2) N2, REAL(k) X) -> REAL(k) vector (MAX(N2-N1+1,0)) @@ -517,7 +522,7 @@ At least one argument must be present in a call to `SELECTED_REAL_KIND`. An assumed-rank array may be passed to `SHAPE`, and if it is associated with an assumed-size array, the last element of the result will be -1. -## Coarray transformational intrinsic functions +### Coarray transformational intrinsic functions ``` FAILED_IMAGES([scalar TEAM_TYPE TEAM, KIND=KIND(0)]) -> INTEGER(KIND) vector GET_TEAM([scalar INTEGER(?) 
LEVEL]) -> scalar TEAM_TYPE @@ -532,10 +537,10 @@ THIS_IMAGE([COARRAY, DIM, scalar TEAM_TYPE TEAM]) -> default INTEGER The result of `THIS_IMAGE` is a scalar if `DIM` is present or if `COARRAY` is absent, and a vector whose length is the corank of `COARRAY` otherwise. -# Inquiry intrinsic functions +## Inquiry intrinsic functions These are neither elemental nor transformational; all are pure. -## Type inquiry intrinsic functions +### Type inquiry intrinsic functions All of these functions return constants. The value of the argument is not used, and may well be undefined. ``` @@ -554,7 +559,7 @@ RANGE(INTEGER(k) or REAL(k) or COMPLEX(k) X(..)) -> scalar default INTEGER TINY(REAL(k) X(..)) -> scalar REAL(k) ``` -## Bound and size inquiry intrinsic functions +### Bound and size inquiry intrinsic functions The results are scalar when `DIM` is present, and a vector of length=(co)rank(`(CO)ARRAY`) when `DIM` is absent. ``` @@ -567,7 +572,7 @@ UCOBOUND(any COARRAY [, DIM, KIND=KIND(0) ]) -> INTEGER(KIND) Assumed-rank arrays may be used with `LBOUND`, `SIZE`, and `UBOUND`. -## Object characteristic inquiry intrinsic functions +### Object characteristic inquiry intrinsic functions ``` ALLOCATED(any type ALLOCATABLE ARRAY) -> scalar default LOGICAL ALLOCATED(any type ALLOCATABLE SCALAR) -> scalar default LOGICAL @@ -584,11 +589,11 @@ The arguments to `EXTENDS_TYPE_OF` must be of extensible derived types or be unl An assumed-rank array may be used with `IS_CONTIGUOUS` and `RANK`. -# Intrinsic subroutines +## Intrinsic subroutines (*TODO*: complete these descriptions) -## One elemental intrinsic subroutine +### One elemental intrinsic subroutine ``` INTERFACE SUBROUTINE MVBITS(FROM, FROMPOS, LEN, TO, TOPOS) @@ -602,7 +607,7 @@ INTERFACE END INTERFACE ``` -## Non-elemental intrinsic subroutines +### Non-elemental intrinsic subroutines ``` CALL CPU_TIME(REAL INTENT(OUT) TIME) ``` @@ -627,7 +632,7 @@ CALL RANDOM_SEED([SIZE, PUT, GET]) CALL SYSTEM_CLOCK([COUNT, COUNT_RATE, COUNT_MAX]) ``` -## Atomic intrinsic subroutines +### Atomic intrinsic subroutines ``` CALL ATOMIC_ADD(ATOM, VALUE [, STAT=]) CALL ATOMIC_AND(ATOM, VALUE [, STAT=]) @@ -642,7 +647,7 @@ CALL ATOMIC_REF(VALUE, ATOM [, STAT=]) CALL ATOMIC_XOR(ATOM, VALUE [, STAT=]) ``` -## Collective intrinsic subroutines +### Collective intrinsic subroutines ``` CALL CO_BROADCAST CALL CO_MAX @@ -651,8 +656,8 @@ CALL CO_REDUCE CALL CO_SUM ``` -# Non-standard intrinsics -## PGI +## Non-standard intrinsics +### PGI ``` AND, OR, XOR LSHIFT, RSHIFT, SHIFT @@ -666,7 +671,7 @@ JINT, JNINT, KNINT LOC ``` -## Intel +### Intel ``` DCMPLX(X,Y), QCMPLX(X,Y) DREAL(DOUBLE COMPLEX A) -> DOUBLE PRECISION @@ -689,12 +694,12 @@ CACHESIZE, EOF, FP_CLASS, INT_PTR_KIND, ISNAN, LOC MALLOC ``` -# Intrinsic Procedure Support in f18 +## Intrinsic Procedure Support in f18 This section gives an overview of the support inside f18 libraries for the intrinsic procedures listed above. It may be outdated, refer to f18 code base for the actual support status. -## Semantic Analysis +### Semantic Analysis F18 semantic expression analysis phase detects intrinsic procedure references, validates the argument types and deduces the return types. This phase currently supports all the intrinsic procedures listed above but the ones in the table below. @@ -710,7 +715,7 @@ This phase currently supports all the intrinsic procedures listed above but the | Collective intrinsic subroutines | CO_BROADCAST &al. 
| -## Intrinsic Function Folding +### Intrinsic Function Folding Fortran Constant Expressions can contain references to a certain number of intrinsic functions (see Fortran 2018 standard section 10.1.12 for more details). Constant Expressions may be used to define kind arguments. Therefore, the semantic @@ -724,7 +729,7 @@ arrays when an implementation is provided for the scalars (regardless of whether it is using host hardware types or not). The status of intrinsic function folding support is given in the sub-sections below. -### Intrinsic Functions with Host Independent Folding Support +#### Intrinsic Functions with Host Independent Folding Support Implementations using f18 scalar types enables folding intrinsic functions on any host and with any possible type kind supported by f18. The intrinsic functions listed below are folded using host independent implementations. @@ -736,7 +741,7 @@ listed below are folded using host independent implementations. | COMPLEX | CMPLX, CONJG | | LOGICAL | BGE, BGT, BLE, BLT | -### Intrinsic Functions with Host Dependent Folding Support +#### Intrinsic Functions with Host Dependent Folding Support Implementations using the host runtime may not be available for all supported f18 types depending on the host hardware types and the libraries available on the host. The actual support on a host depends on what the host hardware types are. diff --git a/flang/docs/LabelResolution.md b/flang/docs/LabelResolution.md index e837b4fa6aece..c1227a8bc35a1 100644 --- a/flang/docs/LabelResolution.md +++ b/flang/docs/LabelResolution.md @@ -8,6 +8,11 @@ # Semantics: Resolving Labels and Construct Names +```eval_rst +.. contents:: + :local: +``` + ## Overview After the Fortran input file(s) has been parsed into a syntax tree, the compiler must check that the program checks semantically. Target labels must be checked and violations of legal semantics should be reported to the user. diff --git a/flang/docs/ModFiles.md b/flang/docs/ModFiles.md index 483341bdd0f47..ccb849ab0decd 100644 --- a/flang/docs/ModFiles.md +++ b/flang/docs/ModFiles.md @@ -8,6 +8,11 @@ # Module Files +```eval_rst +.. contents:: + :local: +``` + Module files hold information from a module that is necessary to compile program units that depend on the module. diff --git a/flang/docs/OpenMP-4.5-grammar.txt b/flang/docs/OpenMP-4.5-grammar.md similarity index 97% rename from flang/docs/OpenMP-4.5-grammar.txt rename to flang/docs/OpenMP-4.5-grammar.md index c74072ba1ef27..9044e305f0609 100644 --- a/flang/docs/OpenMP-4.5-grammar.txt +++ b/flang/docs/OpenMP-4.5-grammar.md @@ -1,18 +1,16 @@ -#===-- docs/OpenMP-4.5-grammar.txt --------------------------------===# -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===------------------------------------------------------------------------===# +# OpenMP 4.5 Grammar -# OpenMP 4.5 Specifications +Grammar used by Flang to parse OpenMP 4.5. +## OpenMP 4.5 Specifications +``` 2 omp-directive -> sentinel directive-name [clause[ [,] clause]...] 2.1.1 sentinel -> !$omp | c$omp | *$omp 2.1.2 sentinel -> !$omp +``` -# directive-name +## directive-name +``` 2.5 parallel -> PARALLEL [parallel-clause[ [,] parallel-clause]...] 
parallel-clause -> if-clause | num-threads-clause | @@ -462,3 +460,4 @@ ALLOC | RELEASE | DELETE 2.15.5.2 defaultmap -> DEFAULTMAP (TOFROM:SCALAR) +``` diff --git a/flang/docs/OpenMP-semantics.md b/flang/docs/OpenMP-semantics.md index 4e2a81739cf81..1511bc9e7b3b5 100644 --- a/flang/docs/OpenMP-semantics.md +++ b/flang/docs/OpenMP-semantics.md @@ -8,6 +8,11 @@ # OpenMP Semantic Analysis +```eval_rst +.. contents:: + :local: +``` + ## OpenMP for F18 1. Define and document the parse tree representation for diff --git a/flang/docs/OptionComparison.md b/flang/docs/OptionComparison.md index db5932411cc1e..347a1d6000ee2 100644 --- a/flang/docs/OptionComparison.md +++ b/flang/docs/OptionComparison.md @@ -6,14 +6,21 @@ --> -# Compiler options +# Compiler options comparison + +```eval_rst +.. contents:: + :local: +``` This document catalogs the options processed by F18's peers/competitors. Much of the document is taken up by a set of tables that list the options categorized into different topics. Some of the table headings link to more information about the contents of the tables. For example, the table on **Standards conformance** options links to [notes on Standards conformance](#standards). -**There's also important information in the ___[Notes section](#notes)___ near the end of the document on how this data was gathered and what ___is___ and ___is not___ included in this document.** +**There's also important information in the ___[Appendix section](#appendix)___ near the end of the document on how this data was gathered and what ___is___ and ___is not___ included in this document.** Note that compilers may support language features without having an option for them. Such cases are frequently, but not always noted in this document. +## Categorisation of Options +
Standards conformance @@ -1183,7 +1190,7 @@ Mcuda -## Notes +## Notes **Standards conformance:** @@ -1290,7 +1297,7 @@ GNU is the only compiler with options governing the use of non-standard intrinsi **Warn for bad call checking**: This Cray option ("-eb") issues a warning message rather than an error message when the compiler detects a call to a procedure with one or more dummy arguments having the TARGET, VOLATILE or ASYNCHRONOUS attribute and there is not an explicit interface definition. -## Notes +## Appendix ### What is and is not included diff --git a/flang/docs/Overview.md b/flang/docs/Overview.md index 75a8cd1c4cab0..9878589438450 100644 --- a/flang/docs/Overview.md +++ b/flang/docs/Overview.md @@ -8,6 +8,11 @@ # Overview of Compiler Phases +```eval_rst +.. contents:: + :local: +``` + Each phase produces either correct output or fatal errors. ## Prescan and Preprocess diff --git a/flang/docs/ParserCombinators.md b/flang/docs/ParserCombinators.md index 4f3dc6fd07ae6..ff94d341c1501 100644 --- a/flang/docs/ParserCombinators.md +++ b/flang/docs/ParserCombinators.md @@ -6,6 +6,15 @@ --> +# Parser Combinators + +```eval_rst +.. contents:: + :local: +``` + +This document is a primer on Parser Combinators and their use in Flang. + ## Concept The Fortran language recognizer here can be classified as an LL recursive descent parser. It is composed from a *parser combinator* library that diff --git a/flang/docs/Parsing.md b/flang/docs/Parsing.md index fad9a4d57278c..dec63e6fbdab4 100644 --- a/flang/docs/Parsing.md +++ b/flang/docs/Parsing.md @@ -6,8 +6,13 @@ --> -The F18 Parser -============== +# The F18 Parser + +```eval_rst +.. contents:: + :local: +``` + This program source code implements a parser for the Fortran programming language. @@ -42,8 +47,8 @@ source file and receive its parse tree and error messages. The interfaces of the Parsing class correspond to the two major passes of the parser, which are described below. -Prescanning and Preprocessing ------------------------------ +## Prescanning and Preprocessing + The first pass is performed by an instance of the Prescanner class, with help from an instance of Preprocessor. @@ -100,8 +105,8 @@ The content of the cooked character stream is available and useful for debugging, being as it is a simple value forwarded from the first major pass of the compiler to the second. -Source Provenance ------------------ +## Source Provenance + The prescanner constructs a chronicle of every file that is read by the parser, viz. the original source file and all others that it directly or indirectly includes. One copy of the content of each of these files @@ -124,8 +129,8 @@ Simple `const char *` pointers to characters in the cooked character stream, or to contiguous ranges thereof, are used as source position indicators within the parser and in the parse tree. -Messages --------- +## Messages + Message texts, and snprintf-like formatting strings for constructing messages, are instantiated in the various components of the parser with C++ user defined character literals tagged with `_err_en_US` and `_en_US` @@ -134,8 +139,8 @@ English used in the United States) so that they may be easily identified for localization. As described above, messages are associated with source code positions by means of provenance values. -The Parse Tree --------------- +## The Parse Tree + Each of the ca. 
450 numbered requirement productions in the standard Fortran language grammar, as well as the productions implied by legacy extensions and preserved obsolescent features, maps to a distinct class @@ -174,8 +179,8 @@ stability of pointers into these lists. There is a general purpose library by means of which parse trees may be traversed. -Parsing -------- +## Parsing + This compiler attempts to recognize the entire cooked character stream (see above) as a Fortran program. It records the reductions made during a successful recognition as a parse tree value. The recognized grammar @@ -203,8 +208,8 @@ of "parser combinator" template functions that compose them to form more complicated recognizers and their correspondences to the construction of parse tree values. -Unparsing ---------- +## Unparsing + Parse trees can be converted back into free form Fortran source code. This formatter is not really a classical "pretty printer", but is more of a data structure dump whose output is suitable for compilation diff --git a/flang/docs/Preprocessing.md b/flang/docs/Preprocessing.md index 7f6f3951cfd16..3c6984cfa2fd0 100644 --- a/flang/docs/Preprocessing.md +++ b/flang/docs/Preprocessing.md @@ -6,11 +6,15 @@ --> -Fortran Preprocessing -===================== +# Fortran Preprocessing + +```eval_rst +.. contents:: + :local: +``` + +## Behavior common to (nearly) all compilers: -Behavior common to (nearly) all compilers: ------------------------------------------- * Macro and argument names are sensitive to case. * Fixed form right margin clipping after column 72 (or 132) has precedence over macro name recognition, and also over @@ -39,9 +43,8 @@ Behavior common to (nearly) all compilers: * A `#define` directive intermixed with continuation lines can't define a macro that's invoked earlier in the same continued statement. -Behavior that is not consistent over all extant compilers but which -probably should be uncontroversial: ------------------------------------ +## Behavior that is not consistent over all extant compilers but which probably should be uncontroversial: + * Invoked macro names can straddle a Fortran line continuation. * ... unless implicit fixed form card padding intervenes; i.e., in fixed form, a continued macro name has to be split at column @@ -65,8 +68,8 @@ probably should be uncontroversial: directive indicator. * `#define KWM !` allows KWM to signal a comment. -Judgement calls, where precedents are unclear: ----------------------------------------------- +## Judgement calls, where precedents are unclear: + * Expressions in `#if` and `#elif` should support both Fortran and C operators; e.g., `#if 2 .LT. 3` should work. * If a function-like macro does not close its parentheses, line @@ -84,16 +87,16 @@ Judgement calls, where precedents are unclear: lines, it may or may not affect text in the continued statement that appeared before the directive. -Behavior that few compilers properly support (or none), but should: -------------------------------------------------------------------- +## Behavior that few compilers properly support (or none), but should: + * A macro invocation can straddle free form continuation lines in all of their forms, with continuation allowed in the name, before the arguments, and within the arguments. * Directives can be capitalized in free form, too. * `__VA_ARGS__` and `__VA_OPT__` work in variadic function-like macros. 
-In short, a Fortran preprocessor should work as if: ---------------------------------------------------- +## In short, a Fortran preprocessor should work as if: + 1. Fixed form lines are padded up to column 72 (or 132) and clipped thereafter. 2. Fortran comments are removed. 3. C-style line continuations are processed in preprocessing directives. @@ -125,8 +128,7 @@ text. OpenMP-style directives that look like comments are not addressed by this scheme but are obvious extensions. -Appendix -======== +## Appendix `N` in the table below means "not supported"; this doesn't mean a bug, it just means that a particular behavior was not observed. diff --git a/flang/docs/PullRequestChecklist.md b/flang/docs/PullRequestChecklist.md index 12a67be374a20..b253c153f61ec 100644 --- a/flang/docs/PullRequestChecklist.md +++ b/flang/docs/PullRequestChecklist.md @@ -36,7 +36,7 @@ even though I've read the style guide, they regularly trip me up. clang-format will do this for most code. But you may need to break up long strings. * Review declarations for proper use of `constexpr` and `const`. -* Follow the C++ [naming guidelines](C++style.md#naming). +* Follow the C++ [naming guidelines](C++style.html#naming) * Ensure that the names evoke their purpose and are consistent with existing code. * Used braced initializers. * Review pointer and reference types to make sure that you're using them diff --git a/flang/docs/RuntimeDescriptor.md b/flang/docs/RuntimeDescriptor.md index d819517fa9795..f0bbd2e3fedaf 100644 --- a/flang/docs/RuntimeDescriptor.md +++ b/flang/docs/RuntimeDescriptor.md @@ -6,6 +6,13 @@ --> +# Runtime Descriptors + +```eval_rst +.. contents:: + :local: +``` + ## Concept The properties that characterize data values and objects in Fortran programs must sometimes be materialized when the program runs. diff --git a/flang/docs/Semantics.md b/flang/docs/Semantics.md index 6ea0b292de69f..361426c936c24 100644 --- a/flang/docs/Semantics.md +++ b/flang/docs/Semantics.md @@ -8,6 +8,11 @@ # Semantic Analysis +```eval_rst +.. contents:: + :local: +``` + The semantic analysis pass determines if a syntactically correct Fortran program is is legal by enforcing the constraints of the language. diff --git a/flang/docs/conf.py b/flang/docs/conf.py index 045d0a2c41678..21362fc3449e9 100644 --- a/flang/docs/conf.py +++ b/flang/docs/conf.py @@ -46,12 +46,23 @@ else: source_parsers = {'.md': 'recommonmark.parser.CommonMarkParser'} source_suffix['.md'] = 'markdown' + extensions.append('sphinx_markdown_tables') + + # Setup AutoStructify for inline .rst toctrees in index.md + from recommonmark.transform import AutoStructify + def setup(app): + # Disable inline math to avoid + # https://github.com/readthedocs/recommonmark/issues/120 in Extensions.md + app.add_config_value('recommonmark_config', { + 'enable_inline_math': False + }, True) + app.add_transform(AutoStructify) # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'Overview' +master_doc = 'index' # General information about the project. project = u'Flang' diff --git a/flang/docs/f2018-grammar.txt b/flang/docs/f2018-grammar.md similarity index 99% rename from flang/docs/f2018-grammar.txt rename to flang/docs/f2018-grammar.md index 2de8cdfc1b8f7..b47eced4857e3 100644 --- a/flang/docs/f2018-grammar.txt +++ b/flang/docs/f2018-grammar.md @@ -1,11 +1,8 @@ -#===-- docs/f2018-grammar.txt -------------------------------------===# -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===------------------------------------------------------------------------===# +# Fortran 2018 Grammar +Grammar used by Flang to parse Fortran 2018. + +``` R0001 digit -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 R0002 letter -> A | B | C | D | E | F | G | H | I | J | K | L | M | @@ -799,3 +796,4 @@ R1542 return-stmt -> RETURN [scalar-int-expr] R1543 contains-stmt -> CONTAINS R1544 stmt-function-stmt -> function-name ( [dummy-arg-name-list] ) = scalar-expr +``` diff --git a/flang/docs/index.md b/flang/docs/index.md new file mode 100644 index 0000000000000..4c07170565227 --- /dev/null +++ b/flang/docs/index.md @@ -0,0 +1,61 @@ +# Welcome to Flang's documentation + +Flang is LLVM's Fortran frontend + +```eval_rst +.. toctree:: + :titlesonly: + + ReleaseNotes +``` + +# Contributing to Flang + +```eval_rst +.. toctree:: + :titlesonly: + + FortranForCProgrammers + C++style + C++17 + PullRequestChecklist + ImplementingASemanticCheck +``` + +# Design Documents + +```eval_rst +.. toctree:: + :titlesonly: + + Overview + Preprocessing + Parsing + LabelResolution + ModFiles + Semantics + OpenMP-semantics + ControlFlowGraph + FortranIR + IORuntimeInternals + f2018-grammar.md + OpenMP-4.5-grammar.md + Directives + Extensions + Intrinsics + OptionComparison + ParserCombinators + RuntimeDescriptor + Calls + Character + ArrayComposition + BijectiveInternalNameUniquing +``` + +# Indices and tables + +```eval_rst +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` +``` From b3fb40b3a3c1fb7ac094eda50762624baad37552 Mon Sep 17 00:00:00 2001 From: dfukalov Date: Fri, 4 Sep 2020 22:44:01 +0300 Subject: [PATCH 076/109] [AMDGPU] Fix for folding v2.16 literals. It was found some packed immediate operands (e.g. ``) are incorrectly processed so one of two packed values were lost. Introduced new function to check immediate 32-bit operand can be folded. Converted condition about current op_sel flags value to fall-through. Fixes: SWDEV-247595 Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D87158 (cherry picked from commit d03c4034dc80c944ec4a5833ba8f87d60183f866) --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 44 +++++++++---------- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 13 ++++++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 ++ .../CodeGen/AMDGPU/shrink-add-sub-constant.ll | 4 +- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index ffcf4c30bc70d..92980d2406cf2 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -192,8 +192,8 @@ static bool updateOperand(FoldCandidate &Fold, if (Fold.isImm()) { if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked && !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) && - AMDGPU::isInlinableLiteralV216(static_cast(Fold.ImmToFold), - ST.hasInv2PiInlineImm())) { + AMDGPU::isFoldableLiteralV216(Fold.ImmToFold, + ST.hasInv2PiInlineImm())) { // Set op_sel/op_sel_hi on this operand or bail out if op_sel is // already set. 
unsigned Opcode = MI->getOpcode(); @@ -209,30 +209,30 @@ static bool updateOperand(FoldCandidate &Fold, ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx); MachineOperand &Mod = MI->getOperand(ModIdx); unsigned Val = Mod.getImm(); - if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1)) - return false; - // Only apply the following transformation if that operand requries - // a packed immediate. - switch (TII.get(Opcode).OpInfo[OpNo].OperandType) { - case AMDGPU::OPERAND_REG_IMM_V2FP16: - case AMDGPU::OPERAND_REG_IMM_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: - case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - // If upper part is all zero we do not need op_sel_hi. - if (!isUInt<16>(Fold.ImmToFold)) { - if (!(Fold.ImmToFold & 0xffff)) { - Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); + if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) { + // Only apply the following transformation if that operand requries + // a packed immediate. + switch (TII.get(Opcode).OpInfo[OpNo].OperandType) { + case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + // If upper part is all zero we do not need op_sel_hi. + if (!isUInt<16>(Fold.ImmToFold)) { + if (!(Fold.ImmToFold & 0xffff)) { + Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); + Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); + Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); + return true; + } Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); - Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); + Old.ChangeToImmediate(Fold.ImmToFold & 0xffff); return true; } - Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); - Old.ChangeToImmediate(Fold.ImmToFold & 0xffff); - return true; + break; + default: + break; } - break; - default: - break; } } } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 00e6d517bde58..3df2157fc402d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1282,6 +1282,19 @@ bool isInlinableIntLiteralV216(int32_t Literal) { return Lo16 == Hi16 && isInlinableIntLiteral(Lo16); } +bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) { + assert(HasInv2Pi); + + int16_t Lo16 = static_cast(Literal); + if (isInt<16>(Literal) || isUInt<16>(Literal)) + return true; + + int16_t Hi16 = static_cast(Literal >> 16); + if (!(Literal & 0xffff)) + return true; + return Lo16 == Hi16; +} + bool isArgPassedInSGPR(const Argument *A) { const Function *F = A->getParent(); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index e71554575f6af..26bb77f4b4c74 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -660,6 +660,9 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); LLVM_READNONE bool isInlinableIntLiteralV216(int32_t Literal); +LLVM_READNONE +bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi); + bool isArgPassedInSGPR(const Argument *Arg); LLVM_READONLY diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll index ff4a8296d8dd0..bf437cc5bb58a 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -1166,7 +1166,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(<2 x i16> addrspace(1)* %out, ; 
GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_pk_sub_i16 v2, v3, 7 op_sel_hi:[1,0] +; GFX10-NEXT: v_pk_sub_i16 v2, v3, 0x400007 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1250,7 +1250,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(<2 x i16> addrspace(1)* %ou ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_pk_sub_i16 v2, v3, 64 op_sel_hi:[1,0] +; GFX10-NEXT: v_pk_sub_i16 v2, v3, 0x7b0040 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() From 2d61b5ea8079fb28db6a7b25cfc844fa6c21f8c4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 29 Aug 2020 22:31:06 +0200 Subject: [PATCH 077/109] Reduce code duplication in simplifySelectWithICmpCond (NFC) Canonicalize icmp ne to icmp eq and implement all the folds only once. --- llvm/lib/Analysis/InstructionSimplify.cpp | 34 ++++++----------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 9423ff9e3a66c..0a76979f93e20 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3965,12 +3965,18 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) return nullptr; - if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) { + // Canonicalize ne to eq predicate. + if (Pred == ICmpInst::ICMP_NE) { + Pred = ICmpInst::ICMP_EQ; + std::swap(TrueVal, FalseVal); + } + + if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) { Value *X; const APInt *Y; if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y)))) if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y, - Pred == ICmpInst::ICMP_EQ)) + /*TrueWhenUnset=*/true)) return V; // Test for a bogus zero-shift-guard-op around funnel-shift or rotate. @@ -3981,13 +3987,7 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, m_Value(ShAmt))); // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X - if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt && - Pred == ICmpInst::ICMP_EQ) - return X; - // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X - // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X - if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt && - Pred == ICmpInst::ICMP_NE) + if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt) return X; // Test for a zero-shift-guard-op around rotates. These are used to @@ -4001,11 +4001,6 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, m_Intrinsic(m_Value(X), m_Deferred(X), m_Value(ShAmt))); - // (ShAmt != 0) ? fshl(X, X, ShAmt) : X --> fshl(X, X, ShAmt) - // (ShAmt != 0) ? fshr(X, X, ShAmt) : X --> fshr(X, X, ShAmt) - if (match(TrueVal, isRotate) && FalseVal == X && CmpLHS == ShAmt && - Pred == ICmpInst::ICMP_NE) - return TrueVal; // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt) // (ShAmt == 0) ? 
X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt) if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt && @@ -4032,17 +4027,6 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == FalseVal) return FalseVal; - } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == - FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == - FalseVal) - return TrueVal; - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == - TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == - TrueVal) - return TrueVal; } return nullptr; From d720e5855dcf57b5b88ee6a4147ccd762115278a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 18:53:08 +0200 Subject: [PATCH 078/109] Add test for PR47322 (NFC) --- llvm/test/Transforms/InstCombine/select.ll | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 185ff838b8192..abdb36ab7bd42 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2487,3 +2487,19 @@ define <2 x i32> @true_undef_vec(i1 %cond, <2 x i32> %x) { %s = select i1 %cond, <2 x i32> undef, <2 x i32> %x ret <2 x i32> %s } + +; FIXME: This is a miscompile! +define i32 @pr47322_more_poisonous_replacement(i32 %arg) { +; CHECK-LABEL: @pr47322_more_poisonous_replacement( +; CHECK-NEXT: [[TRAILING:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 immarg true), [[RNG0:!range !.*]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[ARG]], [[TRAILING]] +; CHECK-NEXT: ret i32 [[SHIFTED]] +; + %cmp = icmp eq i32 %arg, 0 + %trailing = call i32 @llvm.cttz.i32(i32 %arg, i1 immarg true) + %shifted = lshr i32 %arg, %trailing + %r1.sroa.0.1 = select i1 %cmp, i32 0, i32 %shifted + ret i32 %r1.sroa.0.1 +} + +declare i32 @llvm.cttz.i32(i32, i1 immarg) From be318969e245db0cd5471bff2a7cbfa3fad2b075 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 10 Sep 2020 12:19:16 +0200 Subject: [PATCH 079/109] Fix incorrect SimplifyWithOpReplaced transform (PR47322) This is a followup to D86834, which partially fixed this issue in InstSimplify. However, InstCombine repeats the same transform while dropping poison flags -- which does not cover cases where poison is introduced in some other way. The fix here is a bit more comprehensive, because things are quite entangled, and it's hard to only partially address it without regressing optimization. There are really two changes here: * Export the SimplifyWithOpReplaced API from InstSimplify, with an added AllowRefinement flag. For replacements inside the TrueVal we don't actually care whether refinement occurs or not, the replacement is always legal. This part of the transform is now done in InstSimplify only. (It should be noted that the current AllowRefinement check is not sufficient -- that's an issue we need to address separately.) * Change the InstCombine fold to work by temporarily dropping poison generating flags, running the fold and then restoring the flags if it didn't work out. This will ensure that the InstCombine fold is correct as long as the InstSimplify fold is correct. 
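(Illustration only, not part of this change: the snippet below restates, as a
self-contained IR function, the refinement hazard that the code comment in
SimplifyWithOpReplaced describes; the function name is made up.)

  define i32 @refinement_example(i32 %x) {
    %cmp = icmp eq i32 %x, 2147483647
    %add = add nsw i32 %x, 1
    %sel = select i1 %cmp, i32 -2147483648, i32 %add
    ret i32 %sel
  }

  ; At %x == INT_MAX the 'add nsw' is poison while the select still yields the
  ; defined constant -2147483648, so %sel cannot be replaced by %add unless the
  ; nsw flag is stripped first -- which is what the InstCombine side now does
  ; temporarily, restoring the flag if the fold does not apply.
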
Differential Revision: https://reviews.llvm.org/D87445 --- .../llvm/Analysis/InstructionSimplify.h | 6 ++ llvm/lib/Analysis/InstructionSimplify.cpp | 50 ++++++++++------- .../InstCombine/InstCombineSelect.cpp | 55 +++++++++++-------- llvm/test/Transforms/InstCombine/select.ll | 6 +- 4 files changed, 72 insertions(+), 45 deletions(-) diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 2a39a4e090870..b5ae54fb98bc9 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -268,6 +268,12 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q); Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE = nullptr); +/// See if V simplifies when its operand Op is replaced with RepOp. +/// AllowRefinement specifies whether the simplification can be a refinement, +/// or whether it needs to be strictly identical. +Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const SimplifyQuery &Q, bool AllowRefinement); + /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. /// /// This first performs a normal RAUW of I with SimpleV. It then recursively diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 0a76979f93e20..e744a966a104f 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3810,10 +3810,10 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } -/// See if V simplifies when its operand Op is replaced with RepOp. -static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const SimplifyQuery &Q, - unsigned MaxRecurse) { +static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const SimplifyQuery &Q, + bool AllowRefinement, + unsigned MaxRecurse) { // Trivial replacement. if (V == Op) return RepOp; @@ -3826,20 +3826,19 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (!I) return nullptr; + // Consider: + // %cmp = icmp eq i32 %x, 2147483647 + // %add = add nsw i32 %x, 1 + // %sel = select i1 %cmp, i32 -2147483648, i32 %add + // + // We can't replace %sel with %add unless we strip away the flags (which will + // be done in InstCombine). + // TODO: This is unsound, because it only catches some forms of refinement. + if (!AllowRefinement && canCreatePoison(I)) + return nullptr; + // If this is a binary operator, try to simplify it with the replaced op. if (auto *B = dyn_cast(I)) { - // Consider: - // %cmp = icmp eq i32 %x, 2147483647 - // %add = add nsw i32 %x, 1 - // %sel = select i1 %cmp, i32 -2147483648, i32 %add - // - // We can't replace %sel with %add unless we strip away the flags. - // TODO: This is an unusual limitation because better analysis results in - // worse simplification. InstCombine can do this fold more generally - // by dropping the flags. Remove this fold to save compile-time? 
- if (canCreatePoison(I)) - return nullptr; - if (MaxRecurse) { if (B->getOperand(0) == Op) return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, @@ -3906,6 +3905,13 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return nullptr; } +Value *llvm::SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const SimplifyQuery &Q, + bool AllowRefinement) { + return ::SimplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement, + RecursionLimit); +} + /// Try to simplify a select instruction when its condition operand is an /// integer comparison where one operand of the compare is a constant. static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, @@ -4017,14 +4023,18 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, + /* AllowRefinement */ false, MaxRecurse) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, + /* AllowRefinement */ false, MaxRecurse) == TrueVal) return FalseVal; - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, + /* AllowRefinement */ true, MaxRecurse) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, + /* AllowRefinement */ true, MaxRecurse) == FalseVal) return FalseVal; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index db27711f29b17..fa695c39cd1eb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1148,22 +1148,6 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, return &Sel; } -static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *ReplaceOp, - const SimplifyQuery &Q) { - // If this is a binary operator, try to simplify it with the replaced op - // because we know Op and ReplaceOp are equivalant. - // For example: V = X + 1, Op = X, ReplaceOp = 42 - // Simplifies as: add(42, 1) --> 43 - if (auto *BO = dyn_cast(V)) { - if (BO->getOperand(0) == Op) - return SimplifyBinOp(BO->getOpcode(), ReplaceOp, BO->getOperand(1), Q); - if (BO->getOperand(1) == Op) - return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), ReplaceOp, Q); - } - - return nullptr; -} - /// If we have a select with an equality comparison, then we know the value in /// one of the arms of the select. See if substituting this value into an arm /// and simplifying the result yields the same value as the other arm. @@ -1190,20 +1174,45 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, if (Cmp.getPredicate() == ICmpInst::ICMP_NE) std::swap(TrueVal, FalseVal); + auto *FalseInst = dyn_cast(FalseVal); + if (!FalseInst) + return nullptr; + + // InstSimplify already performed this fold if it was possible subject to + // current poison-generating flags. Try the transform again with + // poison-generating flags temporarily dropped. 
+ bool WasNUW = false, WasNSW = false, WasExact = false; + if (auto *OBO = dyn_cast(FalseVal)) { + WasNUW = OBO->hasNoUnsignedWrap(); + WasNSW = OBO->hasNoSignedWrap(); + FalseInst->setHasNoUnsignedWrap(false); + FalseInst->setHasNoSignedWrap(false); + } + if (auto *PEO = dyn_cast(FalseVal)) { + WasExact = PEO->isExact(); + FalseInst->setIsExact(false); + } + // Try each equivalence substitution possibility. // We have an 'EQ' comparison, so the select's false value will propagate. // Example: // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 - // (X == 42) ? (X + 1) : 43 --> (X == 42) ? (42 + 1) : 43 --> 43 Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); - if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q) == TrueVal || - simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q) == TrueVal || - simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q) == FalseVal || - simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q) == FalseVal) { - if (auto *FalseInst = dyn_cast(FalseVal)) - FalseInst->dropPoisonGeneratingFlags(); + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, + /* AllowRefinement */ false) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, + /* AllowRefinement */ false) == TrueVal) { return FalseVal; } + + // Restore poison-generating flags if the transform did not apply. + if (WasNUW) + FalseInst->setHasNoUnsignedWrap(); + if (WasNSW) + FalseInst->setHasNoSignedWrap(); + if (WasExact) + FalseInst->setIsExact(); + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index abdb36ab7bd42..c23587b606ce7 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2491,9 +2491,11 @@ define <2 x i32> @true_undef_vec(i1 %cond, <2 x i32> %x) { ; FIXME: This is a miscompile! define i32 @pr47322_more_poisonous_replacement(i32 %arg) { ; CHECK-LABEL: @pr47322_more_poisonous_replacement( -; CHECK-NEXT: [[TRAILING:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 immarg true), [[RNG0:!range !.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0 +; CHECK-NEXT: [[TRAILING:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 immarg true), [[RNG0:!range !.*]] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[ARG]], [[TRAILING]] -; CHECK-NEXT: ret i32 [[SHIFTED]] +; CHECK-NEXT: [[R1_SROA_0_1:%.*]] = select i1 [[CMP]], i32 0, i32 [[SHIFTED]] +; CHECK-NEXT: ret i32 [[R1_SROA_0_1]] ; %cmp = icmp eq i32 %arg, 0 %trailing = call i32 @llvm.cttz.i32(i32 %arg, i1 immarg true) From 88e17a8e9b49bfbcfc1fa70f867b5b56a7a64fc7 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 15 Sep 2020 17:59:10 +0800 Subject: [PATCH 080/109] [SelectionDAG] Remove unused FP constant in getNegatedExpression 960cbc53 immediately removes nodes that won't be used to avoid compilation time explosion. This patch adds the removal to constants to fix PR47517. 
Reviewed By: RKSimon, steven.zhang Differential Revision: https://reviews.llvm.org/D87614 (cherry picked from commit 2508ef014e8b01006de4e5ee6fd451d1f68d550f) --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++- llvm/test/CodeGen/X86/pr47517.ll | 28 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr47517.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 819e608c6896e..4ebb99c978415 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5751,8 +5751,10 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // If we already have the use of the negated floating constant, it is free // to negate it even it has multiple uses. - if (!Op.hasOneUse() && CFP.use_empty()) + if (!Op.hasOneUse() && CFP.use_empty()) { + RemoveDeadNode(CFP); break; + } Cost = NegatibleCost::Neutral; return CFP; } diff --git a/llvm/test/CodeGen/X86/pr47517.ll b/llvm/test/CodeGen/X86/pr47517.ll new file mode 100644 index 0000000000000..6b508acf15dda --- /dev/null +++ b/llvm/test/CodeGen/X86/pr47517.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple x86_64 < %s | FileCheck %s + +; To ensure unused floating point constant is removed in negation +define float @test(float %src, float* %p) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq $0, (%rdi) +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq +entry: + %a0 = getelementptr inbounds float, float* %p, i32 0 + %a1 = getelementptr inbounds float, float* %p, i32 1 + store float 0.000000e+00, float* %a0 + store float 0.000000e+00, float* %a1 + %zero = load float, float* %a0 + %fmul1 = fmul fast float %zero, %src + %fadd1 = fadd fast float %fmul1, %zero + %fmul2 = fmul fast float %fadd1, 2.000000e+00 + %fmul3 = fmul fast float %fmul2, %fmul2 + %fmul4 = fmul fast float %fmul2, 2.000000e+00 + %fadd2 = fadd fast float %fmul4, -3.000000e+00 + %fmul5 = fmul fast float %fadd2, %fmul2 + %fadd3 = fadd fast float %fmul2, %src + %fadd4 = fadd fast float %fadd3, %fmul5 + %fmul6 = fmul fast float %fmul3, %fadd4 + ret float %fmul6 +} From d754173a98309b25562b5624dc108a3b46e990fe Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Wed, 26 Aug 2020 17:08:00 +0200 Subject: [PATCH 081/109] [clangd] Use string[] for allCommitCharacters As per LSP specification, allCommitCharacters should be string[] instead of string: https://microsoft.github.io/language-server-protocol/specification#textDocument_completion Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D86604 (cherry picked from commit 9d11e6789c477ce6104e29745ca70e13c9fafeb0) --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 5 +++- .../clangd/test/initialize-params.test | 30 ++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 0408b0498488e..15ef89cb34faa 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -592,7 +592,10 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, {"codeActionProvider", std::move(CodeActionProvider)}, {"completionProvider", llvm::json::Object{ - {"allCommitCharacters", " \t()[]{}<>:;,+-/*%^&#?.=\"'|"}, + {"allCommitCharacters", + {" ", "\t", "(", ")", "[", "]", "{", "}", "<", + 
">", ":", ";", ",", "+", "-", "/", "*", "%", + "^", "&", "#", "?", ".", "=", "\"", "'", "|"}}, {"resolveProvider", false}, // We do extra checks, e.g. that > is part of ->. {"triggerCharacters", {".", "<", ">", ":", "\"", "/"}}, diff --git a/clang-tools-extra/clangd/test/initialize-params.test b/clang-tools-extra/clangd/test/initialize-params.test index f0a0f791c2f68..4125c27e4e35a 100644 --- a/clang-tools-extra/clangd/test/initialize-params.test +++ b/clang-tools-extra/clangd/test/initialize-params.test @@ -7,7 +7,35 @@ # CHECK-NEXT: "capabilities": { # CHECK-NEXT: "codeActionProvider": true, # CHECK-NEXT: "completionProvider": { -# CHECK-NEXT: "allCommitCharacters": " \t()[]{}<>:;,+-/*%^&#?.=\"'|", +# CHECK-NEXT: "allCommitCharacters": [ +# CHECK-NEXT: " ", +# CHECK-NEXT: "\t", +# CHECK-NEXT: "(", +# CHECK-NEXT: ")", +# CHECK-NEXT: "[", +# CHECK-NEXT: "]", +# CHECK-NEXT: "{", +# CHECK-NEXT: "}", +# CHECK-NEXT: "<", +# CHECK-NEXT: ">", +# CHECK-NEXT: ":", +# CHECK-NEXT: ";", +# CHECK-NEXT: ",", +# CHECK-NEXT: "+", +# CHECK-NEXT: "-", +# CHECK-NEXT: "/", +# CHECK-NEXT: "*", +# CHECK-NEXT: "%", +# CHECK-NEXT: "^", +# CHECK-NEXT: "&", +# CHECK-NEXT: "#", +# CHECK-NEXT: "?", +# CHECK-NEXT: ".", +# CHECK-NEXT: "=", +# CHECK-NEXT: "\"", +# CHECK-NEXT: "'", +# CHECK-NEXT: "|" +# CHECK-NEXT: ], # CHECK-NEXT: "resolveProvider": false, # CHECK-NEXT: "triggerCharacters": [ # CHECK-NEXT: ".", From 2ec773995076236110d4ffb1db7e6723c22519fc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Sep 2020 12:52:54 -0700 Subject: [PATCH 082/109] [FastISel] Bail out of selectGetElementPtr for vector GEPs. The code that decomposes the GEP into ADD/MUL doesn't work properly for vector GEPs. It can create bad COPY instructions or possibly assert. For now just bail out to SelectionDAG. Fixes PR45906 (cherry picked from commit 4208ea3e19f8e3e8cd35e6f5a6c43f4aa066c6ec) --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 6 +++ .../test/CodeGen/X86/masked_gather_scatter.ll | 48 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index fc6c3a145f132..f5948d2a20dca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -690,6 +690,12 @@ bool FastISel::selectGetElementPtr(const User *I) { Register N = getRegForValue(I->getOperand(0)); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; + + // FIXME: The code below does not handle vector GEPs. Halt "fast" selection + // and bail. + if (isa(I->getType())) + return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); // Keep a running tab of the total offset to coalesce multiple N = N + Offset diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index df3af4c246596..b654b2a579fca 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -3319,3 +3319,51 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %gep, i32 4, <16 x i1> %mask) ret void } + +%struct.foo = type { i8*, i64, i16, i16, i32 } + +; This used to cause fast-isel to generate bad copy instructions that would +; cause an error in copyPhysReg. 
+define <8 x i64> @pr45906(<8 x %struct.foo*> %ptr) { +; KNL_64-LABEL: pr45906: +; KNL_64: # %bb.0: # %bb +; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 +; KNL_64-NEXT: kxnorw %k0, %k0, %k1 +; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1} +; KNL_64-NEXT: retq +; +; KNL_32-LABEL: pr45906: +; KNL_32: # %bb.0: # %bb +; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] +; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm1 +; KNL_32-NEXT: kxnorw %k0, %k0, %k1 +; KNL_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1} +; KNL_32-NEXT: retl +; +; SKX_SMALL-LABEL: pr45906: +; SKX_SMALL: # %bb.0: # %bb +; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 +; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1 +; SKX_SMALL-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1} +; SKX_SMALL-NEXT: retq +; +; SKX_LARGE-LABEL: pr45906: +; SKX_LARGE: # %bb.0: # %bb +; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax +; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1 +; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1 +; SKX_LARGE-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1} +; SKX_LARGE-NEXT: retq +; +; SKX_32-LABEL: pr45906: +; SKX_32: # %bb.0: # %bb +; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1 +; SKX_32-NEXT: kxnorw %k0, %k0, %k1 +; SKX_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1} +; SKX_32-NEXT: retl +bb: + %tmp = getelementptr inbounds %struct.foo, <8 x %struct.foo*> %ptr, i64 0, i32 1 + %tmp1 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> %tmp, i32 8, <8 x i1> , <8 x i64> undef) + ret <8 x i64> %tmp1 +} +declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>) From 274bb3fdddf8fe692fa13f4b3ccb06df8a72b388 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 15 Sep 2020 15:45:33 +0200 Subject: [PATCH 083/109] Clang release notes: mention the max_tokens_here pragma --- clang/docs/ReleaseNotes.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ba0e15deb3894..c39be709d86c9 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -311,7 +311,10 @@ Modified Compiler Flags New Pragmas in Clang -------------------- -- ... +- The ``clang max_tokens_here`` pragma can be used together with + `-Wmax-tokens `_ to emit a warning when + the number of preprocessor tokens exceeds a limit. Such limits can be helpful + in limiting code growth and slow compiles due to large header files. Attribute Changes in Clang -------------------------- From 1a8e4505d860ad0faa898526fc6fdc861c981516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= Date: Tue, 25 Aug 2020 13:49:41 +0200 Subject: [PATCH 084/109] [analyzer] Add documentation for alpha.fuchsia.Lock and alpha.core.C11Lock --- clang/docs/analyzer/checkers.rst | 37 +++++++++++++++++++ .../user-docs/CrossTranslationUnit.rst | 2 + 2 files changed, 39 insertions(+) diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 1583da7aff098..ca5aec677178e 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1472,6 +1472,23 @@ Warn about assigning non-{0,1} values to boolean variables. alpha.core ^^^^^^^^^^ +.. _alpha-core-C11Lock: + +alpha.core.C11Lock +"""""""""""""""""" +Similarly to :ref:`alpha.unix.PthreadLock `, checks for +the locking/unlocking of ``mtx_t`` mutexes. + +.. code-block:: cpp + + mtx_t mtx1; + + void bad1(void) + { + mtx_lock(&mtx1); + mtx_lock(&mtx1); // warn: This lock has already been acquired + } + .. 
_alpha-core-CallAndMessageUnInitRefArg: alpha.core.CallAndMessageUnInitRefArg (C,C++, ObjC) @@ -1849,6 +1866,26 @@ Check for dereference of null smart pointers. *P; // warn: dereference of a default constructed smart unique_ptr } +alpha.fuchsia +^^^^^^^^^^^^^ + +.. _alpha-fuchsia-lock: + +alpha.fuchsia.Lock +"""""""""""""""""" +Similarly to :ref:`alpha.unix.PthreadLock `, checks for +the locking/unlocking of fuchsia mutexes. + +.. code-block:: cpp + + spin_lock_t mtx1; + + void bad1(void) + { + spin_lock(&mtx1); + spin_lock(&mtx1); // warn: This lock has already been acquired + } + alpha.llvm ^^^^^^^^^^ diff --git a/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst b/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst index 36be82f209ef2..0606185f39e64 100644 --- a/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst +++ b/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst @@ -201,6 +201,8 @@ Example usage of scan-build-py: ^C $ +.. _ctu-on-demand: + On-demand analysis __________________ The analysis produces the necessary AST structure of external TUs during analysis. This requires the From e62452bb3e1e163daf75914cdf2e86deb4debf50 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 15 Sep 2020 16:42:07 +0200 Subject: [PATCH 085/109] Revert "Double check that passes correctly set their Modified status" This check fires during self-host. > The approach is simple: if a pass reports that it's not modifying a > Function/Module, compute a loose hash of that Function/Module and compare it > with the original one. If we report no change but there's a hash change, then we > have an error. > > This approach misses a lot of change but it's not super intrusive and can > detect most of the simple mistakes. > > Differential Revision: https://reviews.llvm.org/D80916 This reverts commit 3667d87a33d3c8d4072a41fd84bb880c59347dc0. --- llvm/lib/IR/LegacyPassManager.cpp | 87 --------------------- llvm/unittests/IR/LegacyPassManagerTest.cpp | 2 +- 2 files changed, 1 insertion(+), 88 deletions(-) diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 74869fa62c66f..4189aea46294c 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -1475,74 +1475,6 @@ void FPPassManager::dumpPassStructure(unsigned Offset) { } } -#ifdef EXPENSIVE_CHECKS -namespace { -namespace details { - -// Basic hashing mechanism to detect structural change to the IR, used to verify -// pass return status consistency with actual change. 
Loosely copied from -// llvm/lib/Transforms/Utils/FunctionComparator.cpp - -class StructuralHash { - uint64_t Hash = 0x6acaa36bef8325c5ULL; - - void update(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } - -public: - StructuralHash() = default; - - void update(Function &F) { - if (F.empty()) - return; - - update(F.isVarArg()); - update(F.arg_size()); - - SmallVector BBs; - SmallPtrSet VisitedBBs; - - BBs.push_back(&F.getEntryBlock()); - VisitedBBs.insert(BBs[0]); - while (!BBs.empty()) { - const BasicBlock *BB = BBs.pop_back_val(); - update(45798); // Block header - for (auto &Inst : *BB) - update(Inst.getOpcode()); - - const Instruction *Term = BB->getTerminator(); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(Term->getSuccessor(i)).second) - continue; - BBs.push_back(Term->getSuccessor(i)); - } - } - } - - void update(Module &M) { - for (Function &F : M) - update(F); - } - - uint64_t getHash() const { return Hash; } -}; - -} // namespace details - -uint64_t StructuralHash(Function &F) { - details::StructuralHash H; - H.update(F); - return H.getHash(); -} - -uint64_t StructuralHash(Module &M) { - details::StructuralHash H; - H.update(M); - return H.getHash(); -} - -} // end anonymous namespace - -#endif /// Execute all of the passes scheduled for execution by invoking /// runOnFunction method. Keep track of whether any of the passes modifies @@ -1581,16 +1513,7 @@ bool FPPassManager::runOnFunction(Function &F) { { PassManagerPrettyStackEntry X(FP, F); TimeRegion PassTimer(getPassTimer(FP)); -#ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(F); -#endif LocalChanged |= FP->runOnFunction(F); - -#ifdef EXPENSIVE_CHECKS - assert((LocalChanged || (RefHash == StructuralHash(F))) && - "Pass modifies its input and doesn't report it."); -#endif - if (EmitICRemark) { unsigned NewSize = F.getInstructionCount(); @@ -1691,17 +1614,7 @@ MPPassManager::runOnModule(Module &M) { PassManagerPrettyStackEntry X(MP, M); TimeRegion PassTimer(getPassTimer(MP)); -#ifdef EXPENSIVE_CHECKS - uint64_t RefHash = StructuralHash(M); -#endif - LocalChanged |= MP->runOnModule(M); - -#ifdef EXPENSIVE_CHECKS - assert((LocalChanged || (RefHash == StructuralHash(M))) && - "Pass modifies its input and doesn't report it."); -#endif - if (EmitICRemark) { // Update the size of the module. unsigned ModuleCount = M.getInstructionCount(); diff --git a/llvm/unittests/IR/LegacyPassManagerTest.cpp b/llvm/unittests/IR/LegacyPassManagerTest.cpp index 8dda94b1b0326..b7801b52481dd 100644 --- a/llvm/unittests/IR/LegacyPassManagerTest.cpp +++ b/llvm/unittests/IR/LegacyPassManagerTest.cpp @@ -680,7 +680,7 @@ namespace llvm { ASSERT_EQ(M->getFunctionList().size(), 4U); Function *F = M->getFunction("test2"); Function *SF = splitSimpleFunction(*F); - CallInst::Create(F, "", &*SF->getEntryBlock().getFirstInsertionPt()); + CallInst::Create(F, "", &SF->getEntryBlock()); ASSERT_EQ(M->getFunctionList().size(), 5U); CGModifierPass *P = new CGModifierPass(); legacy::PassManager Passes; From 791b7e9f73e0064153a7c3db8045a7333a8c390c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= Date: Tue, 25 Aug 2020 13:48:04 +0200 Subject: [PATCH 086/109] [release][docs] Add 11.0.0. 
release notes for the Clang Static Analyzer Differential Revision: https://reviews.llvm.org/D86533 --- clang/docs/ReleaseNotes.rst | 69 ++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c39be709d86c9..ee257194d57fc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -603,10 +603,77 @@ libclang - ... +.. _release-notes-clang-static-analyzer: + Static Analyzer --------------- -- ... +- Improved the analyzer's understanding of inherited C++ constructors. + +- Improved the analyzer's understanding of dynamic class method dispatching in + Objective-C. + +- Greatly improved the analyzer's constraint solver by better understanding + when constraints are imposed on multiple symbolic values that are known to be + equal or known to be non-equal. It will now also efficiently reject impossible + if-branches between known comparison expressions. + +- Added :ref:`on-demand parsing ` capability to Cross Translation + Unit (CTU) analysis. + +- Numerous fixes and improvements for the HTML output. + +- New checker: :ref:`alpha.core.C11Lock ` and + :ref:`alpha.fuchsia.Lock ` checks for their respective + locking APIs. + +- New checker: :ref:`alpha.security.cert.pos.34c ` + finds calls to ``putenv`` where a pointer to an autmoatic variable is passed + as an argument. + +- New checker: :ref:`webkit.NoUncountedMemberChecker + ` to enforce the existence of virtual + destructors for all uncounted types used as base classes. + +- New checker: :ref:`webkit.RefCntblBaseVirtualDtor + ` checks that only ref-counted types + are used as class members, not raw pointers and references to uncounted + types. + +- New checker: :ref:`alpha.cplusplus.SmartPtr ` check + for dereference of null smart pointers. + +- Moved ``PlacementNewChecker`` out of alpha, making it enabled by default. + +- Added support for multi-dimensional variadic arrays in ``core.VLASize``. + +- Added a check for insufficient storage in array placement new calls, as well + as support for alignment variants to ``cplusplus.PlacementNew``. + +- While still in alpha, ``alpha.unix.PthreadLock``, the iterator and container + modeling infrastructure, ``alpha.unix.Stream``, and taint analysis were + improved greatly. An ongoing, currently off-by-default improvement was made on + the pre-condition modeling of several functions defined in the POSIX standard. + +- Improved the warning messages of several checkers. + +- Fixed a few remaining cases of checkers emitting reports under incorrect + checker names, and employed a few restrictions to more easily identify and + avoid such errors. + +- Moved several non-reporting checkers (those that model, among other things, + standard functions, but emit no diagnostics) to be listed under + ``-analyzer-checker-help-developer`` instead of ``-analyzer-checker-help``. + Manually enabling or disabling checkers found on this list is not supported + in production. + +- Numerous fixes for crashes, false positives and false negatives in + ``unix.Malloc``, ``osx.cocoa.NSError``, and several other checkers. + +- Implemented a dockerized testing system to more easily determine the + correctness and performance impact of a change to the static analyzer itself. + The currently beta-version tool is found in + ``(llvm-project repository)/clang/utils/analyzer/SATest.py``. .. 
_release-notes-ubsan: From 22dab218407e159631fd0689cb4412646b51515a Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 15 Sep 2020 22:03:50 +0800 Subject: [PATCH 087/109] Revert "[SelectionDAG] Remove unused FP constant in getNegatedExpression" 2508ef01 doesn't totally fix the issue since we did not handle the case when unused temporary negated result is the same with the result, which is found by address sanitizer. (cherry picked from commit e1669843f2aaf1e4929afdd8f125c14536d27664) --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 4 +-- llvm/test/CodeGen/X86/pr47517.ll | 28 ------------------- 2 files changed, 1 insertion(+), 31 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/pr47517.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4ebb99c978415..819e608c6896e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5751,10 +5751,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // If we already have the use of the negated floating constant, it is free // to negate it even it has multiple uses. - if (!Op.hasOneUse() && CFP.use_empty()) { - RemoveDeadNode(CFP); + if (!Op.hasOneUse() && CFP.use_empty()) break; - } Cost = NegatibleCost::Neutral; return CFP; } diff --git a/llvm/test/CodeGen/X86/pr47517.ll b/llvm/test/CodeGen/X86/pr47517.ll deleted file mode 100644 index 6b508acf15dda..0000000000000 --- a/llvm/test/CodeGen/X86/pr47517.ll +++ /dev/null @@ -1,28 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple x86_64 < %s | FileCheck %s - -; To ensure unused floating point constant is removed in negation -define float @test(float %src, float* %p) { -; CHECK-LABEL: test: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq $0, (%rdi) -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: retq -entry: - %a0 = getelementptr inbounds float, float* %p, i32 0 - %a1 = getelementptr inbounds float, float* %p, i32 1 - store float 0.000000e+00, float* %a0 - store float 0.000000e+00, float* %a1 - %zero = load float, float* %a0 - %fmul1 = fmul fast float %zero, %src - %fadd1 = fadd fast float %fmul1, %zero - %fmul2 = fmul fast float %fadd1, 2.000000e+00 - %fmul3 = fmul fast float %fmul2, %fmul2 - %fmul4 = fmul fast float %fmul2, 2.000000e+00 - %fadd2 = fadd fast float %fmul4, -3.000000e+00 - %fmul5 = fmul fast float %fadd2, %fmul2 - %fadd3 = fadd fast float %fmul2, %src - %fadd4 = fadd fast float %fadd3, %fmul5 - %fmul6 = fmul fast float %fmul3, %fadd4 - ret float %fmul6 -} From d3f1f588f902a968f102d6cdaf052674efc257aa Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 1 Sep 2020 09:43:11 -0700 Subject: [PATCH 088/109] [Docs] Add/update release notes for D71913 (LTO WPD changes) This adds documentation for the options added / changed by D71913, which enabled aggressive WPD under LTO. The lld release notes already mentioned it, but I expanded the note. 
Differential Revision: https://reviews.llvm.org/D86958 --- clang/docs/ReleaseNotes.rst | 3 ++- lld/docs/ReleaseNotes.rst | 2 +- llvm/docs/ReleaseNotes.rst | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ee257194d57fc..d90b8f182ef9c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -306,7 +306,8 @@ Modified Compiler Flags - -mcpu is now supported for RISC-V, and recognises the generic-rv32, rocket-rv32, sifive-e31, generic-rv64, rocket-rv64, and sifive-u54 target CPUs. - +- ``-fwhole-program-vtables`` (along with ``-flto*``) now prepares all classes for possible whole program visibility if specified during the LTO link. + (`D71913 `_) New Pragmas in Clang -------------------- diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 466a7f7073549..880f933e51be2 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -26,7 +26,7 @@ ELF Improvements * ``--lto-emit-asm`` is added to emit assembly output for debugging purposes. (`D77231 `_) -* ``--lto-whole-program-visibility`` is added to support LTO whole-program devirtualization. +* ``--lto-whole-program-visibility`` is added to specify that classes have hidden LTO visibility in LTO and ThinLTO links of source files compiled with ``-fwhole-program-vtables``. See `LTOVisibility `_ for details. (`D71913 `_) * ``--print-archive-stats=`` is added to print the number of members and the number of fetched members for each archive. The feature is similar to GNU gold's ``--print-symbol-counts=``. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 0d5e0137bbc4d..e87bf3d146f54 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -333,6 +333,12 @@ Changes to the Debug Info passed to the callee. The feature improves the debugging user experience when debugging optimized code. +Changes to the Gold Plugin +-------------------------- + +* ``--plugin-opt=whole-program-visibility`` is added to specify that classes have hidden LTO visibility in LTO and ThinLTO links of source files compiled with ``-fwhole-program-vtables``. See `LTOVisibility `_ for details. + (`D71913 `_) + Changes to the LLVM tools --------------------------------- From 1596c2dfd548b21cf33ad3353882ac465d78c1bb Mon Sep 17 00:00:00 2001 From: Joachim Priesner Date: Thu, 20 Aug 2020 09:15:29 -0400 Subject: [PATCH 089/109] Fix -allow-enabling-analyzer-alpha-checkers always being passed to run-clang-tidy.py The action='store_true' option of argparse.add_argument implicitly generates a default value of False if the argument is not specified. Thus, the allow_enabling_alpha_checkers argument of get_tidy_invocation is never None. 
--- clang-tools-extra/clang-tidy/tool/run-clang-tidy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index 4272ae0957fe2..7e23419cd9169 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -84,7 +84,7 @@ def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, extra_arg, extra_arg_before, quiet, config): """Gets a command line for clang-tidy.""" start = [clang_tidy_binary] - if allow_enabling_alpha_checkers is not None: + if allow_enabling_alpha_checkers: start.append('-allow-enabling-analyzer-alpha-checkers') if header_filter is not None: start.append('-header-filter=' + header_filter) From 4b23932e230dd48a7bfc6fadb461d0ef81aeba94 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 15 Sep 2020 18:38:48 +0200 Subject: [PATCH 090/109] [OPENMP][NFC]Release notes for OpenMP in clang (11.x). By Alexey Bataev! Differential revision: https://reviews.llvm.org/D86562 --- clang/docs/ReleaseNotes.rst | 47 ++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index d90b8f182ef9c..1c02c478be688 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -439,7 +439,52 @@ ABI Changes in Clang OpenMP Support in Clang ----------------------- -- ... +New features for OpenMP 5.0 were implemented. + +- OpenMP 5.0 is the default version supported by the compiler. User can switch + to OpenMP 4.5 using ``-fopenmp-version=45`` option. + +- Added support for declare variant directive. + +- Improved support of math functions and complex types for NVPTX target. + +- Added support for parallel execution of target regions for NVPTX target. + +- Added support for ``scan`` directives and ``inscan`` modifier in ``reduction`` + clauses. + +- Added support for ``iterator`` construct. + +- Added support for ``depobj`` construct. + +- Added support for ``detach`` clauses in task-based directives. + +- Added support for array shaping operations. + +- Added support for cancellation constructs in ``taskloop`` directives. + +- Nonmonotonic modifier is allowed with all schedule kinds. + +- Added support for ``task`` and ``default`` modifiers in ``reduction`` clauses. + +- Added support for strides in array sections. + +- Added support for ``use_device_addr`` clause. + +- Added support for ``uses_allocators`` clause. + +- Added support for ``defaultmap`` clause. + +- Added basic support for ``hint`` clause in ``atomic`` directives. + +- Added basic support for ``affinity`` clause. + +- Added basic support for ``ancestor`` modifier in ``device`` clause. + +- Added support for ``default(firstprivate)`` clause. This clause is the part of + upcoming OpenMP 5.1 and can be enabled using ``-fopenmp-version=51`` option. + +- Bug fixes and optimizations. CUDA Support in Clang --------------------- From 6e042866c307c0ebe35094e6590dc1a1372f13c9 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 15 Sep 2020 10:47:02 +0200 Subject: [PATCH 091/109] Revert "RegAllocFast: Record internal state based on register units" This seems to have caused incorrect register allocation in some cases, breaking tests in the Zig standard library (PR47278). As discussed on the bug, revert back to green for now. > Record internal state based on register units. 
This is often more > efficient as there are typically fewer register units to update > compared to iterating over all the aliases of a register. > > Original patch by Matthias Braun, but I've been rebasing and fixing it > for almost 2 years and fixed a few bugs causing intermediate failures > to make this patch independent of the changes in > https://reviews.llvm.org/D52010. This reverts commit 66251f7e1de79a7c1620659b7f58352b8c8e892e, and follow-ups 931a68f26b9a3de853807ffad7b2cd0a2dd30922 and 0671a4c5087d40450603d9d26cf239f1a8b1367e. It also adjust some test expectations. (cherry picked from commit a21387c65470417c58021f8d3194a4510bb64f46) --- llvm/lib/CodeGen/RegAllocFast.cpp | 217 +-- .../arm64-fast-isel-conversion-fallback.ll | 8 +- .../AArch64/arm64-fast-isel-conversion.ll | 8 +- llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll | 8 +- .../CodeGen/AArch64/fast-isel-sp-adjust.ll | 3 +- llvm/test/CodeGen/AArch64/popcount.ll | 37 +- .../AMDGPU/indirect-addressing-term.ll | 12 +- .../AMDGPU/partial-sgpr-to-vgpr-spills.ll | 1260 ++++++++--------- llvm/test/CodeGen/AMDGPU/spill-m0.ll | 95 -- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 8 +- llvm/test/CodeGen/ARM/legalize-bitcast.ll | 6 +- .../GlobalISel/llvm-ir/fptosi_and_fptoui.ll | 72 +- llvm/test/CodeGen/Mips/atomic-min-max.ll | 960 ++++++------- llvm/test/CodeGen/Mips/atomic.ll | 282 ++-- llvm/test/CodeGen/Mips/implicit-sret.ll | 14 +- llvm/test/CodeGen/PowerPC/addegluecrash.ll | 10 +- llvm/test/CodeGen/PowerPC/popcount.ll | 14 +- llvm/test/CodeGen/PowerPC/vsx.ll | 54 +- llvm/test/CodeGen/SPARC/fp16-promote.ll | 10 +- .../CodeGen/X86/2009-04-14-IllegalRegs.ll | 29 +- llvm/test/CodeGen/X86/atomic-unordered.ll | 58 +- llvm/test/CodeGen/X86/atomic32.ll | 122 +- llvm/test/CodeGen/X86/atomic64.ll | 40 +- llvm/test/CodeGen/X86/avx-load-store.ll | 22 +- .../CodeGen/X86/avx512-mask-zext-bugfix.ll | 22 +- llvm/test/CodeGen/X86/crash-O0.ll | 9 +- .../CodeGen/X86/extend-set-cc-uses-dbg.ll | 4 +- .../test/CodeGen/X86/fast-isel-nontemporal.ll | 60 +- llvm/test/CodeGen/X86/lvi-hardening-loads.ll | 4 +- llvm/test/CodeGen/X86/mixed-ptr-sizes.ll | 102 +- llvm/test/CodeGen/X86/pr1489.ll | 24 +- llvm/test/CodeGen/X86/pr27591.ll | 14 +- llvm/test/CodeGen/X86/pr30430.ll | 34 +- llvm/test/CodeGen/X86/pr30813.ll | 5 +- llvm/test/CodeGen/X86/pr32241.ll | 18 +- llvm/test/CodeGen/X86/pr32284.ll | 274 ++-- llvm/test/CodeGen/X86/pr32340.ll | 54 +- llvm/test/CodeGen/X86/pr32345.ll | 63 +- llvm/test/CodeGen/X86/pr32451.ll | 23 +- llvm/test/CodeGen/X86/pr34592.ll | 25 +- llvm/test/CodeGen/X86/pr39733.ll | 4 +- llvm/test/CodeGen/X86/pr44749.ll | 24 +- llvm/test/CodeGen/X86/pr47000.ll | 135 +- .../regalloc-fast-missing-live-out-spill.mir | 8 +- llvm/test/CodeGen/X86/swift-return.ll | 41 +- llvm/test/CodeGen/X86/swifterror.ll | 4 +- llvm/test/DebugInfo/X86/op_deref.ll | 8 +- 47 files changed, 2155 insertions(+), 2153 deletions(-) diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 5396f9f3a1432..cf3eaba23bee9 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -106,8 +106,13 @@ namespace { /// that it is alive across blocks. BitVector MayLiveAcrossBlocks; - /// State of a register unit. - enum RegUnitState { + /// State of a physical register. + enum RegState { + /// A disabled register is not available for allocation, but an alias may + /// be in use. A register can only be moved out of the disabled state if + /// all aliases are disabled. 
+ regDisabled, + /// A free register is not currently in use and can be allocated /// immediately without checking aliases. regFree, @@ -121,8 +126,8 @@ namespace { /// register. In that case, LiveVirtRegs contains the inverse mapping. }; - /// Maps each physical register to a RegUnitState enum or virtual register. - std::vector RegUnitStates; + /// Maps each physical register to a RegState enum or a virtual register. + std::vector PhysRegState; SmallVector VirtDead; SmallVector Coalesced; @@ -184,10 +189,6 @@ namespace { bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); -#ifndef NDEBUG - bool verifyRegStateMapping(const LiveReg &LR) const; -#endif - void killVirtReg(LiveReg &LR); void killVirtReg(Register VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); @@ -195,7 +196,7 @@ namespace { void usePhysReg(MachineOperand &MO); void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - unsigned NewState); + RegState NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); @@ -228,7 +229,7 @@ namespace { bool mayLiveOut(Register VirtReg); bool mayLiveIn(Register VirtReg); - void dumpState() const; + void dumpState(); }; } // end anonymous namespace @@ -239,8 +240,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) - RegUnitStates[*UI] = NewState; + PhysRegState[PhysReg] = NewState; } /// This allocates space for the specified virtual register to be held on the @@ -384,23 +384,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { } } -#ifndef NDEBUG -bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { - for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { - if (RegUnitStates[*UI] != LR.VirtReg) - return false; - } - - return true; -} -#endif - /// Mark virtreg as no longer available. void RegAllocFast::killVirtReg(LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); addKillFlag(LR); - MCPhysReg PhysReg = LR.PhysReg; - setPhysRegState(PhysReg, regFree); + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && + "Broken RegState mapping"); + setPhysRegState(LR.PhysReg, regFree); LR.PhysReg = 0; } @@ -427,9 +416,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, /// Do the actual work of spilling. 
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - - MCPhysReg PhysReg = LR.PhysReg; + assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the @@ -437,7 +424,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - spill(MI, LR.VirtReg, PhysReg, SpillKill); + spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); if (SpillKill) LR.LastUse = nullptr; // Don't kill register again @@ -473,16 +460,53 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); + switch (PhysRegState[PhysReg]) { + case regDisabled: + break; + case regReserved: + PhysRegState[PhysReg] = regFree; + LLVM_FALLTHROUGH; + case regFree: + MO.setIsKill(); + return; + default: + // The physreg was allocated to a virtual register. That means the value we + // wanted has been clobbered. + llvm_unreachable("Instruction uses an allocated register"); + } - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (RegUnitStates[*UI]) { + // Maybe a superregister is reserved? + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + MCPhysReg Alias = *AI; + switch (PhysRegState[Alias]) { + case regDisabled: + break; case regReserved: - RegUnitStates[*UI] = regFree; + // Either PhysReg is a subregister of Alias and we mark the + // whole register as free, or PhysReg is the superregister of + // Alias and we mark all the aliases as disabled before freeing + // PhysReg. + // In the latter case, since PhysReg was disabled, this means that + // its value is defined only by physical sub-registers. This check + // is performed by the assert of the default case in this loop. + // Note: The value of the superregister may only be partial + // defined, that is why regDisabled is a valid state for aliases. + assert((TRI->isSuperRegister(PhysReg, Alias) || + TRI->isSuperRegister(Alias, PhysReg)) && + "Instruction is not using a subregister of a reserved register"); LLVM_FALLTHROUGH; case regFree: + if (TRI->isSuperRegister(PhysReg, Alias)) { + // Leave the superregister in the working set. + setPhysRegState(Alias, regFree); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + } + // Some other alias was in the working set - clear it. + setPhysRegState(Alias, regDisabled); break; default: - llvm_unreachable("Unexpected reg unit state"); + llvm_unreachable("Instruction uses an alias of an allocated register"); } } @@ -495,20 +519,38 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, unsigned NewState) { - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (unsigned VirtReg = RegUnitStates[*UI]) { + MCPhysReg PhysReg, RegState NewState) { + markRegUsedInInstr(PhysReg); + switch (Register VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + LLVM_FALLTHROUGH; + case regFree: + case regReserved: + setPhysRegState(PhysReg, NewState); + return; + } + + // This is a disabled register, disable all aliases. 
+ setPhysRegState(PhysReg, NewState); + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + MCPhysReg Alias = *AI; + switch (Register VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; default: spillVirtReg(MI, VirtReg); - break; + LLVM_FALLTHROUGH; case regFree: case regReserved: + setPhysRegState(Alias, regDisabled); + if (TRI->isSuperRegister(PhysReg, Alias)) + return; break; } } - - markRegUsedInInstr(PhysReg); - setPhysRegState(PhysReg, NewState); } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -521,24 +563,46 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << " is already used in instr.\n"); return spillImpossible; } + switch (Register VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + case regFree: + return 0; + case regReserved: + LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " + << printReg(PhysReg, TRI) << " is reserved already.\n"); + return spillImpossible; + default: { + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + return LRI->Dirty ? spillDirty : spillClean; + } + } - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (unsigned VirtReg = RegUnitStates[*UI]) { + // This is a disabled register, add up cost of aliases. + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); + unsigned Cost = 0; + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + MCPhysReg Alias = *AI; + switch (Register VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; case regFree: + ++Cost; break; case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; + Cost += LRI->Dirty ? spillDirty : spillClean; + break; } } } - return 0; + return Cost; } /// This method updates local state so that we know that PhysReg is the @@ -845,17 +909,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, if (!Reg || !Reg.isPhysical()) continue; markRegUsedInInstr(Reg); - - for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { - if (!ThroughRegs.count(RegUnitStates[*UI])) - continue; - - // Need to spill any aliasing registers. 
- for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { - for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { - definePhysReg(MI, *SI, regFree); - } - } + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + if (ThroughRegs.count(PhysRegState[*AI])) + definePhysReg(MI, *AI, regFree); } } @@ -919,40 +975,37 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } #ifndef NDEBUG - -void RegAllocFast::dumpState() const { - for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; - ++Unit) { - switch (unsigned VirtReg = RegUnitStates[Unit]) { +void RegAllocFast::dumpState() { + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << printReg(Reg, TRI); + switch(PhysRegState[Reg]) { case regFree: break; case regReserved: - dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; + dbgs() << "*"; break; default: { - dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); - if (I->Dirty) - dbgs() << "[D]"; - assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); + dbgs() << '=' << printReg(PhysRegState[Reg]); + LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); + assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && + "Missing VirtReg entry"); + if (LRI->Dirty) + dbgs() << "*"; + assert(LRI->PhysReg == Reg && "Bad inverse map"); break; } } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. - for (const LiveReg &LR : LiveVirtRegs) { - Register VirtReg = LR.VirtReg; - assert(VirtReg.isVirtual() && "Bad map key"); - MCPhysReg PhysReg = LR.PhysReg; - if (PhysReg != 0) { - assert(Register::isPhysicalRegister(PhysReg) && - "mapped to physreg"); - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - assert(RegUnitStates[*UI] == VirtReg && "inverse map valid"); - } - } + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { + if (!i->PhysReg) + continue; + assert(i->VirtReg.isVirtual() && "Bad map key"); + assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } } #endif @@ -1194,7 +1247,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); - RegUnitStates.assign(TRI->getNumRegUnits(), regFree); + PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB.begin(); diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll index 7c546936ba27a..392af063eb8a0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -4,8 +4,8 @@ define i32 @fptosi_wh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptosi_wh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzs [[REG:w[0-9]+]], s0 +; CHECK: fcvt s1, h0 +; CHECK: fcvtzs [[REG:w[0-9]+]], s1 ; CHECK: mov w0, [[REG]] %conv = fptosi half %a to i32 ret i32 %conv @@ -15,8 +15,8 @@ entry: define i32 @fptoui_swh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptoui_swh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzu [[REG:w[0-9]+]], s0 +; CHECK: fcvt s1, h0 +; CHECK: fcvtzu [[REG:w[0-9]+]], s1 ; CHECK: mov 
w0, [[REG]] %conv = fptoui half %a to i32 ret i32 %conv diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll index d8abf14c1366b..ed03aec07e7da 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -54,8 +54,8 @@ entry: ; CHECK: ldrh w8, [sp, #12] ; CHECK: str w8, [sp, #8] ; CHECK: ldr w8, [sp, #8] -; CHECK: ; kill: def $x8 killed $w8 -; CHECK: str x8, [sp] +; CHECK: mov x9, x8 +; CHECK: str x9, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -109,8 +109,8 @@ entry: ; CHECK: strh w8, [sp, #12] ; CHECK: ldrsh w8, [sp, #12] ; CHECK: str w8, [sp, #8] -; CHECK: ldrsw x8, [sp, #8] -; CHECK: str x8, [sp] +; CHECK: ldrsw x9, [sp, #8] +; CHECK: str x9, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index e1e889b906c01..6b3e8d747d43d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -285,11 +285,11 @@ define i16 @to_half(float %in) { ; FAST: // %bb.0: ; FAST-NEXT: sub sp, sp, #16 // =16 ; FAST-NEXT: .cfi_def_cfa_offset 16 -; FAST-NEXT: fcvt h0, s0 +; FAST-NEXT: fcvt h1, s0 ; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s1, w0 -; FAST-NEXT: mov.16b v1, v0 -; FAST-NEXT: fmov w8, s1 +; FAST-NEXT: fmov s0, w0 +; FAST-NEXT: mov.16b v0, v1 +; FAST-NEXT: fmov w8, s0 ; FAST-NEXT: mov w0, w8 ; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill ; FAST-NEXT: mov w0, w8 diff --git a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll index 22e3ccf2b1209..8d62fb3556661 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -15,7 +15,8 @@ ; CHECK-LABEL: foo: ; CHECK: sub ; CHECK-DAG: mov x[[SP:[0-9]+]], sp -; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104 +; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 +; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] ; CHECK: strb w0, [x[[SP]], x[[OFFSET]]] define void @foo(i8 %in) { diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll index 105969717e46b..1e796fff710c0 100644 --- a/llvm/test/CodeGen/AArch64/popcount.ll +++ b/llvm/test/CodeGen/AArch64/popcount.ll @@ -10,11 +10,12 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) { ; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b +; CHECK-NEXT: cnt v1.16b, v1.16b +; CHECK-NEXT: uaddlv h2, v1.16b ; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: fmov w1, s1 +; CHECK-NEXT: mov w0, w1 ; CHECK-NEXT: ret Entry: %1 = load i128, i128* %0, align 16 @@ -36,21 +37,21 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) { ; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b +; CHECK-NEXT: cnt v1.16b, v1.16b +; CHECK-NEXT: uaddlv h2, v1.16b ; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: fmov w10, s1 ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.d[1], x8 
-; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b +; CHECK-NEXT: cnt v1.16b, v1.16b +; CHECK-NEXT: uaddlv h2, v1.16b ; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: add w0, w11, w10 ; CHECK-NEXT: ret Entry: %1 = load i256, i256* %0, align 16 @@ -69,11 +70,11 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: cnt v0.16b, v0.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: // kill: def $x0 killed $w0 +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w2, s0 +; CHECK-NEXT: mov w0, w2 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll index e26b1c9471049..40ef3b00da6d4 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -69,15 +69,15 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15 ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 - ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN: renamable $sgpr20_sgpr21 = S_MOV_B64 $exec ; GCN: renamable $vgpr1 = IMPLICIT_DEF - ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF + ; GCN: renamable $sgpr22_sgpr23 = IMPLICIT_DEF ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr20_sgpr21, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5) @@ -91,8 +91,8 @@ define amdgpu_kernel void 
@extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 - ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5 + ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index b119ffd303e08..e991c550c6be0 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -11,7 +11,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { ; GCN-LABEL: spill_sgprs_to_multiple_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND @@ -42,354 +42,352 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[84:91] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 8 +; GCN-NEXT: v_writelane_b32 v0, s5, 9 +; GCN-NEXT: v_writelane_b32 v0, s6, 10 +; GCN-NEXT: v_writelane_b32 v0, s7, 11 +; GCN-NEXT: v_writelane_b32 v0, s8, 12 +; GCN-NEXT: v_writelane_b32 v0, s9, 13 +; GCN-NEXT: v_writelane_b32 v0, s10, 14 +; GCN-NEXT: v_writelane_b32 v0, s11, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 24 +; GCN-NEXT: v_writelane_b32 v0, s5, 25 +; GCN-NEXT: v_writelane_b32 v0, s6, 26 +; GCN-NEXT: v_writelane_b32 v0, s7, 27 +; GCN-NEXT: v_writelane_b32 v0, s8, 28 +; GCN-NEXT: v_writelane_b32 v0, s9, 29 +; GCN-NEXT: 
v_writelane_b32 v0, s10, 30 +; GCN-NEXT: v_writelane_b32 v0, s11, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 32 +; GCN-NEXT: v_writelane_b32 v0, s5, 33 +; GCN-NEXT: v_writelane_b32 v0, s6, 34 +; GCN-NEXT: v_writelane_b32 v0, s7, 35 +; GCN-NEXT: v_writelane_b32 v0, s8, 36 +; GCN-NEXT: v_writelane_b32 v0, s9, 37 +; GCN-NEXT: v_writelane_b32 v0, s10, 38 +; GCN-NEXT: v_writelane_b32 v0, s11, 39 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 40 +; GCN-NEXT: v_writelane_b32 v0, s5, 41 +; GCN-NEXT: v_writelane_b32 v0, s6, 42 +; GCN-NEXT: v_writelane_b32 v0, s7, 43 +; GCN-NEXT: v_writelane_b32 v0, s8, 44 +; GCN-NEXT: v_writelane_b32 v0, s9, 45 +; GCN-NEXT: v_writelane_b32 v0, s10, 46 +; GCN-NEXT: v_writelane_b32 v0, s11, 47 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 -; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 9 -; GCN-NEXT: v_writelane_b32 v0, s1, 10 -; GCN-NEXT: v_writelane_b32 v0, s2, 11 -; GCN-NEXT: v_writelane_b32 v0, s3, 12 -; GCN-NEXT: v_writelane_b32 v0, s4, 13 -; GCN-NEXT: v_writelane_b32 v0, s5, 14 -; GCN-NEXT: v_writelane_b32 v0, s6, 15 -; GCN-NEXT: v_writelane_b32 v0, s7, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 25 -; GCN-NEXT: v_writelane_b32 v0, s1, 26 -; GCN-NEXT: v_writelane_b32 v0, s2, 27 -; GCN-NEXT: v_writelane_b32 v0, s3, 28 -; GCN-NEXT: v_writelane_b32 v0, s4, 29 -; GCN-NEXT: v_writelane_b32 v0, s5, 30 -; GCN-NEXT: v_writelane_b32 v0, s6, 31 -; GCN-NEXT: v_writelane_b32 v0, s7, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 33 -; GCN-NEXT: v_writelane_b32 v0, s1, 34 -; GCN-NEXT: v_writelane_b32 v0, s2, 35 -; GCN-NEXT: v_writelane_b32 v0, s3, 36 -; GCN-NEXT: v_writelane_b32 v0, s4, 37 -; GCN-NEXT: v_writelane_b32 v0, s5, 38 -; GCN-NEXT: v_writelane_b32 v0, s6, 39 -; GCN-NEXT: v_writelane_b32 v0, s7, 40 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 41 -; GCN-NEXT: v_writelane_b32 v0, s1, 42 -; GCN-NEXT: v_writelane_b32 v0, s2, 43 -; GCN-NEXT: v_writelane_b32 v0, 
s3, 44 -; GCN-NEXT: v_writelane_b32 v0, s4, 45 -; GCN-NEXT: v_writelane_b32 v0, s5, 46 -; GCN-NEXT: v_writelane_b32 v0, s6, 47 -; GCN-NEXT: v_writelane_b32 v0, s7, 48 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 49 -; GCN-NEXT: v_writelane_b32 v0, s1, 50 -; GCN-NEXT: v_writelane_b32 v0, s2, 51 -; GCN-NEXT: v_writelane_b32 v0, s3, 52 -; GCN-NEXT: v_writelane_b32 v0, s4, 53 -; GCN-NEXT: v_writelane_b32 v0, s5, 54 -; GCN-NEXT: v_writelane_b32 v0, s6, 55 -; GCN-NEXT: v_writelane_b32 v0, s7, 56 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s8, 0 -; GCN-NEXT: v_readlane_b32 s9, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s9, s8 -; GCN-NEXT: v_writelane_b32 v0, s12, 57 -; GCN-NEXT: v_writelane_b32 v0, s13, 58 -; GCN-NEXT: v_writelane_b32 v0, s14, 59 -; GCN-NEXT: v_writelane_b32 v0, s15, 60 -; GCN-NEXT: v_writelane_b32 v0, s16, 61 -; GCN-NEXT: v_writelane_b32 v0, s17, 62 -; GCN-NEXT: v_writelane_b32 v0, s18, 63 -; GCN-NEXT: v_writelane_b32 v1, s19, 0 -; GCN-NEXT: v_writelane_b32 v1, s20, 1 -; GCN-NEXT: v_writelane_b32 v1, s21, 2 -; GCN-NEXT: v_writelane_b32 v1, s22, 3 -; GCN-NEXT: v_writelane_b32 v1, s23, 4 -; GCN-NEXT: v_writelane_b32 v1, s24, 5 -; GCN-NEXT: v_writelane_b32 v1, s25, 6 -; GCN-NEXT: v_writelane_b32 v1, s26, 7 -; GCN-NEXT: v_writelane_b32 v1, s27, 8 -; GCN-NEXT: v_writelane_b32 v1, s36, 9 -; GCN-NEXT: v_writelane_b32 v1, s37, 10 -; GCN-NEXT: v_writelane_b32 v1, s38, 11 -; GCN-NEXT: v_writelane_b32 v1, s39, 12 -; GCN-NEXT: v_writelane_b32 v1, s40, 13 -; GCN-NEXT: v_writelane_b32 v1, s41, 14 -; GCN-NEXT: v_writelane_b32 v1, s42, 15 -; GCN-NEXT: v_writelane_b32 v1, s43, 16 -; GCN-NEXT: v_writelane_b32 v1, s44, 17 -; GCN-NEXT: v_writelane_b32 v1, s45, 18 -; GCN-NEXT: v_writelane_b32 v1, s46, 19 -; GCN-NEXT: v_writelane_b32 v1, s47, 20 -; GCN-NEXT: v_writelane_b32 v1, s48, 21 -; GCN-NEXT: v_writelane_b32 v1, s49, 22 -; GCN-NEXT: v_writelane_b32 v1, s50, 23 -; GCN-NEXT: v_writelane_b32 v1, s51, 24 -; GCN-NEXT: v_writelane_b32 v1, s52, 25 -; GCN-NEXT: v_writelane_b32 v1, s53, 26 -; GCN-NEXT: v_writelane_b32 v1, s54, 27 -; GCN-NEXT: v_writelane_b32 v1, s55, 28 -; GCN-NEXT: v_writelane_b32 v1, s56, 29 -; GCN-NEXT: v_writelane_b32 v1, s57, 30 -; GCN-NEXT: v_writelane_b32 v1, s58, 31 -; GCN-NEXT: v_writelane_b32 v1, s59, 32 -; GCN-NEXT: v_writelane_b32 v1, s60, 33 -; GCN-NEXT: v_writelane_b32 v1, s61, 34 -; GCN-NEXT: v_writelane_b32 v1, s62, 35 -; GCN-NEXT: v_writelane_b32 v1, s63, 36 -; GCN-NEXT: v_writelane_b32 v1, s64, 37 -; GCN-NEXT: v_writelane_b32 v1, s65, 38 -; GCN-NEXT: v_writelane_b32 v1, s66, 39 -; GCN-NEXT: v_writelane_b32 v1, s67, 40 -; GCN-NEXT: v_writelane_b32 v1, s68, 41 -; GCN-NEXT: v_writelane_b32 v1, s69, 42 -; GCN-NEXT: v_writelane_b32 v1, s70, 43 -; GCN-NEXT: v_writelane_b32 v1, s71, 44 -; GCN-NEXT: v_writelane_b32 v1, s72, 45 -; GCN-NEXT: v_writelane_b32 v1, s73, 46 -; GCN-NEXT: v_writelane_b32 v1, s74, 47 -; GCN-NEXT: v_writelane_b32 v1, s75, 48 -; GCN-NEXT: v_writelane_b32 v1, s76, 49 -; GCN-NEXT: v_writelane_b32 v1, s77, 50 -; GCN-NEXT: v_writelane_b32 v1, s78, 51 -; GCN-NEXT: v_writelane_b32 v1, s79, 52 -; GCN-NEXT: v_writelane_b32 v1, s80, 53 -; GCN-NEXT: v_writelane_b32 v1, s81, 54 -; GCN-NEXT: v_writelane_b32 v1, s82, 55 -; GCN-NEXT: v_writelane_b32 v1, s83, 56 -; GCN-NEXT: v_writelane_b32 v1, s84, 57 -; GCN-NEXT: v_writelane_b32 v1, s85, 58 -; GCN-NEXT: v_writelane_b32 v1, s86, 59 -; GCN-NEXT: v_writelane_b32 v1, s87, 60 -; GCN-NEXT: v_writelane_b32 v1, s88, 61 
-; GCN-NEXT: v_writelane_b32 v1, s89, 62 -; GCN-NEXT: v_writelane_b32 v1, s90, 63 -; GCN-NEXT: v_writelane_b32 v2, s91, 0 -; GCN-NEXT: v_writelane_b32 v2, s0, 1 -; GCN-NEXT: v_writelane_b32 v2, s1, 2 -; GCN-NEXT: v_writelane_b32 v2, s2, 3 -; GCN-NEXT: v_writelane_b32 v2, s3, 4 -; GCN-NEXT: v_writelane_b32 v2, s4, 5 -; GCN-NEXT: v_writelane_b32 v2, s5, 6 -; GCN-NEXT: v_writelane_b32 v2, s6, 7 -; GCN-NEXT: v_writelane_b32 v2, s7, 8 +; GCN-NEXT: s_cmp_lg_u32 s2, s3 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; GCN-NEXT: v_writelane_b32 v0, s19, 63 +; GCN-NEXT: v_writelane_b32 v1, s20, 0 +; GCN-NEXT: v_writelane_b32 v1, s21, 1 +; GCN-NEXT: v_writelane_b32 v1, s22, 2 +; GCN-NEXT: v_writelane_b32 v1, s23, 3 +; GCN-NEXT: v_writelane_b32 v1, s24, 4 +; GCN-NEXT: v_writelane_b32 v1, s25, 5 +; GCN-NEXT: v_writelane_b32 v1, s26, 6 +; GCN-NEXT: v_writelane_b32 v1, s27, 7 +; GCN-NEXT: v_writelane_b32 v1, s36, 8 +; GCN-NEXT: v_writelane_b32 v1, s37, 9 +; GCN-NEXT: v_writelane_b32 v1, s38, 10 +; GCN-NEXT: v_writelane_b32 v1, s39, 11 +; GCN-NEXT: v_writelane_b32 v1, s40, 12 +; GCN-NEXT: v_writelane_b32 v1, s41, 13 +; GCN-NEXT: v_writelane_b32 v1, s42, 14 +; GCN-NEXT: v_writelane_b32 v1, s43, 15 +; GCN-NEXT: v_writelane_b32 v1, s44, 16 +; GCN-NEXT: v_writelane_b32 v1, s45, 17 +; GCN-NEXT: v_writelane_b32 v1, s46, 18 +; GCN-NEXT: v_writelane_b32 v1, s47, 19 +; GCN-NEXT: v_writelane_b32 v1, s48, 20 +; GCN-NEXT: v_writelane_b32 v1, s49, 21 +; GCN-NEXT: v_writelane_b32 v1, s50, 22 +; GCN-NEXT: v_writelane_b32 v1, s51, 23 +; GCN-NEXT: v_writelane_b32 v1, s52, 24 +; GCN-NEXT: v_writelane_b32 v1, s53, 25 +; GCN-NEXT: v_writelane_b32 v1, s54, 26 +; GCN-NEXT: v_writelane_b32 v1, s55, 27 +; GCN-NEXT: v_writelane_b32 v1, s56, 28 +; GCN-NEXT: v_writelane_b32 v1, s57, 29 +; GCN-NEXT: v_writelane_b32 v1, s58, 30 +; GCN-NEXT: v_writelane_b32 v1, s59, 31 +; GCN-NEXT: v_writelane_b32 v1, s60, 32 +; GCN-NEXT: v_writelane_b32 v1, s61, 33 +; GCN-NEXT: v_writelane_b32 v1, s62, 34 +; GCN-NEXT: v_writelane_b32 v1, s63, 35 +; GCN-NEXT: v_writelane_b32 v1, s64, 36 +; GCN-NEXT: v_writelane_b32 v1, s65, 37 +; GCN-NEXT: v_writelane_b32 v1, s66, 38 +; GCN-NEXT: v_writelane_b32 v1, s67, 39 +; GCN-NEXT: v_writelane_b32 v1, s68, 40 +; GCN-NEXT: v_writelane_b32 v1, s69, 41 +; GCN-NEXT: v_writelane_b32 v1, s70, 42 +; GCN-NEXT: v_writelane_b32 v1, s71, 43 +; GCN-NEXT: v_writelane_b32 v1, s72, 44 +; GCN-NEXT: v_writelane_b32 v1, s73, 45 +; GCN-NEXT: v_writelane_b32 v1, s74, 46 +; GCN-NEXT: v_writelane_b32 v1, s75, 47 +; GCN-NEXT: v_writelane_b32 v1, s76, 48 +; GCN-NEXT: v_writelane_b32 v1, s77, 49 +; GCN-NEXT: v_writelane_b32 v1, s78, 50 +; GCN-NEXT: v_writelane_b32 v1, s79, 51 +; GCN-NEXT: v_writelane_b32 v1, s80, 52 +; GCN-NEXT: v_writelane_b32 v1, s81, 53 +; GCN-NEXT: v_writelane_b32 v1, s82, 54 +; GCN-NEXT: v_writelane_b32 v1, s83, 55 +; GCN-NEXT: v_writelane_b32 v1, s84, 56 +; GCN-NEXT: v_writelane_b32 v1, s85, 57 +; GCN-NEXT: v_writelane_b32 v1, s86, 58 +; GCN-NEXT: v_writelane_b32 v1, s87, 59 +; GCN-NEXT: v_writelane_b32 v1, s88, 60 +; GCN-NEXT: v_writelane_b32 v1, s89, 61 +; GCN-NEXT: v_writelane_b32 v1, s90, 62 +; GCN-NEXT: v_writelane_b32 v1, s91, 63 +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: v_writelane_b32 
v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 ; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 57 -; GCN-NEXT: v_readlane_b32 s1, v0, 58 -; GCN-NEXT: v_readlane_b32 s2, v0, 59 -; GCN-NEXT: v_readlane_b32 s3, v0, 60 -; GCN-NEXT: v_readlane_b32 s4, v0, 61 -; GCN-NEXT: v_readlane_b32 s5, v0, 62 -; GCN-NEXT: v_readlane_b32 s6, v0, 63 -; GCN-NEXT: v_readlane_b32 s7, v1, 0 +; GCN-NEXT: v_readlane_b32 s0, v0, 56 +; GCN-NEXT: v_readlane_b32 s1, v0, 57 +; GCN-NEXT: v_readlane_b32 s2, v0, 58 +; GCN-NEXT: v_readlane_b32 s3, v0, 59 +; GCN-NEXT: v_readlane_b32 s4, v0, 60 +; GCN-NEXT: v_readlane_b32 s5, v0, 61 +; GCN-NEXT: v_readlane_b32 s6, v0, 62 +; GCN-NEXT: v_readlane_b32 s7, v0, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 +; GCN-NEXT: v_readlane_b32 s0, v1, 0 +; GCN-NEXT: v_readlane_b32 s1, v1, 1 +; GCN-NEXT: v_readlane_b32 s2, v1, 2 +; GCN-NEXT: v_readlane_b32 s3, v1, 3 +; GCN-NEXT: v_readlane_b32 s4, v1, 4 +; GCN-NEXT: v_readlane_b32 s5, v1, 5 +; GCN-NEXT: v_readlane_b32 s6, v1, 6 +; GCN-NEXT: v_readlane_b32 s7, v1, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 -; GCN-NEXT: v_readlane_b32 s2, v1, 11 -; GCN-NEXT: v_readlane_b32 s3, v1, 12 -; GCN-NEXT: v_readlane_b32 s4, v1, 13 -; GCN-NEXT: v_readlane_b32 s5, v1, 14 -; GCN-NEXT: v_readlane_b32 s6, v1, 15 -; GCN-NEXT: v_readlane_b32 s7, v1, 16 +; GCN-NEXT: v_readlane_b32 s0, v1, 8 +; GCN-NEXT: v_readlane_b32 s1, v1, 9 +; GCN-NEXT: v_readlane_b32 s2, v1, 10 +; GCN-NEXT: v_readlane_b32 s3, v1, 11 +; GCN-NEXT: v_readlane_b32 s4, v1, 12 +; GCN-NEXT: v_readlane_b32 s5, v1, 13 +; GCN-NEXT: v_readlane_b32 s6, v1, 14 +; GCN-NEXT: v_readlane_b32 s7, v1, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 17 -; GCN-NEXT: v_readlane_b32 s1, v1, 18 -; GCN-NEXT: v_readlane_b32 s2, v1, 19 -; GCN-NEXT: v_readlane_b32 s3, v1, 20 -; GCN-NEXT: v_readlane_b32 s4, v1, 21 -; GCN-NEXT: v_readlane_b32 s5, v1, 22 -; GCN-NEXT: v_readlane_b32 s6, v1, 23 -; GCN-NEXT: v_readlane_b32 s7, v1, 24 +; GCN-NEXT: v_readlane_b32 s0, v1, 16 +; GCN-NEXT: v_readlane_b32 s1, v1, 17 +; GCN-NEXT: v_readlane_b32 s2, v1, 18 +; GCN-NEXT: v_readlane_b32 s3, v1, 19 +; GCN-NEXT: v_readlane_b32 s4, v1, 20 +; GCN-NEXT: v_readlane_b32 s5, v1, 21 +; GCN-NEXT: v_readlane_b32 s6, v1, 
22 +; GCN-NEXT: v_readlane_b32 s7, v1, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 25 -; GCN-NEXT: v_readlane_b32 s1, v1, 26 -; GCN-NEXT: v_readlane_b32 s2, v1, 27 -; GCN-NEXT: v_readlane_b32 s3, v1, 28 -; GCN-NEXT: v_readlane_b32 s4, v1, 29 -; GCN-NEXT: v_readlane_b32 s5, v1, 30 -; GCN-NEXT: v_readlane_b32 s6, v1, 31 -; GCN-NEXT: v_readlane_b32 s7, v1, 32 +; GCN-NEXT: v_readlane_b32 s0, v1, 24 +; GCN-NEXT: v_readlane_b32 s1, v1, 25 +; GCN-NEXT: v_readlane_b32 s2, v1, 26 +; GCN-NEXT: v_readlane_b32 s3, v1, 27 +; GCN-NEXT: v_readlane_b32 s4, v1, 28 +; GCN-NEXT: v_readlane_b32 s5, v1, 29 +; GCN-NEXT: v_readlane_b32 s6, v1, 30 +; GCN-NEXT: v_readlane_b32 s7, v1, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 33 -; GCN-NEXT: v_readlane_b32 s1, v1, 34 -; GCN-NEXT: v_readlane_b32 s2, v1, 35 -; GCN-NEXT: v_readlane_b32 s3, v1, 36 -; GCN-NEXT: v_readlane_b32 s4, v1, 37 -; GCN-NEXT: v_readlane_b32 s5, v1, 38 -; GCN-NEXT: v_readlane_b32 s6, v1, 39 -; GCN-NEXT: v_readlane_b32 s7, v1, 40 +; GCN-NEXT: v_readlane_b32 s0, v1, 32 +; GCN-NEXT: v_readlane_b32 s1, v1, 33 +; GCN-NEXT: v_readlane_b32 s2, v1, 34 +; GCN-NEXT: v_readlane_b32 s3, v1, 35 +; GCN-NEXT: v_readlane_b32 s4, v1, 36 +; GCN-NEXT: v_readlane_b32 s5, v1, 37 +; GCN-NEXT: v_readlane_b32 s6, v1, 38 +; GCN-NEXT: v_readlane_b32 s7, v1, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 41 -; GCN-NEXT: v_readlane_b32 s1, v1, 42 -; GCN-NEXT: v_readlane_b32 s2, v1, 43 -; GCN-NEXT: v_readlane_b32 s3, v1, 44 -; GCN-NEXT: v_readlane_b32 s4, v1, 45 -; GCN-NEXT: v_readlane_b32 s5, v1, 46 -; GCN-NEXT: v_readlane_b32 s6, v1, 47 -; GCN-NEXT: v_readlane_b32 s7, v1, 48 +; GCN-NEXT: v_readlane_b32 s0, v1, 40 +; GCN-NEXT: v_readlane_b32 s1, v1, 41 +; GCN-NEXT: v_readlane_b32 s2, v1, 42 +; GCN-NEXT: v_readlane_b32 s3, v1, 43 +; GCN-NEXT: v_readlane_b32 s4, v1, 44 +; GCN-NEXT: v_readlane_b32 s5, v1, 45 +; GCN-NEXT: v_readlane_b32 s6, v1, 46 +; GCN-NEXT: v_readlane_b32 s7, v1, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 49 -; GCN-NEXT: v_readlane_b32 s1, v1, 50 -; GCN-NEXT: v_readlane_b32 s2, v1, 51 -; GCN-NEXT: v_readlane_b32 s3, v1, 52 -; GCN-NEXT: v_readlane_b32 s4, v1, 53 -; GCN-NEXT: v_readlane_b32 s5, v1, 54 -; GCN-NEXT: v_readlane_b32 s6, v1, 55 -; GCN-NEXT: v_readlane_b32 s7, v1, 56 +; GCN-NEXT: v_readlane_b32 s0, v1, 48 +; GCN-NEXT: v_readlane_b32 s1, v1, 49 +; GCN-NEXT: v_readlane_b32 s2, v1, 50 +; GCN-NEXT: v_readlane_b32 s3, v1, 51 +; GCN-NEXT: v_readlane_b32 s4, v1, 52 +; GCN-NEXT: v_readlane_b32 s5, v1, 53 +; GCN-NEXT: v_readlane_b32 s6, v1, 54 +; GCN-NEXT: v_readlane_b32 s7, v1, 55 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 57 -; GCN-NEXT: v_readlane_b32 s1, v1, 58 -; GCN-NEXT: v_readlane_b32 s2, v1, 59 -; GCN-NEXT: v_readlane_b32 s3, v1, 60 -; GCN-NEXT: v_readlane_b32 s4, v1, 61 -; GCN-NEXT: v_readlane_b32 s5, v1, 62 -; GCN-NEXT: v_readlane_b32 s6, v1, 63 -; GCN-NEXT: v_readlane_b32 s7, v2, 0 +; GCN-NEXT: v_readlane_b32 s0, v1, 56 +; GCN-NEXT: v_readlane_b32 s1, v1, 57 +; GCN-NEXT: v_readlane_b32 s2, v1, 58 +; GCN-NEXT: v_readlane_b32 s3, v1, 59 +; GCN-NEXT: v_readlane_b32 s4, v1, 60 +; GCN-NEXT: v_readlane_b32 s5, v1, 61 +; GCN-NEXT: v_readlane_b32 s6, v1, 62 +; GCN-NEXT: v_readlane_b32 s7, v1, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; 
use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 9 -; GCN-NEXT: v_readlane_b32 s1, v0, 10 -; GCN-NEXT: v_readlane_b32 s2, v0, 11 -; GCN-NEXT: v_readlane_b32 s3, v0, 12 -; GCN-NEXT: v_readlane_b32 s4, v0, 13 -; GCN-NEXT: v_readlane_b32 s5, v0, 14 -; GCN-NEXT: v_readlane_b32 s6, v0, 15 -; GCN-NEXT: v_readlane_b32 s7, v0, 16 +; GCN-NEXT: v_readlane_b32 s0, v0, 8 +; GCN-NEXT: v_readlane_b32 s1, v0, 9 +; GCN-NEXT: v_readlane_b32 s2, v0, 10 +; GCN-NEXT: v_readlane_b32 s3, v0, 11 +; GCN-NEXT: v_readlane_b32 s4, v0, 12 +; GCN-NEXT: v_readlane_b32 s5, v0, 13 +; GCN-NEXT: v_readlane_b32 s6, v0, 14 +; GCN-NEXT: v_readlane_b32 s7, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 25 -; GCN-NEXT: v_readlane_b32 s1, v0, 26 -; GCN-NEXT: v_readlane_b32 s2, v0, 27 -; GCN-NEXT: v_readlane_b32 s3, v0, 28 -; GCN-NEXT: v_readlane_b32 s4, v0, 29 -; GCN-NEXT: v_readlane_b32 s5, v0, 30 -; GCN-NEXT: v_readlane_b32 s6, v0, 31 -; GCN-NEXT: v_readlane_b32 s7, v0, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 24 +; GCN-NEXT: v_readlane_b32 s1, v0, 25 +; GCN-NEXT: v_readlane_b32 s2, v0, 26 +; GCN-NEXT: v_readlane_b32 s3, v0, 27 +; GCN-NEXT: v_readlane_b32 s4, v0, 28 +; GCN-NEXT: v_readlane_b32 s5, v0, 29 +; GCN-NEXT: v_readlane_b32 s6, v0, 30 +; GCN-NEXT: v_readlane_b32 s7, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 +; GCN-NEXT: v_readlane_b32 s0, v0, 32 +; GCN-NEXT: v_readlane_b32 s1, v0, 33 +; GCN-NEXT: v_readlane_b32 s2, v0, 34 +; GCN-NEXT: v_readlane_b32 s3, v0, 35 +; GCN-NEXT: v_readlane_b32 s4, v0, 36 +; GCN-NEXT: v_readlane_b32 s5, v0, 37 +; GCN-NEXT: v_readlane_b32 s6, v0, 38 +; GCN-NEXT: v_readlane_b32 s7, v0, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 41 -; GCN-NEXT: v_readlane_b32 s1, v0, 42 -; GCN-NEXT: v_readlane_b32 s2, v0, 43 -; GCN-NEXT: v_readlane_b32 s3, v0, 44 -; GCN-NEXT: v_readlane_b32 s4, v0, 45 -; GCN-NEXT: v_readlane_b32 s5, v0, 46 -; GCN-NEXT: v_readlane_b32 s6, v0, 47 -; GCN-NEXT: v_readlane_b32 s7, v0, 48 +; GCN-NEXT: v_readlane_b32 s0, v0, 40 +; GCN-NEXT: v_readlane_b32 s1, v0, 41 +; GCN-NEXT: v_readlane_b32 s2, v0, 42 +; GCN-NEXT: v_readlane_b32 s3, v0, 43 +; GCN-NEXT: v_readlane_b32 s4, v0, 44 +; GCN-NEXT: v_readlane_b32 s5, v0, 45 +; GCN-NEXT: v_readlane_b32 s6, v0, 46 +; GCN-NEXT: v_readlane_b32 s7, v0, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; 
GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 +; GCN-NEXT: v_readlane_b32 s0, v0, 48 +; GCN-NEXT: v_readlane_b32 s1, v0, 49 +; GCN-NEXT: v_readlane_b32 s2, v0, 50 +; GCN-NEXT: v_readlane_b32 s3, v0, 51 +; GCN-NEXT: v_readlane_b32 s4, v0, 52 +; GCN-NEXT: v_readlane_b32 s5, v0, 53 +; GCN-NEXT: v_readlane_b32 s6, v0, 54 +; GCN-NEXT: v_readlane_b32 s7, v0, 55 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v2, 1 -; GCN-NEXT: v_readlane_b32 s1, v2, 2 -; GCN-NEXT: v_readlane_b32 s2, v2, 3 -; GCN-NEXT: v_readlane_b32 s3, v2, 4 -; GCN-NEXT: v_readlane_b32 s4, v2, 5 -; GCN-NEXT: v_readlane_b32 s5, v2, 6 -; GCN-NEXT: v_readlane_b32 s6, v2, 7 -; GCN-NEXT: v_readlane_b32 s7, v2, 8 +; GCN-NEXT: v_readlane_b32 s0, v2, 0 +; GCN-NEXT: v_readlane_b32 s1, v2, 1 +; GCN-NEXT: v_readlane_b32 s2, v2, 2 +; GCN-NEXT: v_readlane_b32 s3, v2, 3 +; GCN-NEXT: v_readlane_b32 s4, v2, 4 +; GCN-NEXT: v_readlane_b32 s5, v2, 5 +; GCN-NEXT: v_readlane_b32 s6, v2, 6 +; GCN-NEXT: v_readlane_b32 s7, v2, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND @@ -444,195 +442,193 @@ ret: define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: split_sgpr_spill_2_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s12, 8 +; GCN-NEXT: v_writelane_b32 v0, s13, 9 +; GCN-NEXT: v_writelane_b32 v0, s14, 10 +; GCN-NEXT: v_writelane_b32 v0, s15, 11 +; GCN-NEXT: v_writelane_b32 v0, s16, 12 +; GCN-NEXT: v_writelane_b32 v0, s17, 13 +; GCN-NEXT: v_writelane_b32 v0, s18, 14 +; GCN-NEXT: v_writelane_b32 v0, s19, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: v_writelane_b32 v0, s12, 24 +; GCN-NEXT: v_writelane_b32 v0, s13, 25 +; GCN-NEXT: v_writelane_b32 v0, s14, 26 +; GCN-NEXT: v_writelane_b32 v0, s15, 27 +; GCN-NEXT: v_writelane_b32 v0, s16, 28 +; GCN-NEXT: v_writelane_b32 v0, s17, 29 +; GCN-NEXT: v_writelane_b32 v0, s18, 30 +; GCN-NEXT: v_writelane_b32 v0, s19, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[20:27] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:1] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; 
GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 -; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: v_writelane_b32 v0, s12, 9 -; GCN-NEXT: v_writelane_b32 v0, s13, 10 -; GCN-NEXT: v_writelane_b32 v0, s14, 11 -; GCN-NEXT: v_writelane_b32 v0, s15, 12 -; GCN-NEXT: v_writelane_b32 v0, s16, 13 -; GCN-NEXT: v_writelane_b32 v0, s17, 14 -; GCN-NEXT: v_writelane_b32 v0, s18, 15 -; GCN-NEXT: v_writelane_b32 v0, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: v_writelane_b32 v0, s8, 25 -; GCN-NEXT: v_writelane_b32 v0, s9, 26 -; GCN-NEXT: v_writelane_b32 v0, s10, 27 -; GCN-NEXT: v_writelane_b32 v0, s11, 28 -; GCN-NEXT: v_writelane_b32 v0, s12, 29 -; GCN-NEXT: v_writelane_b32 v0, s13, 30 -; GCN-NEXT: v_writelane_b32 v0, s14, 31 -; GCN-NEXT: v_writelane_b32 v0, s15, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[8:9] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s10, 0 -; GCN-NEXT: v_readlane_b32 s11, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s11, s10 -; GCN-NEXT: v_writelane_b32 v0, s36, 33 -; GCN-NEXT: v_writelane_b32 v0, s37, 34 -; GCN-NEXT: v_writelane_b32 v0, s38, 35 -; GCN-NEXT: v_writelane_b32 v0, s39, 36 -; GCN-NEXT: v_writelane_b32 v0, s40, 37 -; GCN-NEXT: v_writelane_b32 v0, s41, 38 -; GCN-NEXT: v_writelane_b32 v0, s42, 39 -; GCN-NEXT: v_writelane_b32 v0, s43, 40 -; GCN-NEXT: v_writelane_b32 v0, s44, 41 -; GCN-NEXT: v_writelane_b32 v0, s45, 42 -; GCN-NEXT: v_writelane_b32 v0, s46, 43 -; GCN-NEXT: v_writelane_b32 v0, s47, 44 -; GCN-NEXT: v_writelane_b32 v0, s48, 45 -; GCN-NEXT: v_writelane_b32 v0, s49, 46 -; GCN-NEXT: v_writelane_b32 v0, s50, 47 -; GCN-NEXT: v_writelane_b32 v0, s51, 48 -; GCN-NEXT: v_writelane_b32 v0, s16, 49 -; GCN-NEXT: v_writelane_b32 v0, s17, 50 -; GCN-NEXT: v_writelane_b32 v0, s18, 51 -; GCN-NEXT: v_writelane_b32 v0, s19, 52 -; GCN-NEXT: v_writelane_b32 v0, s20, 53 -; GCN-NEXT: v_writelane_b32 v0, s21, 54 -; GCN-NEXT: v_writelane_b32 v0, s22, 55 -; GCN-NEXT: v_writelane_b32 v0, s23, 56 -; GCN-NEXT: v_writelane_b32 v0, s24, 57 -; GCN-NEXT: v_writelane_b32 v0, s25, 58 -; GCN-NEXT: v_writelane_b32 v0, s26, 59 -; GCN-NEXT: v_writelane_b32 v0, s27, 60 -; GCN-NEXT: v_writelane_b32 v0, s28, 61 -; GCN-NEXT: v_writelane_b32 v0, s29, 62 -; GCN-NEXT: v_writelane_b32 v0, s30, 63 -; GCN-NEXT: v_writelane_b32 v1, s31, 0 -; GCN-NEXT: v_writelane_b32 v1, s0, 1 -; GCN-NEXT: v_writelane_b32 v1, s1, 2 -; GCN-NEXT: v_writelane_b32 v1, s2, 3 -; GCN-NEXT: v_writelane_b32 v1, s3, 4 -; GCN-NEXT: v_writelane_b32 v1, s4, 5 -; GCN-NEXT: v_writelane_b32 v1, s5, 6 -; GCN-NEXT: v_writelane_b32 v1, s6, 7 -; GCN-NEXT: v_writelane_b32 v1, s7, 8 -; GCN-NEXT: v_writelane_b32 v1, s8, 9 -; GCN-NEXT: v_writelane_b32 v1, s9, 10 +; GCN-NEXT: s_cmp_lg_u32 s2, s3 +; GCN-NEXT: v_writelane_b32 v0, s36, 32 +; GCN-NEXT: v_writelane_b32 v0, s37, 33 +; GCN-NEXT: v_writelane_b32 v0, s38, 34 +; GCN-NEXT: v_writelane_b32 v0, s39, 35 +; GCN-NEXT: v_writelane_b32 v0, s40, 36 +; GCN-NEXT: v_writelane_b32 v0, s41, 37 +; 
GCN-NEXT: v_writelane_b32 v0, s42, 38 +; GCN-NEXT: v_writelane_b32 v0, s43, 39 +; GCN-NEXT: v_writelane_b32 v0, s44, 40 +; GCN-NEXT: v_writelane_b32 v0, s45, 41 +; GCN-NEXT: v_writelane_b32 v0, s46, 42 +; GCN-NEXT: v_writelane_b32 v0, s47, 43 +; GCN-NEXT: v_writelane_b32 v0, s48, 44 +; GCN-NEXT: v_writelane_b32 v0, s49, 45 +; GCN-NEXT: v_writelane_b32 v0, s50, 46 +; GCN-NEXT: v_writelane_b32 v0, s51, 47 +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; GCN-NEXT: v_writelane_b32 v0, s19, 63 +; GCN-NEXT: v_writelane_b32 v1, s20, 0 +; GCN-NEXT: v_writelane_b32 v1, s21, 1 +; GCN-NEXT: v_writelane_b32 v1, s22, 2 +; GCN-NEXT: v_writelane_b32 v1, s23, 3 +; GCN-NEXT: v_writelane_b32 v1, s24, 4 +; GCN-NEXT: v_writelane_b32 v1, s25, 5 +; GCN-NEXT: v_writelane_b32 v1, s26, 6 +; GCN-NEXT: v_writelane_b32 v1, s27, 7 +; GCN-NEXT: v_writelane_b32 v1, s0, 8 +; GCN-NEXT: v_writelane_b32 v1, s1, 9 ; GCN-NEXT: s_cbranch_scc1 BB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 -; GCN-NEXT: v_readlane_b32 s8, v0, 9 -; GCN-NEXT: v_readlane_b32 s9, v0, 10 -; GCN-NEXT: v_readlane_b32 s10, v0, 11 -; GCN-NEXT: v_readlane_b32 s11, v0, 12 -; GCN-NEXT: v_readlane_b32 s12, v0, 13 -; GCN-NEXT: v_readlane_b32 s13, v0, 14 -; GCN-NEXT: v_readlane_b32 s14, v0, 15 -; GCN-NEXT: v_readlane_b32 s15, v0, 16 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s8, v0, 8 +; GCN-NEXT: v_readlane_b32 s9, v0, 9 +; GCN-NEXT: v_readlane_b32 s10, v0, 10 +; GCN-NEXT: v_readlane_b32 s11, v0, 11 +; GCN-NEXT: v_readlane_b32 s12, v0, 12 +; GCN-NEXT: v_readlane_b32 s13, v0, 13 +; GCN-NEXT: v_readlane_b32 s14, v0, 14 +; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 -; GCN-NEXT: v_readlane_b32 s8, v0, 41 -; GCN-NEXT: v_readlane_b32 s9, v0, 42 -; GCN-NEXT: v_readlane_b32 s10, v0, 43 -; GCN-NEXT: v_readlane_b32 s11, v0, 44 -; GCN-NEXT: v_readlane_b32 s12, v0, 45 -; GCN-NEXT: v_readlane_b32 s13, v0, 46 -; GCN-NEXT: v_readlane_b32 s14, v0, 47 -; GCN-NEXT: v_readlane_b32 s15, v0, 48 +; GCN-NEXT: v_readlane_b32 s0, v0, 32 +; GCN-NEXT: v_readlane_b32 s1, v0, 33 +; GCN-NEXT: 
v_readlane_b32 s2, v0, 34 +; GCN-NEXT: v_readlane_b32 s3, v0, 35 +; GCN-NEXT: v_readlane_b32 s4, v0, 36 +; GCN-NEXT: v_readlane_b32 s5, v0, 37 +; GCN-NEXT: v_readlane_b32 s6, v0, 38 +; GCN-NEXT: v_readlane_b32 s7, v0, 39 +; GCN-NEXT: v_readlane_b32 s8, v0, 40 +; GCN-NEXT: v_readlane_b32 s9, v0, 41 +; GCN-NEXT: v_readlane_b32 s10, v0, 42 +; GCN-NEXT: v_readlane_b32 s11, v0, 43 +; GCN-NEXT: v_readlane_b32 s12, v0, 44 +; GCN-NEXT: v_readlane_b32 s13, v0, 45 +; GCN-NEXT: v_readlane_b32 s14, v0, 46 +; GCN-NEXT: v_readlane_b32 s15, v0, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 -; GCN-NEXT: v_readlane_b32 s8, v0, 25 -; GCN-NEXT: v_readlane_b32 s9, v0, 26 -; GCN-NEXT: v_readlane_b32 s10, v0, 27 -; GCN-NEXT: v_readlane_b32 s11, v0, 28 -; GCN-NEXT: v_readlane_b32 s12, v0, 29 -; GCN-NEXT: v_readlane_b32 s13, v0, 30 -; GCN-NEXT: v_readlane_b32 s14, v0, 31 -; GCN-NEXT: v_readlane_b32 s15, v0, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 +; GCN-NEXT: v_readlane_b32 s8, v0, 24 +; GCN-NEXT: v_readlane_b32 s9, v0, 25 +; GCN-NEXT: v_readlane_b32 s10, v0, 26 +; GCN-NEXT: v_readlane_b32 s11, v0, 27 +; GCN-NEXT: v_readlane_b32 s12, v0, 28 +; GCN-NEXT: v_readlane_b32 s13, v0, 29 +; GCN-NEXT: v_readlane_b32 s14, v0, 30 +; GCN-NEXT: v_readlane_b32 s15, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:1] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 -; GCN-NEXT: v_readlane_b32 s8, v0, 57 -; GCN-NEXT: v_readlane_b32 s9, v0, 58 -; GCN-NEXT: v_readlane_b32 s10, v0, 59 -; GCN-NEXT: v_readlane_b32 s11, v0, 60 -; GCN-NEXT: v_readlane_b32 s12, v0, 61 -; GCN-NEXT: v_readlane_b32 s13, v0, 62 -; GCN-NEXT: v_readlane_b32 s14, v0, 63 -; GCN-NEXT: v_readlane_b32 s15, v1, 0 +; GCN-NEXT: v_readlane_b32 s16, v1, 0 +; GCN-NEXT: v_readlane_b32 s17, v1, 1 +; GCN-NEXT: v_readlane_b32 s18, v1, 2 +; GCN-NEXT: v_readlane_b32 s19, v1, 3 +; GCN-NEXT: v_readlane_b32 s20, v1, 4 +; GCN-NEXT: v_readlane_b32 s21, v1, 5 +; GCN-NEXT: v_readlane_b32 s22, v1, 6 +; GCN-NEXT: v_readlane_b32 s23, v1, 7 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[16:23] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s24, v1, 8 +; GCN-NEXT: 
v_readlane_b32 s25, v1, 9 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[24:25] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s0, v0, 48 +; GCN-NEXT: v_readlane_b32 s1, v0, 49 +; GCN-NEXT: v_readlane_b32 s2, v0, 50 +; GCN-NEXT: v_readlane_b32 s3, v0, 51 +; GCN-NEXT: v_readlane_b32 s4, v0, 52 +; GCN-NEXT: v_readlane_b32 s5, v0, 53 +; GCN-NEXT: v_readlane_b32 s6, v0, 54 +; GCN-NEXT: v_readlane_b32 s7, v0, 55 +; GCN-NEXT: v_readlane_b32 s8, v0, 56 +; GCN-NEXT: v_readlane_b32 s9, v0, 57 +; GCN-NEXT: v_readlane_b32 s10, v0, 58 +; GCN-NEXT: v_readlane_b32 s11, v0, 59 +; GCN-NEXT: v_readlane_b32 s12, v0, 60 +; GCN-NEXT: v_readlane_b32 s13, v0, 61 +; GCN-NEXT: v_readlane_b32 s14, v0, 62 +; GCN-NEXT: v_readlane_b32 s15, v0, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND @@ -667,13 +663,13 @@ ret: define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: no_vgprs_last_sgpr_spill: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s58, -1 -; GCN-NEXT: s_mov_b32 s59, 0xe8f000 -; GCN-NEXT: s_add_u32 s56, s56, s3 -; GCN-NEXT: s_addc_u32 s57, s57, 0 -; GCN-NEXT: s_load_dword s0, s[0:1], 0xb +; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s22, -1 +; GCN-NEXT: s_mov_b32 s23, 0xe8f000 +; GCN-NEXT: s_add_u32 s20, s20, s3 +; GCN-NEXT: s_addc_u32 s21, s21, 0 +; GCN-NEXT: s_load_dword s2, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -692,179 +688,177 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 0 +; GCN-NEXT: v_writelane_b32 v31, s5, 1 +; GCN-NEXT: v_writelane_b32 v31, s6, 2 +; GCN-NEXT: v_writelane_b32 v31, s7, 3 +; GCN-NEXT: v_writelane_b32 v31, s8, 4 +; GCN-NEXT: v_writelane_b32 v31, s9, 5 +; GCN-NEXT: v_writelane_b32 v31, s10, 6 +; GCN-NEXT: v_writelane_b32 v31, s11, 7 +; GCN-NEXT: v_writelane_b32 v31, s12, 8 +; GCN-NEXT: v_writelane_b32 v31, s13, 9 +; GCN-NEXT: v_writelane_b32 v31, s14, 10 +; GCN-NEXT: v_writelane_b32 v31, s15, 11 +; GCN-NEXT: v_writelane_b32 v31, s16, 12 +; GCN-NEXT: v_writelane_b32 v31, s17, 13 +; GCN-NEXT: v_writelane_b32 v31, s18, 14 +; GCN-NEXT: v_writelane_b32 v31, s19, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 16 +; GCN-NEXT: v_writelane_b32 v31, s5, 17 +; GCN-NEXT: v_writelane_b32 v31, s6, 18 +; GCN-NEXT: v_writelane_b32 v31, s7, 19 +; GCN-NEXT: v_writelane_b32 v31, s8, 20 +; GCN-NEXT: v_writelane_b32 v31, s9, 21 +; GCN-NEXT: v_writelane_b32 v31, s10, 22 +; GCN-NEXT: v_writelane_b32 v31, s11, 23 +; GCN-NEXT: v_writelane_b32 v31, s12, 24 +; GCN-NEXT: v_writelane_b32 v31, s13, 25 +; GCN-NEXT: v_writelane_b32 v31, s14, 26 +; GCN-NEXT: v_writelane_b32 v31, s15, 27 +; GCN-NEXT: v_writelane_b32 v31, s16, 28 +; GCN-NEXT: v_writelane_b32 v31, s17, 29 +; GCN-NEXT: v_writelane_b32 v31, s18, 30 +; GCN-NEXT: v_writelane_b32 v31, s19, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:1] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v31, s0, 0 -; GCN-NEXT: v_writelane_b32 v31, s4, 1 -; GCN-NEXT: v_writelane_b32 v31, s5, 2 -; GCN-NEXT: v_writelane_b32 v31, s6, 3 -; 
GCN-NEXT: v_writelane_b32 v31, s7, 4 -; GCN-NEXT: v_writelane_b32 v31, s8, 5 -; GCN-NEXT: v_writelane_b32 v31, s9, 6 -; GCN-NEXT: v_writelane_b32 v31, s10, 7 -; GCN-NEXT: v_writelane_b32 v31, s11, 8 -; GCN-NEXT: v_writelane_b32 v31, s12, 9 -; GCN-NEXT: v_writelane_b32 v31, s13, 10 -; GCN-NEXT: v_writelane_b32 v31, s14, 11 -; GCN-NEXT: v_writelane_b32 v31, s15, 12 -; GCN-NEXT: v_writelane_b32 v31, s16, 13 -; GCN-NEXT: v_writelane_b32 v31, s17, 14 -; GCN-NEXT: v_writelane_b32 v31, s18, 15 -; GCN-NEXT: v_writelane_b32 v31, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[34:35] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s33, 0 -; GCN-NEXT: v_readlane_b32 s52, v31, 0 -; GCN-NEXT: s_cmp_lg_u32 s52, s33 -; GCN-NEXT: v_writelane_b32 v31, s36, 17 -; GCN-NEXT: v_writelane_b32 v31, s37, 18 -; GCN-NEXT: v_writelane_b32 v31, s38, 19 -; GCN-NEXT: v_writelane_b32 v31, s39, 20 -; GCN-NEXT: v_writelane_b32 v31, s40, 21 -; GCN-NEXT: v_writelane_b32 v31, s41, 22 -; GCN-NEXT: v_writelane_b32 v31, s42, 23 -; GCN-NEXT: v_writelane_b32 v31, s43, 24 -; GCN-NEXT: v_writelane_b32 v31, s44, 25 -; GCN-NEXT: v_writelane_b32 v31, s45, 26 -; GCN-NEXT: v_writelane_b32 v31, s46, 27 -; GCN-NEXT: v_writelane_b32 v31, s47, 28 -; GCN-NEXT: v_writelane_b32 v31, s48, 29 -; GCN-NEXT: v_writelane_b32 v31, s49, 30 -; GCN-NEXT: v_writelane_b32 v31, s50, 31 -; GCN-NEXT: v_writelane_b32 v31, s51, 32 -; GCN-NEXT: v_writelane_b32 v31, s0, 33 -; GCN-NEXT: v_writelane_b32 v31, s1, 34 -; GCN-NEXT: v_writelane_b32 v31, s2, 35 -; GCN-NEXT: v_writelane_b32 v31, s3, 36 -; GCN-NEXT: v_writelane_b32 v31, s4, 37 -; GCN-NEXT: v_writelane_b32 v31, s5, 38 -; GCN-NEXT: v_writelane_b32 v31, s6, 39 -; GCN-NEXT: v_writelane_b32 v31, s7, 40 -; GCN-NEXT: v_writelane_b32 v31, s8, 41 -; GCN-NEXT: v_writelane_b32 v31, s9, 42 -; GCN-NEXT: v_writelane_b32 v31, s10, 43 -; GCN-NEXT: v_writelane_b32 v31, s11, 44 -; GCN-NEXT: v_writelane_b32 v31, s12, 45 -; GCN-NEXT: v_writelane_b32 v31, s13, 46 -; GCN-NEXT: v_writelane_b32 v31, s14, 47 -; GCN-NEXT: v_writelane_b32 v31, s15, 48 -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 -; GCN-NEXT: v_writelane_b32 v0, s16, 0 -; GCN-NEXT: v_writelane_b32 v0, s17, 1 -; GCN-NEXT: v_writelane_b32 v0, s18, 2 -; GCN-NEXT: v_writelane_b32 v0, s19, 3 -; GCN-NEXT: v_writelane_b32 v0, s20, 4 -; GCN-NEXT: v_writelane_b32 v0, s21, 5 -; GCN-NEXT: v_writelane_b32 v0, s22, 6 -; GCN-NEXT: v_writelane_b32 v0, s23, 7 -; GCN-NEXT: v_writelane_b32 v0, s24, 8 -; GCN-NEXT: v_writelane_b32 v0, s25, 9 -; GCN-NEXT: v_writelane_b32 v0, s26, 10 -; GCN-NEXT: v_writelane_b32 v0, s27, 11 -; GCN-NEXT: v_writelane_b32 v0, s28, 12 -; GCN-NEXT: v_writelane_b32 v0, s29, 13 -; GCN-NEXT: v_writelane_b32 v0, s30, 14 -; GCN-NEXT: v_writelane_b32 v0, s31, 15 -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v31, s34, 49 -; GCN-NEXT: v_writelane_b32 v31, s35, 50 -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 +; GCN-NEXT: s_cmp_lg_u32 s2, s3 +; GCN-NEXT: v_writelane_b32 v31, s36, 32 +; GCN-NEXT: v_writelane_b32 v31, s37, 33 +; GCN-NEXT: v_writelane_b32 v31, s38, 34 +; GCN-NEXT: v_writelane_b32 v31, s39, 35 +; GCN-NEXT: v_writelane_b32 v31, s40, 36 +; GCN-NEXT: v_writelane_b32 v31, s41, 37 +; GCN-NEXT: v_writelane_b32 v31, s42, 
38 +; GCN-NEXT: v_writelane_b32 v31, s43, 39 +; GCN-NEXT: v_writelane_b32 v31, s44, 40 +; GCN-NEXT: v_writelane_b32 v31, s45, 41 +; GCN-NEXT: v_writelane_b32 v31, s46, 42 +; GCN-NEXT: v_writelane_b32 v31, s47, 43 +; GCN-NEXT: v_writelane_b32 v31, s48, 44 +; GCN-NEXT: v_writelane_b32 v31, s49, 45 +; GCN-NEXT: v_writelane_b32 v31, s50, 46 +; GCN-NEXT: v_writelane_b32 v31, s51, 47 +; GCN-NEXT: v_writelane_b32 v31, s4, 48 +; GCN-NEXT: v_writelane_b32 v31, s5, 49 +; GCN-NEXT: v_writelane_b32 v31, s6, 50 +; GCN-NEXT: v_writelane_b32 v31, s7, 51 +; GCN-NEXT: v_writelane_b32 v31, s8, 52 +; GCN-NEXT: v_writelane_b32 v31, s9, 53 +; GCN-NEXT: v_writelane_b32 v31, s10, 54 +; GCN-NEXT: v_writelane_b32 v31, s11, 55 +; GCN-NEXT: v_writelane_b32 v31, s12, 56 +; GCN-NEXT: v_writelane_b32 v31, s13, 57 +; GCN-NEXT: v_writelane_b32 v31, s14, 58 +; GCN-NEXT: v_writelane_b32 v31, s15, 59 +; GCN-NEXT: v_writelane_b32 v31, s16, 60 +; GCN-NEXT: v_writelane_b32 v31, s17, 61 +; GCN-NEXT: v_writelane_b32 v31, s18, 62 +; GCN-NEXT: v_writelane_b32 v31, s19, 63 +; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 +; GCN-NEXT: v_writelane_b32 v0, s0, 0 +; GCN-NEXT: v_writelane_b32 v0, s1, 1 +; GCN-NEXT: s_mov_b64 s[0:1], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[0:1] +; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; GCN-NEXT: s_cbranch_scc1 BB2_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v31, 1 -; GCN-NEXT: v_readlane_b32 s1, v31, 2 -; GCN-NEXT: v_readlane_b32 s2, v31, 3 -; GCN-NEXT: v_readlane_b32 s3, v31, 4 -; GCN-NEXT: v_readlane_b32 s4, v31, 5 -; GCN-NEXT: v_readlane_b32 s5, v31, 6 -; GCN-NEXT: v_readlane_b32 s6, v31, 7 -; GCN-NEXT: v_readlane_b32 s7, v31, 8 -; GCN-NEXT: v_readlane_b32 s8, v31, 9 -; GCN-NEXT: v_readlane_b32 s9, v31, 10 -; GCN-NEXT: v_readlane_b32 s10, v31, 11 -; GCN-NEXT: v_readlane_b32 s11, v31, 12 -; GCN-NEXT: v_readlane_b32 s12, v31, 13 -; GCN-NEXT: v_readlane_b32 s13, v31, 14 -; GCN-NEXT: v_readlane_b32 s14, v31, 15 -; GCN-NEXT: v_readlane_b32 s15, v31, 16 +; GCN-NEXT: v_readlane_b32 s0, v31, 0 +; GCN-NEXT: v_readlane_b32 s1, v31, 1 +; GCN-NEXT: v_readlane_b32 s2, v31, 2 +; GCN-NEXT: v_readlane_b32 s3, v31, 3 +; GCN-NEXT: v_readlane_b32 s4, v31, 4 +; GCN-NEXT: v_readlane_b32 s5, v31, 5 +; GCN-NEXT: v_readlane_b32 s6, v31, 6 +; GCN-NEXT: v_readlane_b32 s7, v31, 7 +; GCN-NEXT: v_readlane_b32 s8, v31, 8 +; GCN-NEXT: v_readlane_b32 s9, v31, 9 +; GCN-NEXT: v_readlane_b32 s10, v31, 10 +; GCN-NEXT: v_readlane_b32 s11, v31, 11 +; GCN-NEXT: v_readlane_b32 s12, v31, 12 +; GCN-NEXT: v_readlane_b32 s13, v31, 13 +; GCN-NEXT: v_readlane_b32 s14, v31, 14 +; GCN-NEXT: v_readlane_b32 s15, v31, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 17 -; GCN-NEXT: v_readlane_b32 s1, v31, 18 -; GCN-NEXT: v_readlane_b32 s2, v31, 19 -; GCN-NEXT: v_readlane_b32 s3, v31, 20 -; GCN-NEXT: v_readlane_b32 s4, v31, 21 -; GCN-NEXT: v_readlane_b32 s5, v31, 22 -; GCN-NEXT: v_readlane_b32 s6, v31, 23 -; GCN-NEXT: v_readlane_b32 s7, v31, 24 -; GCN-NEXT: v_readlane_b32 s8, v31, 25 -; GCN-NEXT: v_readlane_b32 s9, v31, 26 -; GCN-NEXT: v_readlane_b32 s10, v31, 27 -; GCN-NEXT: v_readlane_b32 s11, v31, 28 -; GCN-NEXT: v_readlane_b32 s12, v31, 29 -; GCN-NEXT: v_readlane_b32 s13, v31, 30 -; GCN-NEXT: v_readlane_b32 s14, v31, 31 -; GCN-NEXT: v_readlane_b32 s15, v31, 32 +; GCN-NEXT: v_readlane_b32 s0, v31, 32 +; GCN-NEXT: v_readlane_b32 s1, v31, 33 +; 
GCN-NEXT: v_readlane_b32 s2, v31, 34 +; GCN-NEXT: v_readlane_b32 s3, v31, 35 +; GCN-NEXT: v_readlane_b32 s4, v31, 36 +; GCN-NEXT: v_readlane_b32 s5, v31, 37 +; GCN-NEXT: v_readlane_b32 s6, v31, 38 +; GCN-NEXT: v_readlane_b32 s7, v31, 39 +; GCN-NEXT: v_readlane_b32 s8, v31, 40 +; GCN-NEXT: v_readlane_b32 s9, v31, 41 +; GCN-NEXT: v_readlane_b32 s10, v31, 42 +; GCN-NEXT: v_readlane_b32 s11, v31, 43 +; GCN-NEXT: v_readlane_b32 s12, v31, 44 +; GCN-NEXT: v_readlane_b32 s13, v31, 45 +; GCN-NEXT: v_readlane_b32 s14, v31, 46 +; GCN-NEXT: v_readlane_b32 s15, v31, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 33 -; GCN-NEXT: v_readlane_b32 s1, v31, 34 -; GCN-NEXT: v_readlane_b32 s2, v31, 35 -; GCN-NEXT: v_readlane_b32 s3, v31, 36 -; GCN-NEXT: v_readlane_b32 s4, v31, 37 -; GCN-NEXT: v_readlane_b32 s5, v31, 38 -; GCN-NEXT: v_readlane_b32 s6, v31, 39 -; GCN-NEXT: v_readlane_b32 s7, v31, 40 -; GCN-NEXT: v_readlane_b32 s8, v31, 41 -; GCN-NEXT: v_readlane_b32 s9, v31, 42 -; GCN-NEXT: v_readlane_b32 s10, v31, 43 -; GCN-NEXT: v_readlane_b32 s11, v31, 44 -; GCN-NEXT: v_readlane_b32 s12, v31, 45 -; GCN-NEXT: v_readlane_b32 s13, v31, 46 -; GCN-NEXT: v_readlane_b32 s14, v31, 47 -; GCN-NEXT: v_readlane_b32 s15, v31, 48 +; GCN-NEXT: v_readlane_b32 s0, v31, 16 +; GCN-NEXT: v_readlane_b32 s1, v31, 17 +; GCN-NEXT: v_readlane_b32 s2, v31, 18 +; GCN-NEXT: v_readlane_b32 s3, v31, 19 +; GCN-NEXT: v_readlane_b32 s4, v31, 20 +; GCN-NEXT: v_readlane_b32 s5, v31, 21 +; GCN-NEXT: v_readlane_b32 s6, v31, 22 +; GCN-NEXT: v_readlane_b32 s7, v31, 23 +; GCN-NEXT: v_readlane_b32 s8, v31, 24 +; GCN-NEXT: v_readlane_b32 s9, v31, 25 +; GCN-NEXT: v_readlane_b32 s10, v31, 26 +; GCN-NEXT: v_readlane_b32 s11, v31, 27 +; GCN-NEXT: v_readlane_b32 s12, v31, 28 +; GCN-NEXT: v_readlane_b32 s13, v31, 29 +; GCN-NEXT: v_readlane_b32 s14, v31, 30 +; GCN-NEXT: v_readlane_b32 s15, v31, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 s[0:1], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[0:1] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 -; GCN-NEXT: v_readlane_b32 s8, v0, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 9 -; GCN-NEXT: v_readlane_b32 s10, v0, 10 -; GCN-NEXT: v_readlane_b32 s11, v0, 11 -; GCN-NEXT: v_readlane_b32 s12, v0, 12 -; GCN-NEXT: v_readlane_b32 s13, v0, 13 -; GCN-NEXT: v_readlane_b32 s14, v0, 14 -; GCN-NEXT: v_readlane_b32 s15, v0, 15 +; GCN-NEXT: v_readlane_b32 s0, v31, 48 +; GCN-NEXT: v_readlane_b32 s1, v31, 49 +; GCN-NEXT: v_readlane_b32 s2, v31, 50 +; GCN-NEXT: v_readlane_b32 s3, v31, 51 +; GCN-NEXT: v_readlane_b32 s4, v31, 52 +; GCN-NEXT: v_readlane_b32 s5, v31, 53 +; GCN-NEXT: v_readlane_b32 s6, v31, 54 +; GCN-NEXT: v_readlane_b32 s7, v31, 55 +; GCN-NEXT: v_readlane_b32 s8, v31, 56 +; GCN-NEXT: v_readlane_b32 s9, v31, 57 +; GCN-NEXT: v_readlane_b32 s10, v31, 58 +; GCN-NEXT: v_readlane_b32 s11, v31, 59 +; GCN-NEXT: v_readlane_b32 s12, v31, 60 +; GCN-NEXT: v_readlane_b32 s13, v31, 61 +; GCN-NEXT: v_readlane_b32 s14, v31, 62 +; GCN-NEXT: v_readlane_b32 s15, v31, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: 
;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 49 -; GCN-NEXT: v_readlane_b32 s1, v31, 50 +; GCN-NEXT: s_mov_b64 s[16:17], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[16:17] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s16, v0, 0 +; GCN-NEXT: v_readlane_b32 s17, v0, 1 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:1] +; GCN-NEXT: ; use s[16:17] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: BB2_2: ; %ret ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll index 9b629a5f91110..a03318ead716c 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -77,101 +77,6 @@ endif: ; preds = %else, %if ret void } -; Force save and restore of m0 during SMEM spill -; GCN-LABEL: {{^}}m0_unavailable_spill: - -; GCN: ; def m0, 1 - -; GCN: s_mov_b32 m0, s0 -; GCN: v_interp_mov_f32 - -; GCN: ; clobber m0 - -; TOSMEM: s_mov_b32 s2, m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill -; TOSMEM: s_mov_b32 m0, s2 - -; TOSMEM: s_mov_b64 exec, -; TOSMEM: s_cbranch_execz -; TOSMEM: s_branch - -; TOSMEM: BB{{[0-9]+_[0-9]+}}: -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload - -; GCN-NOT: v_readlane_b32 m0 -; GCN-NOT: s_buffer_store_dword m0 -; GCN-NOT: s_buffer_load_dword m0 -define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 { -main_body: - %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0 - %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg) - call void asm sideeffect "; clobber $0", "~{m0}"() #0 - %cmp = fcmp ueq float 0.000000e+00, %tmp - br i1 %cmp, label %if, label %else - -if: ; preds = %main_body - store volatile i32 8, i32 addrspace(1)* undef - br label %endif - -else: ; preds = %main_body - store volatile i32 11, i32 addrspace(1)* undef - br label %endif - -endif: - ret void -} - -; GCN-LABEL: {{^}}restore_m0_lds: -; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] -; TOSMEM: s_cmp_eq_u32 -; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x200 -; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_cbranch_scc1 - -; TOSMEM: s_mov_b32 m0, -1 - -; TOSMEM: s_mov_b32 s2, m0 -; TOSMEM: s_add_u32 m0, s3, 0x200 -; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload -; TOSMEM: s_mov_b32 m0, s2 -; TOSMEM: s_waitcnt lgkmcnt(0) - -; TOSMEM: ds_write_b64 - -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_waitcnt lgkmcnt(0) -; TOSMEM-NOT: m0 -; TOSMEM: s_mov_b32 m0, s2 -; TOSMEM: ; use m0 - -; TOSMEM: s_dcache_wb -; TOSMEM: s_endpgm -define amdgpu_kernel void @restore_m0_lds(i32 %arg) { - %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0 - %sval = load volatile i64, i64 addrspace(4)* undef - %cmp = icmp eq i32 %arg, 0 - br i1 %cmp, label %ret, label %bb - -bb: - store volatile i64 %sval, i64 addrspace(3)* undef - 
call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0 - br label %ret - -ret: - ret void -} - declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index 1a48e76a241bb..e4beac77e1be2 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -94,10 +94,10 @@ define i32 @called(i32 %a) noinline { ; GFX9-LABEL: {{^}}call: define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s0 +; GFX9-O0: v_mov_b32_e32 v0, s2 ; GFX9-O3: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) @@ -142,8 +142,8 @@ define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { ; GFX9-O0: buffer_store_dword v1 ; GFX9: s_swappc_b64 %tmp134 = call i64 @called_i64(i64 %tmp107) -; GFX9-O0: buffer_load_dword v4 -; GFX9-O0: buffer_load_dword v5 +; GFX9-O0: buffer_load_dword v6 +; GFX9-O0: buffer_load_dword v7 %tmp136 = add i64 %tmp134, %tmp107 %tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136) %tmp138 = bitcast i64 %tmp137 to <2 x i32> diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll index 529775df5fd7d..478ff985bf475 100644 --- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll +++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll @@ -49,9 +49,9 @@ define i16 @int_to_vec(i80 %in) { ; CHECK-NEXT: vmov.32 d16[0], r0 ; CHECK-NEXT: @ implicit-def: $q9 ; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vrev32.16 q8, q9 -; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8 -; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vrev32.16 q9, q9 +; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9 +; CHECK-NEXT: vmov.u16 r0, d18[0] ; CHECK-NEXT: bx lr %vec = bitcast i80 %in to <5 x i16> %e0 = extractelement <5 x i16> %vec, i32 0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll index a98c6eb9fd6cb..c63f24ea692ce 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll @@ -235,15 +235,15 @@ define i32 @f64tou32(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: sub.d $f2, $f12, $f0 -; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mtc1 $3, $f2 +; FP32-NEXT: mtc1 $2, $f3 +; FP32-NEXT: sub.d $f4, $f12, $f2 +; FP32-NEXT: trunc.w.d $f0, $f4 +; FP32-NEXT: mfc1 $2, $f0 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f0 +; FP32-NEXT: c.ult.d $f12, $f2 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -256,15 +256,15 @@ define i32 @f64tou32(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: sub.d $f1, $f12, $f0 -; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; 
FP64-NEXT: mtc1 $3, $f1 +; FP64-NEXT: mthc1 $2, $f1 +; FP64-NEXT: sub.d $f2, $f12, $f1 +; FP64-NEXT: trunc.w.d $f0, $f2 +; FP64-NEXT: mfc1 $2, $f0 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f0 +; FP64-NEXT: c.ult.d $f12, $f1 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -282,15 +282,15 @@ define zeroext i16 @f64tou16(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: sub.d $f2, $f12, $f0 -; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mtc1 $3, $f2 +; FP32-NEXT: mtc1 $2, $f3 +; FP32-NEXT: sub.d $f4, $f12, $f2 +; FP32-NEXT: trunc.w.d $f0, $f4 +; FP32-NEXT: mfc1 $2, $f0 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f0 +; FP32-NEXT: c.ult.d $f12, $f2 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -304,15 +304,15 @@ define zeroext i16 @f64tou16(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: sub.d $f1, $f12, $f0 -; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mtc1 $3, $f1 +; FP64-NEXT: mthc1 $2, $f1 +; FP64-NEXT: sub.d $f2, $f12, $f1 +; FP64-NEXT: trunc.w.d $f0, $f2 +; FP64-NEXT: mfc1 $2, $f0 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f0 +; FP64-NEXT: c.ult.d $f12, $f1 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -331,15 +331,15 @@ define zeroext i8 @f64tou8(double %a) { ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: sub.d $f2, $f12, $f0 -; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mtc1 $3, $f2 +; FP32-NEXT: mtc1 $2, $f3 +; FP32-NEXT: sub.d $f4, $f12, $f2 +; FP32-NEXT: trunc.w.d $f0, $f4 +; FP32-NEXT: mfc1 $2, $f0 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f0 +; FP32-NEXT: c.ult.d $f12, $f2 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -353,15 +353,15 @@ define zeroext i8 @f64tou8(double %a) { ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: sub.d $f1, $f12, $f0 -; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mtc1 $3, $f1 +; FP64-NEXT: mthc1 $2, $f1 +; FP64-NEXT: sub.d $f2, $f12, $f1 +; FP64-NEXT: trunc.w.d $f0, $f2 +; FP64-NEXT: mfc1 $2, $f0 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f0 +; FP64-NEXT: c.ult.d $f12, $f1 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll index 646af650c00e7..a6200851940cd 100644 --- a/llvm/test/CodeGen/Mips/atomic-min-max.ll +++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll @@ -1154,26 +1154,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, 
$3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB4_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB4_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB4_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1194,26 +1194,26 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB4_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB4_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB4_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1232,28 +1232,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB4_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB4_1 +; 
MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB4_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1273,28 +1273,28 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB4_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB4_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1635,26 +1635,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB5_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB5_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB5_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; 
MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1675,26 +1675,26 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB5_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB5_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB5_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1713,28 +1713,28 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB5_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB5_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB5_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1754,28 +1754,28 @@ define i16 
@test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB5_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB5_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2116,26 +2116,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB6_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB6_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB6_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2156,26 +2156,26 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB6_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, 
$5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB6_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB6_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2194,28 +2194,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB6_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB6_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB6_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2235,28 +2235,28 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB6_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1 +; 
MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB6_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2597,26 +2597,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB7_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB7_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB7_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2637,26 +2637,26 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB7_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB7_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB7_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; 
MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2675,28 +2675,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB7_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB7_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB7_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2716,28 +2716,28 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB7_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB7_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; 
MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3079,26 +3079,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB8_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB8_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB8_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3119,26 +3119,26 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB8_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB8_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3157,28 +3157,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB8_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: 
movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB8_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB8_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3198,28 +3198,28 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB8_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB8_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3560,26 +3560,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB9_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB9_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: slt $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: 
and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB9_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3600,26 +3600,26 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB9_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: slt $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB9_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3638,28 +3638,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB9_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB9_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: slt $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB9_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; 
MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3679,28 +3679,28 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB9_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: slt $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB9_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4041,26 +4041,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB10_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB10_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movn $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB10_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4081,26 +4081,26 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; 
MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB10_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: seleqz $9, $8, $11 +; MIPS64R6-NEXT: selnez $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB10_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4119,28 +4119,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB10_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB10_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movn $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB10_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4160,28 +4160,28 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB10_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; 
MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: seleqz $9, $8, $11 +; MIPS64ELR6-NEXT: selnez $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB10_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4522,26 +4522,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 +; MIPS64-NEXT: nor $6, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB11_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB11_1 +; MIPS64-NEXT: ll $8, 0($1) +; MIPS64-NEXT: sltu $11, $8, $5 +; MIPS64-NEXT: move $9, $8 +; MIPS64-NEXT: movz $9, $5, $11 +; MIPS64-NEXT: and $9, $9, $3 +; MIPS64-NEXT: and $10, $8, $6 +; MIPS64-NEXT: or $10, $10, $9 +; MIPS64-NEXT: sc $10, 0($1) +; MIPS64-NEXT: beqz $10, .LBB11_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $7, $8, $3 +; MIPS64-NEXT: srlv $7, $7, $2 +; MIPS64-NEXT: seh $7, $7 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4562,26 +4562,26 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 +; MIPS64R6-NEXT: nor $6, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB11_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6-NEXT: ll $8, 0($1) +; MIPS64R6-NEXT: sltu $11, $8, $5 +; MIPS64R6-NEXT: selnez $9, $8, $11 +; MIPS64R6-NEXT: seleqz $11, $5, $11 +; MIPS64R6-NEXT: or $9, $9, $11 +; MIPS64R6-NEXT: and $9, $9, $3 +; MIPS64R6-NEXT: and $10, $8, $6 +; MIPS64R6-NEXT: or $10, $10, $9 +; MIPS64R6-NEXT: sc $10, 0($1) +; MIPS64R6-NEXT: beqzc $10, .LBB11_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; 
MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $7, $8, $3 +; MIPS64R6-NEXT: srlv $7, $7, $2 +; MIPS64R6-NEXT: seh $7, $7 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4600,28 +4600,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 +; MIPS64EL-NEXT: nor $6, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB11_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: ll $8, 0($1) ; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB11_1 +; MIPS64EL-NEXT: and $5, $5, $3 +; MIPS64EL-NEXT: sltu $11, $8, $5 +; MIPS64EL-NEXT: move $9, $8 +; MIPS64EL-NEXT: movz $9, $5, $11 +; MIPS64EL-NEXT: and $9, $9, $3 +; MIPS64EL-NEXT: and $10, $8, $6 +; MIPS64EL-NEXT: or $10, $10, $9 +; MIPS64EL-NEXT: sc $10, 0($1) +; MIPS64EL-NEXT: beqz $10, .LBB11_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $7, $8, $3 +; MIPS64EL-NEXT: srlv $7, $7, $2 +; MIPS64EL-NEXT: seh $7, $7 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4641,28 +4641,28 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 +; MIPS64ELR6-NEXT: nor $6, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB11_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: ll $8, 0($1) ; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1 +; MIPS64ELR6-NEXT: and $5, $5, $3 +; MIPS64ELR6-NEXT: sltu $11, $8, $5 +; MIPS64ELR6-NEXT: selnez $9, $8, $11 +; MIPS64ELR6-NEXT: seleqz $11, $5, $11 +; MIPS64ELR6-NEXT: or $9, $9, $11 +; MIPS64ELR6-NEXT: and $9, $9, $3 +; MIPS64ELR6-NEXT: and $10, $8, $6 +; MIPS64ELR6-NEXT: or $10, $10, $9 +; MIPS64ELR6-NEXT: sc $10, 0($1) +; MIPS64ELR6-NEXT: beqzc $10, .LBB11_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $7, $8, $3 +; MIPS64ELR6-NEXT: srlv $7, $7, $2 +; MIPS64ELR6-NEXT: seh $7, $7 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw 
$6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll index 59ff83e4969cc..3846fda47b138 100644 --- a/llvm/test/CodeGen/Mips/atomic.ll +++ b/llvm/test/CodeGen/Mips/atomic.ll @@ -2559,28 +2559,28 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB8_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: addu $9, $8, $4 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3075,28 +3075,28 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB9_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: subu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: subu $9, $8, $4 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry 
-; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3601,29 +3601,29 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB10_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $7, $4 -; MIPS64R6O0-NEXT: nor $8, $zero, $8 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: and $9, $8, $4 +; MIPS64R6O0-NEXT: nor $9, $zero, $9 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4115,27 +4115,27 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB11_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $4, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: and $9, $4, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) 
+; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4666,32 +4666,32 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $6, $zero, $3 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $6, $zero, 255 +; MIPS64R6O0-NEXT: sllv $6, $6, $3 +; MIPS64R6O0-NEXT: nor $7, $zero, $6 ; MIPS64R6O0-NEXT: andi $4, $4, 255 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: andi $5, $5, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $1 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 ; MIPS64R6O0-NEXT: .LBB12_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $8, $3 -; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3 +; MIPS64R6O0-NEXT: ll $9, 0($2) +; MIPS64R6O0-NEXT: and $10, $9, $6 +; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS64R6O0-NEXT: and $8, $8, $6 -; MIPS64R6O0-NEXT: or $8, $8, $5 -; MIPS64R6O0-NEXT: sc $8, 0($2) -; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1 +; MIPS64R6O0-NEXT: and $9, $9, $7 +; MIPS64R6O0-NEXT: or $9, $9, $5 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1 ; MIPS64R6O0-NEXT: .LBB12_3: # %entry -; MIPS64R6O0-NEXT: srlv $7, $9, $1 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: srlv $8, $10, $3 +; MIPS64R6O0-NEXT: seb $8, $8 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -5236,28 +5236,28 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS64R6O0-NEXT: sll $2, $2, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 ; MIPS64R6O0-NEXT: sllv $3, $3, $2 -; MIPS64R6O0-NEXT: nor $4, $zero, $3 -; MIPS64R6O0-NEXT: andi $7, $5, 255 -; MIPS64R6O0-NEXT: sllv $7, $7, $2 +; MIPS64R6O0-NEXT: nor $7, $zero, $3 +; MIPS64R6O0-NEXT: andi $8, $5, 255 +; MIPS64R6O0-NEXT: sllv $8, $8, $2 ; MIPS64R6O0-NEXT: andi $6, $6, 255 ; MIPS64R6O0-NEXT: sllv $6, $6, $2 ; MIPS64R6O0-NEXT: .LBB13_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($1) -; MIPS64R6O0-NEXT: and $10, $9, $3 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3 +; MIPS64R6O0-NEXT: ll $10, 0($1) +; MIPS64R6O0-NEXT: and $11, $10, $3 +; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $4 -; MIPS64R6O0-NEXT: or $9, $9, $6 -; 
MIPS64R6O0-NEXT: sc $9, 0($1) -; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1 +; MIPS64R6O0-NEXT: and $10, $10, $7 +; MIPS64R6O0-NEXT: or $10, $10, $6 +; MIPS64R6O0-NEXT: sc $10, 0($1) +; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1 ; MIPS64R6O0-NEXT: .LBB13_3: # %entry -; MIPS64R6O0-NEXT: srlv $8, $10, $2 -; MIPS64R6O0-NEXT: seb $8, $8 +; MIPS64R6O0-NEXT: srlv $9, $11, $2 +; MIPS64R6O0-NEXT: seb $9, $9 ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -5775,28 +5775,28 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 2 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: andi $3, $1, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 2 +; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: ori $5, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: nor $6, $zero, $5 +; MIPS64R6O0-NEXT: sllv $4, $4, $3 ; MIPS64R6O0-NEXT: .LBB14_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: addu $9, $8, $4 +; MIPS64R6O0-NEXT: and $9, $9, $5 +; MIPS64R6O0-NEXT: and $10, $8, $6 +; MIPS64R6O0-NEXT: or $10, $10, $9 +; MIPS64R6O0-NEXT: sc $10, 0($2) +; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seh $6, $6 +; MIPS64R6O0-NEXT: and $7, $8, $5 +; MIPS64R6O0-NEXT: srlv $7, $7, $3 +; MIPS64R6O0-NEXT: seh $7, $7 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -6359,33 +6359,33 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS64R6O0-NEXT: sll $3, $5, 0 ; MIPS64R6O0-NEXT: addu $2, $3, $2 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 -; MIPS64R6O0-NEXT: and $3, $4, $3 -; MIPS64R6O0-NEXT: andi $4, $4, 3 -; MIPS64R6O0-NEXT: xori $4, $4, 2 -; MIPS64R6O0-NEXT: sll $4, $4, 3 +; MIPS64R6O0-NEXT: daddiu $8, $zero, -4 +; MIPS64R6O0-NEXT: and $8, $4, $8 +; MIPS64R6O0-NEXT: andi $3, $4, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 2 +; MIPS64R6O0-NEXT: sll $3, $3, 3 ; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $4 +; MIPS64R6O0-NEXT: sllv $5, $5, $3 ; MIPS64R6O0-NEXT: nor $6, $zero, $5 ; MIPS64R6O0-NEXT: andi $7, $2, 65535 -; MIPS64R6O0-NEXT: sllv $7, $7, $4 +; MIPS64R6O0-NEXT: sllv $7, $7, $3 ; MIPS64R6O0-NEXT: andi $1, $1, 65535 -; MIPS64R6O0-NEXT: sllv $1, $1, $4 +; MIPS64R6O0-NEXT: sllv $1, $1, $3 ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; 
MIPS64R6O0-NEXT: ll $9, 0($3) -; MIPS64R6O0-NEXT: and $10, $9, $5 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3 +; MIPS64R6O0-NEXT: ll $10, 0($8) +; MIPS64R6O0-NEXT: and $11, $10, $5 +; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3 ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $6 -; MIPS64R6O0-NEXT: or $9, $9, $1 -; MIPS64R6O0-NEXT: sc $9, 0($3) -; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1 +; MIPS64R6O0-NEXT: and $10, $10, $6 +; MIPS64R6O0-NEXT: or $10, $10, $1 +; MIPS64R6O0-NEXT: sc $10, 0($8) +; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1 ; MIPS64R6O0-NEXT: .LBB15_3: -; MIPS64R6O0-NEXT: srlv $8, $10, $4 -; MIPS64R6O0-NEXT: seh $8, $8 +; MIPS64R6O0-NEXT: srlv $9, $11, $3 +; MIPS64R6O0-NEXT: seh $9, $9 ; MIPS64R6O0-NEXT: # %bb.4: ; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -7145,8 +7145,8 @@ define i32 @zeroreg() nounwind { ; MIPS64R6O0-NEXT: sc $6, 0($1) ; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1 ; MIPS64R6O0-NEXT: .LBB17_3: # %entry -; MIPS64R6O0-NEXT: xor $1, $5, $3 -; MIPS64R6O0-NEXT: sltiu $2, $1, 1 +; MIPS64R6O0-NEXT: xor $2, $5, $3 +; MIPS64R6O0-NEXT: sltiu $2, $2, 1 ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra ; diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll index b9f6568e40c92..e86cec37d5100 100644 --- a/llvm/test/CodeGen/Mips/implicit-sret.ll +++ b/llvm/test/CodeGen/Mips/implicit-sret.ll @@ -48,8 +48,8 @@ define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind { ; CHECK-NEXT: sd $zero, 8($4) ; CHECK-NEXT: daddiu $3, $zero, 30 ; CHECK-NEXT: sd $3, 24($4) -; CHECK-NEXT: addiu $3, $zero, 10 -; CHECK-NEXT: sw $3, 0($4) +; CHECK-NEXT: addiu $5, $zero, 10 +; CHECK-NEXT: sw $5, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } @@ -70,12 +70,10 @@ define internal void @test2() unnamed_addr nounwind { ; CHECK-NEXT: lw $3, 4($sp) ; CHECK-NEXT: # implicit-def: $a0_64 ; CHECK-NEXT: move $4, $3 -; CHECK-NEXT: # implicit-def: $v1_64 -; CHECK-NEXT: move $3, $2 -; CHECK-NEXT: # implicit-def: $v0_64 -; CHECK-NEXT: move $2, $1 -; CHECK-NEXT: move $5, $3 -; CHECK-NEXT: move $6, $2 +; CHECK-NEXT: # implicit-def: $a1_64 +; CHECK-NEXT: move $5, $2 +; CHECK-NEXT: # implicit-def: $a2_64 +; CHECK-NEXT: move $6, $1 ; CHECK-NEXT: jal use_sret2 ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll index c38f377869f86..a1d9805458368 100644 --- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -21,11 +21,11 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* n ; CHECK-NEXT: addze 5, 5 ; CHECK-NEXT: add 4, 5, 4 ; CHECK-NEXT: cmpld 7, 4, 5 -; CHECK-NEXT: mfocrf 4, 1 -; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x5 -; CHECK-NEXT: mr 5, 4 -; CHECK-NEXT: clrldi 4, 5, 32 +; CHECK-NEXT: mfocrf 10, 1 +; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31 +; CHECK-NEXT: # implicit-def: $x4 +; CHECK-NEXT: mr 4, 10 +; CHECK-NEXT: clrldi 4, 4, 32 ; CHECK-NEXT: std 4, 0(3) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 diff --git a/llvm/test/CodeGen/PowerPC/popcount.ll b/llvm/test/CodeGen/PowerPC/popcount.ll index 
fb20f1d3ee43b..170d3d77d0886 100644 --- a/llvm/test/CodeGen/PowerPC/popcount.ll +++ b/llvm/test/CodeGen/PowerPC/popcount.ll @@ -58,17 +58,17 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 ; CHECK-NEXT: mffprd 3, 0 ; CHECK-NEXT: popcntd 3, 3 -; CHECK-NEXT: xxswapd 0, 34 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 -; CHECK-NEXT: mffprd 4, 0 +; CHECK-NEXT: xxswapd 1, 34 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: mffprd 4, 1 ; CHECK-NEXT: popcntd 4, 4 ; CHECK-NEXT: add 3, 4, 3 ; CHECK-NEXT: mtfprd 0, 3 -; CHECK-NEXT: # kill: def $vsl0 killed $f0 +; CHECK-NEXT: fmr 2, 0 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: mtfprd 1, 3 -; CHECK-NEXT: # kill: def $vsl1 killed $f1 -; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: mtfprd 0, 3 +; CHECK-NEXT: fmr 3, 0 +; CHECK-NEXT: xxmrghd 34, 3, 2 ; CHECK-NEXT: blr Entry: %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0) diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 4a78218262ca0..39469d63b9078 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -1548,8 +1548,8 @@ define <2 x i64> @test46(<2 x float> %a) { ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test46: @@ -1616,8 +1616,8 @@ define <2 x i64> @test47(<2 x float> %a) { ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test47: @@ -1859,13 +1859,13 @@ define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1925,13 +1925,13 @@ define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1991,13 +1991,13 @@ define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; 
CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -2426,12 +2426,12 @@ define <2 x i32> @test80(i32 %v) { ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-FISL-NEXT: stw r3, -16(r1) -; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvw4x vs0, 0, r3 +; CHECK-FISL-NEXT: addi r4, r1, -16 +; CHECK-FISL-NEXT: lxvw4x vs0, 0, r4 ; CHECK-FISL-NEXT: xxspltw v2, vs0, 0 -; CHECK-FISL-NEXT: addis r3, r2, .LCPI65_0@toc@ha -; CHECK-FISL-NEXT: addi r3, r3, .LCPI65_0@toc@l -; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 +; CHECK-FISL-NEXT: addis r4, r2, .LCPI65_0@toc@ha +; CHECK-FISL-NEXT: addi r4, r4, .LCPI65_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v3, 0, r4 ; CHECK-FISL-NEXT: vadduwm v2, v2, v3 ; CHECK-FISL-NEXT: blr ; diff --git a/llvm/test/CodeGen/SPARC/fp16-promote.ll b/llvm/test/CodeGen/SPARC/fp16-promote.ll index 0c402430dadc1..9709322f48a57 100644 --- a/llvm/test/CodeGen/SPARC/fp16-promote.ll +++ b/llvm/test/CodeGen/SPARC/fp16-promote.ll @@ -182,11 +182,11 @@ define void @test_fptrunc_double(double %d, half* %p) nounwind { ; V8-UNOPT-NEXT: std %i4, [%fp+-8] ; V8-UNOPT-NEXT: ldd [%fp+-8], %f0 ; V8-UNOPT-NEXT: std %f0, [%fp+-16] -; V8-UNOPT-NEXT: ldd [%fp+-16], %i0 -; V8-UNOPT-NEXT: mov %i0, %i3 -; V8-UNOPT-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1 -; V8-UNOPT-NEXT: mov %i3, %o0 -; V8-UNOPT-NEXT: mov %i1, %o1 +; V8-UNOPT-NEXT: ldd [%fp+-16], %i4 +; V8-UNOPT-NEXT: mov %i4, %i0 +; V8-UNOPT-NEXT: ! kill: def $i5 killed $i5 killed $i4_i5 +; V8-UNOPT-NEXT: mov %i0, %o0 +; V8-UNOPT-NEXT: mov %i5, %o1 ; V8-UNOPT-NEXT: call __truncdfhf2 ; V8-UNOPT-NEXT: st %i2, [%fp+-20] ; V8-UNOPT-NEXT: ld [%fp+-20], %i0 ! 
4-byte Folded Reload diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll index b5635c7e0f067..48ad2a2c07770 100644 --- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -8,34 +8,34 @@ define i32 @z() nounwind ssp { ; CHECK-LABEL: z: ; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $148, %esp +; CHECK-NEXT: subl $144, %esp ; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax ; CHECK-NEXT: movl (%eax), %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $48, {{[0-9]+}}(%esp) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl $8, %ecx -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl $8, %edx +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %edx, %ecx ; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: addl $36, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edx, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl -; CHECK-NEXT: movb %cl, 32(%eax) -; CHECK-NEXT: movb %cl, 68(%eax) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl +; CHECK-NEXT: movb %bl, 32(%eax) +; CHECK-NEXT: movb %bl, 68(%eax) ; CHECK-NEXT: calll _f ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -50,9 +50,10 @@ define i32 @z() nounwind ssp { ; CHECK-NEXT: jne LBB0_3 ; CHECK-NEXT: ## %bb.2: ## %SP_return ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; CHECK-NEXT: addl $148, %esp +; CHECK-NEXT: addl $144, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx ; CHECK-NEXT: retl ; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk ; CHECK-NEXT: calll ___stack_chk_fail diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 7a1f34c65c183..16fde4074ea0e 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -126,8 +126,8 @@ define void @narrow_writeback_and(i64* %ptr) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $-256, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, (%rdi) +; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movq %rcx, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: narrow_writeback_and: @@ -231,10 +231,10 @@ define i128 @load_i128(i128* %ptr) { ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 ; CHECK-O0-NEXT: xorl %eax, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte 
Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movq %rcx, %rax +; CHECK-O0-NEXT: movq %rcx, %rdx +; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O0-NEXT: popq %rbx @@ -326,14 +326,14 @@ define i256 @load_i256(i256* %ptr) { ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: callq __atomic_load ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; CHECK-O0-NEXT: movq %rsi, 24(%rdi) -; CHECK-O0-NEXT: movq %rdx, 16(%rdi) -; CHECK-O0-NEXT: movq %rcx, 8(%rdi) -; CHECK-O0-NEXT: movq %rax, (%rdi) +; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; CHECK-O0-NEXT: movq %rdi, 24(%r9) +; CHECK-O0-NEXT: movq %rsi, 16(%r9) +; CHECK-O0-NEXT: movq %rdx, 8(%r9) +; CHECK-O0-NEXT: movq %rax, (%r9) ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 @@ -831,8 +831,8 @@ define i64 @load_fold_udiv1(i64* %p) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx -; CHECK-O0-NEXT: movl $15, %ecx -; CHECK-O0-NEXT: divq %rcx +; CHECK-O0-NEXT: movl $15, %esi +; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_udiv1: @@ -1024,8 +1024,8 @@ define i64 @load_fold_urem1(i64* %p) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx -; CHECK-O0-NEXT: movl $15, %ecx -; CHECK-O0-NEXT: divq %rcx +; CHECK-O0-NEXT: movl $15, %esi +; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; @@ -1475,9 +1475,9 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax -; CHECK-O0-NEXT: sete %cl +; CHECK-O0-NEXT: sete %dl ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: movb %dl, %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_icmp3: @@ -2076,8 +2076,8 @@ define void @rmw_fold_and1(i64* %p, i64 %v) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $15, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, (%rdi) +; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movq %rcx, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_and1: @@ -2541,8 +2541,9 @@ define i16 @load_i8_anyext_i16(i8* %ptr) { ; CHECK-O0-CUR-LABEL: load_i8_anyext_i16: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: movb (%rdi), %al -; CHECK-O0-CUR-NEXT: movzbl %al, %eax -; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-O0-CUR-NEXT: movzbl %al, %ecx +; CHECK-O0-CUR-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-O0-CUR-NEXT: movw %cx, %ax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i8_anyext_i16: @@ -2670,12 +2671,13 @@ define i16 @load_combine(i8* %p) { ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb (%rdi), %al ; CHECK-O0-NEXT: movb 1(%rdi), %cl -; CHECK-O0-NEXT: movzbl %al, %eax -; CHECK-O0-NEXT: # kill: def $ax 
killed $ax killed $eax -; CHECK-O0-NEXT: movzbl %cl, %ecx -; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx -; CHECK-O0-NEXT: shlw $8, %cx -; CHECK-O0-NEXT: orw %cx, %ax +; CHECK-O0-NEXT: movzbl %al, %edx +; CHECK-O0-NEXT: # kill: def $dx killed $dx killed $edx +; CHECK-O0-NEXT: movzbl %cl, %esi +; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi +; CHECK-O0-NEXT: shlw $8, %si +; CHECK-O0-NEXT: orw %si, %dx +; CHECK-O0-NEXT: movw %dx, %ax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_combine: diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll index 3fe5ef8311ce7..4fb03356f99f4 100644 --- a/llvm/test/CodeGen/X86/atomic32.ll +++ b/llvm/test/CodeGen/X86/atomic32.ll @@ -70,8 +70,8 @@ define void @atomic_fetch_and32() nounwind { ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -94,8 +94,8 @@ define void @atomic_fetch_and32() nounwind { ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %dl +; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -124,8 +124,8 @@ define void @atomic_fetch_or32() nounwind { ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: orl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -148,8 +148,8 @@ define void @atomic_fetch_or32() nounwind { ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %dl +; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -178,8 +178,8 @@ define void @atomic_fetch_xor32() nounwind { ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: xorl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -202,8 +202,8 @@ define void @atomic_fetch_xor32() nounwind { ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %dl +; X86-NEXT: testb $1, %dl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -234,8 +234,8 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; X64-NEXT: andl %edx, %ecx ; X64-NEXT: notl %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB5_2 ; X64-NEXT: jmp .LBB5_1 @@ -244,6 +244,7 @@ define void 
@atomic_fetch_nand32(i32 %x) nounwind { ; ; X86-LABEL: atomic_fetch_nand32: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl sc32, %ecx @@ -257,13 +258,14 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; X86-NEXT: andl %edx, %ecx ; X86-NEXT: notl %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl +; X86-NEXT: sete %bl +; X86-NEXT: testb $1, %bl ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: jne .LBB5_2 ; X86-NEXT: jmp .LBB5_1 ; X86-NEXT: .LBB5_2: # %atomicrmw.end ; X86-NEXT: addl $8, %esp +; X86-NEXT: popl %ebx ; X86-NEXT: retl %t1 = atomicrmw nand i32* @sc32, i32 %x acquire ret void @@ -283,8 +285,8 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovgel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB6_2 @@ -294,6 +296,7 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_max32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -307,18 +310,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovgel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB6_2 ; X86-CMOV-NEXT: jmp .LBB6_1 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_max32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -347,18 +352,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB6_2 ; X86-NOCMOV-NEXT: jmp .LBB6_1 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_max32: ; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -387,14 +394,15 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB6_2 ; X86-NOX87-NEXT: jmp .LBB6_1 ; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end ; 
X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire ret void @@ -414,8 +422,8 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovlel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB7_2 @@ -425,6 +433,7 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_min32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -438,18 +447,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovlel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB7_2 ; X86-CMOV-NEXT: jmp .LBB7_1 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_min32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -478,18 +489,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB7_2 ; X86-NOCMOV-NEXT: jmp .LBB7_1 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_min32: ; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -518,14 +531,15 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB7_2 ; X86-NOX87-NEXT: jmp .LBB7_1 ; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire ret void @@ -545,8 +559,8 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmoval %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 
4-byte Spill ; X64-NEXT: jne .LBB8_2 @@ -556,6 +570,7 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_umax32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -569,18 +584,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmoval %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB8_2 ; X86-CMOV-NEXT: jmp .LBB8_1 ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umax32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -609,18 +626,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB8_2 ; X86-NOCMOV-NEXT: jmp .LBB8_1 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umax32: ; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -649,14 +668,15 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB8_2 ; X86-NOX87-NEXT: jmp .LBB8_1 ; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire ret void @@ -676,8 +696,8 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovbel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB9_2 @@ -687,6 +707,7 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; ; X86-CMOV-LABEL: atomic_fetch_umin32: ; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -700,18 +721,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovbel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb 
$1, %dl +; X86-CMOV-NEXT: sete %bl +; X86-CMOV-NEXT: testb $1, %bl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB9_2 ; X86-CMOV-NEXT: jmp .LBB9_1 ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp +; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umin32: ; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -740,18 +763,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: sete %bl +; X86-NOCMOV-NEXT: testb $1, %bl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB9_2 ; X86-NOCMOV-NEXT: jmp .LBB9_1 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umin32: ; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -780,14 +805,15 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB9_2 ; X86-NOX87-NEXT: jmp .LBB9_1 ; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire ret void diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll index fe7635bdc3ff5..0149851ea4671 100644 --- a/llvm/test/CodeGen/X86/atomic64.ll +++ b/llvm/test/CodeGen/X86/atomic64.ll @@ -137,12 +137,12 @@ define void @atomic_fetch_and64() nounwind { ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5, %ecx -; X64-NEXT: # kill: def $rcx killed $ecx -; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 @@ -202,8 +202,8 @@ define void @atomic_fetch_or64() nounwind { ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: orq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -265,8 +265,8 @@ define void @atomic_fetch_xor64() nounwind { ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: xorq $5, %rcx 
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -330,8 +330,8 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { ; X64-NEXT: andq %rdx, %rcx ; X64-NEXT: notq %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %cl -; X64-NEXT: testb $1, %cl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB5_2 ; X64-NEXT: jmp .LBB5_1 @@ -373,8 +373,8 @@ define void @atomic_fetch_max64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovgeq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB6_2 @@ -473,8 +473,8 @@ define void @atomic_fetch_min64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovleq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB7_2 @@ -571,8 +571,8 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovaq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB8_2 @@ -669,8 +669,8 @@ define void @atomic_fetch_umin64(i64 %x) nounwind { ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovbeq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %sil +; X64-NEXT: testb $1, %sil ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB9_2 diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll index f448bfec2ec99..718449d7a771f 100644 --- a/llvm/test/CodeGen/X86/avx-load-store.ll +++ b/llvm/test/CodeGen/X86/avx-load-store.ll @@ -175,8 +175,8 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp ; CHECK_O0: # %bb.0: ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> @@ -197,8 +197,8 @@ define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nou ; CHECK_O0: # %bb.0: ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %Z = shufflevector 
<4 x i32>%A, <4 x i32>%B, <8 x i32> @@ -239,10 +239,10 @@ define void @f_f() nounwind { ; CHECK_O0-NEXT: .LBB9_3: # %cif_mixed_test_all ; CHECK_O0-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967295,0,0,0] ; CHECK_O0-NEXT: vmovdqa %xmm0, %xmm0 -; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0 +; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1 ; CHECK_O0-NEXT: # implicit-def: $rax -; CHECK_O0-NEXT: # implicit-def: $ymm1 -; CHECK_O0-NEXT: vmaskmovps %ymm1, %ymm0, (%rax) +; CHECK_O0-NEXT: # implicit-def: $ymm2 +; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rax) ; CHECK_O0-NEXT: .LBB9_4: # %cif_mixed_test_any_check allocas: br i1 undef, label %cif_mask_all, label %cif_mask_mixed @@ -276,8 +276,8 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind { ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1 ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %b = load <8 x i32>, <8 x i32>* %bp, align 1 @@ -321,8 +321,8 @@ define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1 ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 -; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %b = load <4 x i64>, <4 x i64>* %bp, align 16 diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll index 186370ca675c7..c4e009d54ec7a 100755 --- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -40,20 +40,22 @@ define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %f ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; CHECK-NEXT: vpmovd2m %xmm0, %k0 ; CHECK-NEXT: kmovq %k0, %k1 -; CHECK-NEXT: kmovd %k0, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload -; CHECK-NEXT: movl $4, %edx -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: kmovd %k0, %esi +; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi +; CHECK-NEXT: movzbl %sil, %edi +; CHECK-NEXT: ## kill: def $di killed $di killed $edi +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload +; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movq %rcx, %rdi +; CHECK-NEXT: movl $4, %r8d +; CHECK-NEXT: movl %r8d, %esi +; CHECK-NEXT: movl %r8d, %edx ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val ; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax -; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload -; CHECK-NEXT: movzwl %cx, %edi +; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r9w ## 2-byte Reload +; CHECK-NEXT: movzwl %r9w, %edi ; CHECK-NEXT: movzwl %ax, %esi ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload diff --git 
a/llvm/test/CodeGen/X86/crash-O0.ll b/llvm/test/CodeGen/X86/crash-O0.ll index 9f9e5584d6f21..a93d3dd267b52 100644 --- a/llvm/test/CodeGen/X86/crash-O0.ll +++ b/llvm/test/CodeGen/X86/crash-O0.ll @@ -79,12 +79,11 @@ define i64 @addressModeWith32bitIndex(i32 %V) { ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ## kill: def $rax killed $eax -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: cqto -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload -; CHECK-NEXT: idivq (%rsi,%rcx,8) +; CHECK-NEXT: movslq %edi, %rsi +; CHECK-NEXT: idivq (%rcx,%rsi,8) ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq %gep = getelementptr i64, i64* null, i32 %V diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll index 664d9ded1e0e1..7d05a869be893 100644 --- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -7,8 +7,8 @@ define void @foo(i32* %p) !dbg !4 { bb: %tmp = load i32, i32* %p, align 4, !dbg !7 ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p) - ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 - ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7 + ; CHECK-NEXT: $ecx = MOV32rr killed $eax, implicit-def $rcx, debug-location !7 + ; CHECK-NEXT: $rdx = MOV64rr $rcx, debug-location !7 switch i32 %tmp, label %bb7 [ i32 0, label %bb1 diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll index 7fffa21f0d24d..5d7c83fa19d44 100644 --- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -1013,11 +1013,11 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xfloat: @@ -1067,11 +1067,11 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xdouble: @@ -1121,11 +1121,11 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; 
AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt64xi8: @@ -1175,11 +1175,11 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi16: @@ -1229,11 +1229,11 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi32: @@ -1283,11 +1283,11 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) { ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: $ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi64: diff --git a/llvm/test/CodeGen/X86/lvi-hardening-loads.ll b/llvm/test/CodeGen/X86/lvi-hardening-loads.ll index ff8276f6f1c22..e660f306ef75b 100644 --- a/llvm/test/CodeGen/X86/lvi-hardening-loads.ll +++ b/llvm/test/CodeGen/X86/lvi-hardening-loads.ll @@ -117,9 +117,9 @@ if.then: ; preds = %for.body ; X64-NOOPT-NEXT: lfence ; X64-NOOPT-NEXT: movq (%rax,%rcx,8), %rax ; X64-NOOPT-NEXT: lfence -; X64-NOOPT-NEXT: movl (%rax), %eax +; X64-NOOPT-NEXT: movl (%rax), %edx ; X64-NOOPT-NEXT: lfence -; X64-NOOPT-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NOOPT-NEXT: movl %edx, -{{[0-9]+}}(%rsp) if.end: ; preds = %if.then, %for.body br label %for.inc diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll index ac55e1a1fc653..a1ad7f3c0f534 100644 --- a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll +++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll @@ -69,8 +69,8 @@ define dso_local void @test_zero_ext(%struct.Foo* %f, i32 addrspace(271)* %i) { ; CHECK-O0-LABEL: test_zero_ext: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %edx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, 8(%rcx) +; CHECK-O0-NEXT: movl %eax, %r8d +; CHECK-O0-NEXT: 
movq %r8, 8(%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(271)* %i to i32* @@ -125,23 +125,19 @@ entry: ; Test that null can be passed as a 32-bit pointer. define dso_local void @test_null_arg(%struct.Foo* %f) { -; CHECK-LABEL: test_null_arg: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $40, %rsp -; CHECK: xorl %edx, %edx -; CHECK-NEXT: callq test_noop1 -; CHECK-NEXT: nop -; CHECK-NEXT: addq $40, %rsp -; CHECK-NEXT: retq -; -; CHECK-O0-LABEL: test_null_arg: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: subq $40, %rsp -; CHECK-O0: xorl %edx, %edx -; CHECK-O0-NEXT: callq test_noop1 -; CHECK-O0-NEXT: nop -; CHECK-O0-NEXT: addq $40, %rsp -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_null_arg: +; ALL: # %bb.0: # %entry +; ALL-NEXT: subq $40, %rsp +; ALL-NEXT: .seh_stackalloc 40 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: xorl %edx, %edx +; ALL-NEXT: callq test_noop1 +; ALL-NEXT: nop +; ALL-NEXT: addq $40, %rsp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc entry: call void @test_noop1(%struct.Foo* %f, i32 addrspace(270)* null) ret void @@ -177,8 +173,8 @@ define void @test_unrecognized2(%struct.Foo* %f, i32 addrspace(271)* %i) { ; CHECK-O0-LABEL: test_unrecognized2: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %edx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, 16(%rcx) +; CHECK-O0-NEXT: movl %eax, %r8d +; CHECK-O0-NEXT: movq %r8, 16(%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(9)* @@ -189,16 +185,11 @@ entry: } define i32 @test_load_sptr32(i32 addrspace(270)* %i) { -; CHECK-LABEL: test_load_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl (%rax), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_load_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movslq %ecx, %rax -; CHECK-O0-NEXT: movl (%rax), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl (%rax), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(270)* %i, align 4 ret i32 %0 @@ -210,11 +201,12 @@ define i32 @test_load_uptr32(i32 addrspace(271)* %i) { ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl (%rax), %eax ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_load_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movl (%rax), %eax +; CHECK-O0-NEXT: movl %eax, %edx +; CHECK-O0-NEXT: movl (%rdx), %eax ; CHECK-O0-NEXT: retq entry: %0 = load i32, i32 addrspace(271)* %i, align 4 @@ -222,30 +214,21 @@ entry: } define i32 @test_load_ptr64(i32 addrspace(272)* %i) { -; CHECK-LABEL: test_load_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl (%rcx), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_load_ptr64: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl (%rcx), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl (%rcx), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(272)* %i, align 8 ret i32 %0 } define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) { -; CHECK-LABEL: test_store_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl %edx, (%rax) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movslq %ecx, %rax -; CHECK-O0-NEXT: movl %edx, (%rax) -; 
CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl %edx, (%rax) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(270)* %s, align 4 ret void @@ -257,11 +240,12 @@ define void @test_store_uptr32(i32 addrspace(271)* %s, i32 %i) { ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_store_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movl %edx, (%rax) +; CHECK-O0-NEXT: movl %eax, %r8d +; CHECK-O0-NEXT: movl %edx, (%r8) ; CHECK-O0-NEXT: retq entry: store i32 %i, i32 addrspace(271)* %s, align 4 @@ -269,14 +253,10 @@ entry: } define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) { -; CHECK-LABEL: test_store_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edx, (%rcx) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_ptr64: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl %edx, (%rcx) -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl %edx, (%rcx) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(272)* %s, align 8 ret void diff --git a/llvm/test/CodeGen/X86/pr1489.ll b/llvm/test/CodeGen/X86/pr1489.ll index d1148eecb0da9..6226ea6caf90f 100644 --- a/llvm/test/CodeGen/X86/pr1489.ll +++ b/llvm/test/CodeGen/X86/pr1489.ll @@ -16,9 +16,9 @@ define i32 @quux() nounwind { ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -42,9 +42,9 @@ define i32 @foo() nounwind { ; CHECK-NEXT: movl $-1236950581, (%eax) ## imm = 0xB645A1CB ; CHECK-NEXT: calll _lrint ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -67,9 +67,9 @@ define i32 @bar() nounwind { ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -90,9 +90,9 @@ define i32 @baz() nounwind { ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %al -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr27591.ll b/llvm/test/CodeGen/X86/pr27591.ll index 7455584ac698a..97ad6814f1926 100644 --- a/llvm/test/CodeGen/X86/pr27591.ll +++ b/llvm/test/CodeGen/X86/pr27591.ll @@ -9,9 +9,9 @@ define void @test1(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: movl %ecx, %edi ; CHECK-NEXT: 
callq callee1 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq @@ -27,10 +27,10 @@ define void @test2(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: negl %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: negl %ecx +; CHECK-NEXT: movl %ecx, %edi ; CHECK-NEXT: callq callee2 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll index e524245daa112..4d40aa09eeab1 100644 --- a/llvm/test/CodeGen/X86/pr30430.ll +++ b/llvm/test/CodeGen/X86/pr30430.ll @@ -75,28 +75,28 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] ; CHECK-NEXT: # implicit-def: $ymm2 ; CHECK-NEXT: vmovaps %xmm1, %xmm2 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0] ; CHECK-NEXT: # implicit-def: $ymm3 -; CHECK-NEXT: vmovaps %xmm2, %xmm3 -; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1 -; CHECK-NEXT: # implicit-def: $zmm2 -; CHECK-NEXT: vmovaps %ymm1, %ymm2 -; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovaps %xmm1, %xmm3 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3 +; CHECK-NEXT: # implicit-def: $zmm24 +; CHECK-NEXT: vmovaps %zmm3, %zmm24 +; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24 +; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr30813.ll b/llvm/test/CodeGen/X86/pr30813.ll index 7266c5bd8d015..e3e096bda6c28 100644 --- a/llvm/test/CodeGen/X86/pr30813.ll +++ b/llvm/test/CodeGen/X86/pr30813.ll @@ -1,8 +1,9 @@ ; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s ; CHECK: patatino: ; CHECK: .cfi_startproc -; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]] -; 
CHECK: movq %r[[REG0]], ({{%r[abcd]x}}) +; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]] +; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x +; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}}) ; CHECK: retq define void @patatino() { diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll index 1f3d273dfc416..6d628e6962eda 100644 --- a/llvm/test/CodeGen/X86/pr32241.ll +++ b/llvm/test/CodeGen/X86/pr32241.ll @@ -23,14 +23,14 @@ define i32 @_Z3foov() { ; CHECK-NEXT: .LBB0_2: # %lor.end ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: cmpl %eax, %ecx +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: setl %al ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: xorl $-1, %eax -; CHECK-NEXT: cmpl $0, %eax +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: xorl $-1, %ecx +; CHECK-NEXT: cmpl $0, %ecx ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: jne .LBB0_4 @@ -42,9 +42,9 @@ define i32 @_Z3foov() { ; CHECK-NEXT: .LBB0_4: # %lor.end5 ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: # kill: def $ax killed $ax killed $eax -; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll index 533473663d73b..a1041ab889c23 100644 --- a/llvm/test/CodeGen/X86/pr32284.ll +++ b/llvm/test/CodeGen/X86/pr32284.ll @@ -10,28 +10,28 @@ define void @foo() { ; X86-O0-LABEL: foo: ; X86-O0: # %bb.0: # %entry ; X86-O0-NEXT: xorl %eax, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax -; X86-O0-NEXT: xorl %ecx, %ecx +; X86-O0-NEXT: movl %eax, %ecx +; X86-O0-NEXT: xorl %eax, %eax ; X86-O0-NEXT: movzbl c, %edx -; X86-O0-NEXT: subl %edx, %ecx -; X86-O0-NEXT: movslq %ecx, %rcx -; X86-O0-NEXT: subq %rcx, %rax -; X86-O0-NEXT: # kill: def $al killed $al killed $rax -; X86-O0-NEXT: cmpb $0, %al -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: subl %edx, %eax +; X86-O0-NEXT: movslq %eax, %rsi +; X86-O0-NEXT: subq %rsi, %rcx +; X86-O0-NEXT: # kill: def $cl killed $cl killed $rcx +; X86-O0-NEXT: cmpb $0, %cl +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movb %cl, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: cmpb $0, c -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: movzbl c, %ecx -; X86-O0-NEXT: cmpl %ecx, %eax -; X86-O0-NEXT: setle %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %eax +; X86-O0-NEXT: movzbl c, %edx +; X86-O0-NEXT: cmpl %edx, %eax +; X86-O0-NEXT: setle %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %eax ; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: retq ; @@ -63,13 +63,13 @@ define void @foo() { ; 686-O0-NEXT: xorb $-1, %al ; 
686-O0-NEXT: xorb $-1, %al ; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movzbl c, %ecx -; 686-O0-NEXT: cmpl %ecx, %eax +; 686-O0-NEXT: movzbl %al, %ecx +; 686-O0-NEXT: movzbl c, %edx +; 686-O0-NEXT: cmpl %edx, %ecx ; 686-O0-NEXT: setle %al ; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movl %eax, (%esp) +; 686-O0-NEXT: movzbl %al, %ecx +; 686-O0-NEXT: movl %ecx, (%esp) ; 686-O0-NEXT: addl $8, %esp ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl @@ -126,33 +126,33 @@ define void @f1() { ; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5 ; X86-O0-NEXT: addq %rcx, %rax ; X86-O0-NEXT: cmpq $0, %rax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X86-O0-NEXT: movl var_5, %eax -; X86-O0-NEXT: xorl $-1, %eax -; X86-O0-NEXT: cmpl $0, %eax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: movl var_5, %esi +; X86-O0-NEXT: xorl $-1, %esi +; X86-O0-NEXT: cmpl $0, %esi +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %esi +; X86-O0-NEXT: movl %esi, %eax ; X86-O0-NEXT: movslq var_5, %rcx ; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5 ; X86-O0-NEXT: cmpq %rcx, %rax -; X86-O0-NEXT: setg %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: setg %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %esi +; X86-O0-NEXT: movl %esi, %eax ; X86-O0-NEXT: movq %rax, var_57 -; X86-O0-NEXT: movl var_5, %eax -; X86-O0-NEXT: xorl $-1, %eax -; X86-O0-NEXT: cmpl $0, %eax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: movl var_5, %esi +; X86-O0-NEXT: xorl $-1, %esi +; X86-O0-NEXT: cmpl $0, %esi +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %esi +; X86-O0-NEXT: movl %esi, %eax ; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E ; X86-O0-NEXT: retq ; @@ -178,17 +178,20 @@ define void @f1() { ; ; 686-O0-LABEL: f1: ; 686-O0: # %bb.0: # %entry -; 686-O0-NEXT: pushl %ebx +; 686-O0-NEXT: pushl %ebp ; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: pushl %edi +; 686-O0-NEXT: pushl %ebx ; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: pushl %esi +; 686-O0-NEXT: pushl %edi ; 686-O0-NEXT: .cfi_def_cfa_offset 16 +; 686-O0-NEXT: pushl %esi +; 686-O0-NEXT: .cfi_def_cfa_offset 20 ; 686-O0-NEXT: subl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 17 -; 686-O0-NEXT: .cfi_offset %esi, -16 -; 686-O0-NEXT: .cfi_offset %edi, -12 -; 686-O0-NEXT: .cfi_offset %ebx, -8 +; 686-O0-NEXT: .cfi_def_cfa_offset 21 +; 686-O0-NEXT: .cfi_offset %esi, -20 +; 686-O0-NEXT: .cfi_offset %edi, -16 +; 686-O0-NEXT: .cfi_offset %ebx, -12 +; 686-O0-NEXT: .cfi_offset %ebp, -8 ; 686-O0-NEXT: movl var_5, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: sarl $31, %ecx @@ -214,16 +217,18 @@ define void @f1() { ; 686-O0-NEXT: movl var_5, %edi ; 686-O0-NEXT: subl $-1, %edi ; 686-O0-NEXT: sete %bl -; 686-O0-NEXT: movzbl %bl, %ebx -; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E +; 686-O0-NEXT: movzbl %bl, %ebp +; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E ; 
686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4 ; 686-O0-NEXT: addl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 16 +; 686-O0-NEXT: .cfi_def_cfa_offset 20 ; 686-O0-NEXT: popl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 +; 686-O0-NEXT: .cfi_def_cfa_offset 16 ; 686-O0-NEXT: popl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: .cfi_def_cfa_offset 12 ; 686-O0-NEXT: popl %ebx +; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: popl %ebp ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -305,25 +310,25 @@ define void @f2() { ; X86-O0-NEXT: setne %cl ; X86-O0-NEXT: xorb $-1, %cl ; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %ecx -; X86-O0-NEXT: xorl %ecx, %eax +; X86-O0-NEXT: movzbl %cl, %edx +; X86-O0-NEXT: xorl %edx, %eax ; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax ; X86-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X86-O0-NEXT: movzbl var_7, %eax -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax -; X86-O0-NEXT: cmpw $0, %ax -; X86-O0-NEXT: setne %al -; X86-O0-NEXT: xorb $-1, %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: movzbl var_7, %ecx -; X86-O0-NEXT: cmpl %ecx, %eax -; X86-O0-NEXT: sete %al -; X86-O0-NEXT: andb $1, %al -; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax -; X86-O0-NEXT: # implicit-def: $rcx -; X86-O0-NEXT: movw %ax, (%rcx) +; X86-O0-NEXT: movzbl var_7, %edx +; X86-O0-NEXT: # kill: def $dx killed $dx killed $edx +; X86-O0-NEXT: cmpw $0, %dx +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %esi +; X86-O0-NEXT: movzbl var_7, %edi +; X86-O0-NEXT: cmpl %edi, %esi +; X86-O0-NEXT: sete %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %esi +; X86-O0-NEXT: # kill: def $si killed $si killed $esi +; X86-O0-NEXT: # implicit-def: $r8 +; X86-O0-NEXT: movw %si, (%r8) ; X86-O0-NEXT: retq ; ; X64-LABEL: f2: @@ -345,33 +350,43 @@ define void @f2() { ; ; 686-O0-LABEL: f2: ; 686-O0: # %bb.0: # %entry +; 686-O0-NEXT: pushl %edi +; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: pushl %esi +; 686-O0-NEXT: .cfi_def_cfa_offset 12 ; 686-O0-NEXT: subl $2, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 6 +; 686-O0-NEXT: .cfi_def_cfa_offset 14 +; 686-O0-NEXT: .cfi_offset %esi, -12 +; 686-O0-NEXT: .cfi_offset %edi, -8 ; 686-O0-NEXT: movzbl var_7, %eax ; 686-O0-NEXT: cmpb $0, var_7 ; 686-O0-NEXT: setne %cl ; 686-O0-NEXT: xorb $-1, %cl ; 686-O0-NEXT: andb $1, %cl -; 686-O0-NEXT: movzbl %cl, %ecx -; 686-O0-NEXT: xorl %ecx, %eax +; 686-O0-NEXT: movzbl %cl, %edx +; 686-O0-NEXT: xorl %edx, %eax ; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax ; 686-O0-NEXT: movw %ax, (%esp) -; 686-O0-NEXT: movzbl var_7, %eax -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax -; 686-O0-NEXT: cmpw $0, %ax -; 686-O0-NEXT: setne %al -; 686-O0-NEXT: xorb $-1, %al -; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movzbl var_7, %ecx -; 686-O0-NEXT: cmpl %ecx, %eax -; 686-O0-NEXT: sete %al -; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax -; 686-O0-NEXT: # implicit-def: $ecx -; 686-O0-NEXT: movw %ax, (%ecx) +; 686-O0-NEXT: movzbl var_7, %edx +; 686-O0-NEXT: # kill: def $dx killed $dx killed $edx +; 686-O0-NEXT: cmpw $0, %dx +; 686-O0-NEXT: setne %cl +; 686-O0-NEXT: xorb $-1, %cl +; 686-O0-NEXT: andb $1, %cl +; 686-O0-NEXT: movzbl %cl, %esi +; 686-O0-NEXT: movzbl var_7, %edi +; 686-O0-NEXT: cmpl %edi, %esi +; 686-O0-NEXT: sete %cl +; 
686-O0-NEXT: andb $1, %cl +; 686-O0-NEXT: movzbl %cl, %esi +; 686-O0-NEXT: # kill: def $si killed $si killed $esi +; 686-O0-NEXT: # implicit-def: $edi +; 686-O0-NEXT: movw %si, (%edi) ; 686-O0-NEXT: addl $2, %esp +; 686-O0-NEXT: .cfi_def_cfa_offset 12 +; 686-O0-NEXT: popl %esi +; 686-O0-NEXT: .cfi_def_cfa_offset 8 +; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -431,35 +446,35 @@ define void @f3() #0 { ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax ; X86-O0-NEXT: movl %eax, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: movl %eax, %ecx ; X86-O0-NEXT: cmpl $0, var_13 -; X86-O0-NEXT: setne %cl -; X86-O0-NEXT: xorb $-1, %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %ecx -; X86-O0-NEXT: # kill: def $rcx killed $ecx -; X86-O0-NEXT: movl var_13, %edx -; X86-O0-NEXT: xorl $-1, %edx -; X86-O0-NEXT: xorl var_16, %edx -; X86-O0-NEXT: movl %edx, %edx -; X86-O0-NEXT: # kill: def $rdx killed $edx -; X86-O0-NEXT: andq %rdx, %rcx -; X86-O0-NEXT: orq %rcx, %rax -; X86-O0-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %eax +; X86-O0-NEXT: movl %eax, %esi ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax +; X86-O0-NEXT: xorl var_16, %eax ; X86-O0-NEXT: movl %eax, %eax -; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: movl %eax, %edi +; X86-O0-NEXT: andq %rdi, %rsi +; X86-O0-NEXT: orq %rsi, %rcx +; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: movl var_13, %eax +; X86-O0-NEXT: xorl $-1, %eax +; X86-O0-NEXT: movl %eax, %eax +; X86-O0-NEXT: movl %eax, %ecx ; X86-O0-NEXT: cmpl $0, var_13 -; X86-O0-NEXT: setne %cl -; X86-O0-NEXT: xorb $-1, %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %ecx -; X86-O0-NEXT: # kill: def $rcx killed $ecx -; X86-O0-NEXT: andq $0, %rcx -; X86-O0-NEXT: orq %rcx, %rax -; X86-O0-NEXT: # kill: def $eax killed $eax killed $rax -; X86-O0-NEXT: movl %eax, var_46 +; X86-O0-NEXT: setne %dl +; X86-O0-NEXT: xorb $-1, %dl +; X86-O0-NEXT: andb $1, %dl +; X86-O0-NEXT: movzbl %dl, %eax +; X86-O0-NEXT: movl %eax, %esi +; X86-O0-NEXT: andq $0, %rsi +; X86-O0-NEXT: orq %rsi, %rcx +; X86-O0-NEXT: # kill: def $ecx killed $ecx killed $rcx +; X86-O0-NEXT: movl %ecx, var_46 ; X86-O0-NEXT: retq ; ; X64-LABEL: f3: @@ -484,28 +499,31 @@ define void @f3() #0 { ; 686-O0-NEXT: .cfi_offset %ebp, -8 ; 686-O0-NEXT: movl %esp, %ebp ; 686-O0-NEXT: .cfi_def_cfa_register %ebp +; 686-O0-NEXT: pushl %edi ; 686-O0-NEXT: pushl %esi ; 686-O0-NEXT: andl $-8, %esp -; 686-O0-NEXT: subl $16, %esp -; 686-O0-NEXT: .cfi_offset %esi, -12 +; 686-O0-NEXT: subl $8, %esp +; 686-O0-NEXT: .cfi_offset %esi, -16 +; 686-O0-NEXT: .cfi_offset %edi, -12 ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: notl %ecx ; 686-O0-NEXT: testl %eax, %eax -; 686-O0-NEXT: sete %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movl var_16, %edx -; 686-O0-NEXT: movl %ecx, %esi -; 686-O0-NEXT: xorl %edx, %esi -; 686-O0-NEXT: andl %esi, %eax +; 686-O0-NEXT: sete %dl +; 686-O0-NEXT: movzbl %dl, %eax +; 686-O0-NEXT: movl var_16, %esi +; 686-O0-NEXT: movl %ecx, %edi +; 686-O0-NEXT: xorl %esi, %edi +; 686-O0-NEXT: andl %edi, %eax ; 686-O0-NEXT: orl %eax, %ecx ; 686-O0-NEXT: movl %ecx, (%esp) ; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp) ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: notl %eax ; 686-O0-NEXT: movl %eax, var_46 -; 686-O0-NEXT: leal -4(%ebp), %esp +; 686-O0-NEXT: leal -8(%ebp), %esp ; 
686-O0-NEXT: popl %esi +; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: popl %ebp ; 686-O0-NEXT: .cfi_def_cfa %esp, 4 ; 686-O0-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr32340.ll b/llvm/test/CodeGen/X86/pr32340.ll index 98685b959f642..1e428ac7d83a6 100644 --- a/llvm/test/CodeGen/X86/pr32340.ll +++ b/llvm/test/CodeGen/X86/pr32340.ll @@ -14,37 +14,37 @@ define void @foo() { ; X64-LABEL: foo: ; X64: # %bb.0: # %entry ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: # kill: def $rax killed $eax +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movw $0, var_825 -; X64-NEXT: movzwl var_32, %ecx +; X64-NEXT: movzwl var_32, %eax ; X64-NEXT: movzwl var_901, %edx -; X64-NEXT: movl %ecx, %esi +; X64-NEXT: movl %eax, %esi ; X64-NEXT: xorl %edx, %esi -; X64-NEXT: movl %ecx, %edx +; X64-NEXT: movl %eax, %edx ; X64-NEXT: xorl %esi, %edx -; X64-NEXT: addl %ecx, %edx -; X64-NEXT: movslq %edx, %rcx -; X64-NEXT: movq %rcx, var_826 -; X64-NEXT: movzwl var_32, %ecx -; X64-NEXT: # kill: def $rcx killed $ecx -; X64-NEXT: movzwl var_901, %edx -; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D -; X64-NEXT: movslq %edx, %rdx -; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440 -; X64-NEXT: xorq %rsi, %rdx -; X64-NEXT: movq %rcx, %rsi -; X64-NEXT: xorq %rdx, %rsi -; X64-NEXT: xorq $-1, %rsi -; X64-NEXT: xorq %rsi, %rcx -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: orq var_57, %rdx -; X64-NEXT: orq %rdx, %rcx -; X64-NEXT: # kill: def $cx killed $cx killed $rcx -; X64-NEXT: movw %cx, var_900 -; X64-NEXT: cmpq var_28, %rax -; X64-NEXT: setne %al -; X64-NEXT: andb $1, %al -; X64-NEXT: movzbl %al, %eax +; X64-NEXT: addl %eax, %edx +; X64-NEXT: movslq %edx, %rdi +; X64-NEXT: movq %rdi, var_826 +; X64-NEXT: movzwl var_32, %eax +; X64-NEXT: movl %eax, %edi +; X64-NEXT: movzwl var_901, %eax +; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D +; X64-NEXT: movslq %eax, %r8 +; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440 +; X64-NEXT: xorq %r9, %r8 +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: xorq %r8, %r9 +; X64-NEXT: xorq $-1, %r9 +; X64-NEXT: xorq %r9, %rdi +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: orq var_57, %r8 +; X64-NEXT: orq %r8, %rdi +; X64-NEXT: # kill: def $di killed $di killed $rdi +; X64-NEXT: movw %di, var_900 +; X64-NEXT: cmpq var_28, %rcx +; X64-NEXT: setne %r10b +; X64-NEXT: andb $1, %r10b +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: movw %ax, var_827 ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll index 165e0292d4648..d5f7fde77f6d2 100644 --- a/llvm/test/CodeGen/X86/pr32345.ll +++ b/llvm/test/CodeGen/X86/pr32345.ll @@ -15,23 +15,23 @@ define void @foo() { ; X640-NEXT: xorl %ecx, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax -; X640-NEXT: cltq -; X640-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X640-NEXT: movslq %eax, %rdx +; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; X640-NEXT: movzwl var_22, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax -; X640-NEXT: cltq -; X640-NEXT: movzwl var_27, %ecx -; X640-NEXT: subl $16610, %ecx # imm = 0x40E2 -; X640-NEXT: movl %ecx, %ecx -; X640-NEXT: # kill: def $rcx killed $ecx +; X640-NEXT: movslq %eax, %rdx +; X640-NEXT: movzwl var_27, %eax +; X640-NEXT: subl $16610, %eax # imm = 0x40E2 +; X640-NEXT: movl %eax, %eax +; X640-NEXT: movl %eax, %ecx ; X640-NEXT: # kill: def $cl killed $rcx -; X640-NEXT: sarq %cl, %rax -; X640-NEXT: # kill: def $al killed $al killed $rax -; 
X640-NEXT: # implicit-def: $rcx -; X640-NEXT: movb %al, (%rcx) +; X640-NEXT: sarq %cl, %rdx +; X640-NEXT: # kill: def $dl killed $dl killed $rdx +; X640-NEXT: # implicit-def: $rsi +; X640-NEXT: movb %dl, (%rsi) ; X640-NEXT: retq ; ; 6860-LABEL: foo: @@ -41,37 +41,43 @@ define void @foo() { ; 6860-NEXT: .cfi_offset %ebp, -8 ; 6860-NEXT: movl %esp, %ebp ; 6860-NEXT: .cfi_def_cfa_register %ebp +; 6860-NEXT: pushl %ebx +; 6860-NEXT: pushl %edi +; 6860-NEXT: pushl %esi ; 6860-NEXT: andl $-8, %esp -; 6860-NEXT: subl $24, %esp +; 6860-NEXT: subl $32, %esp +; 6860-NEXT: .cfi_offset %esi, -20 +; 6860-NEXT: .cfi_offset %edi, -16 +; 6860-NEXT: .cfi_offset %ebx, -12 ; 6860-NEXT: movw var_22, %ax ; 6860-NEXT: movzwl var_27, %ecx ; 6860-NEXT: movw %cx, %dx ; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax -; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp) +; 6860-NEXT: # implicit-def: $esi +; 6860-NEXT: movw %ax, %si +; 6860-NEXT: xorl %ecx, %esi +; 6860-NEXT: # kill: def $si killed $si killed $esi +; 6860-NEXT: movzwl %si, %ecx +; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp) ; 6860-NEXT: movw var_22, %ax ; 6860-NEXT: movzwl var_27, %ecx ; 6860-NEXT: movw %cx, %dx ; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax +; 6860-NEXT: # implicit-def: $edi +; 6860-NEXT: movw %ax, %di +; 6860-NEXT: xorl %ecx, %edi +; 6860-NEXT: # kill: def $di killed $di killed $edi +; 6860-NEXT: movzwl %di, %ebx ; 6860-NEXT: # kill: def $cl killed $cl killed $ecx ; 6860-NEXT: addb $30, %cl -; 6860-NEXT: xorl %edx, %edx +; 6860-NEXT: xorl %eax, %eax ; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; 6860-NEXT: shrdl %cl, %edx, %eax +; 6860-NEXT: shrdl %cl, %eax, %ebx ; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; 6860-NEXT: testb $32, %cl +; 6860-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: jne .LBB0_2 ; 6860-NEXT: # %bb.1: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -81,7 +87,10 @@ define void @foo() { ; 6860-NEXT: # kill: def $al killed $al killed $eax ; 6860-NEXT: # implicit-def: $ecx ; 6860-NEXT: movb %al, (%ecx) -; 6860-NEXT: movl %ebp, %esp +; 6860-NEXT: leal -12(%ebp), %esp +; 6860-NEXT: popl %esi +; 6860-NEXT: popl %edi +; 6860-NEXT: popl %ebx ; 6860-NEXT: popl %ebp ; 6860-NEXT: .cfi_def_cfa %esp, 4 ; 6860-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll index 3b1997234ce55..4754d8e4cf6cb 100644 --- a/llvm/test/CodeGen/X86/pr32451.ll +++ b/llvm/test/CodeGen/X86/pr32451.ll @@ -9,24 +9,29 @@ target triple = "x86_64-unknown-linux-gnu" define i8** @japi1_convert_690(i8**, i8***, i32) { ; CHECK-LABEL: japi1_convert_690: ; CHECK: # %bb.0: # %top +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset %ebx, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; CHECK-NEXT: calll julia.gc_root_decl 
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; CHECK-NEXT: calll jl_get_ptls_states -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; CHECK-NEXT: movl 4(%ecx), %edx -; CHECK-NEXT: movb (%edx), %dl -; CHECK-NEXT: andb $1, %dl -; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: movb (%edx), %bl +; CHECK-NEXT: andb $1, %bl +; CHECK-NEXT: movzbl %bl, %edx ; CHECK-NEXT: movl %edx, (%esp) -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; CHECK-NEXT: calll jl_box_int32 -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl top: diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll index 25b068c8fad6f..0f73036a4c6c9 100644 --- a/llvm/test/CodeGen/X86/pr34592.ll +++ b/llvm/test/CodeGen/X86/pr34592.ll @@ -10,7 +10,7 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-32, %rsp -; CHECK-NEXT: subq $160, %rsp +; CHECK-NEXT: subq $192, %rsp ; CHECK-NEXT: vmovaps 240(%rbp), %ymm8 ; CHECK-NEXT: vmovaps 208(%rbp), %ymm9 ; CHECK-NEXT: vmovaps 176(%rbp), %ymm10 @@ -27,14 +27,14 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 ; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23] ; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0] ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7] -; CHECK-NEXT: vmovaps %xmm7, %xmm2 -; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; CHECK-NEXT: # implicit-def: $ymm9 -; CHECK-NEXT: vmovaps %xmm2, %xmm9 -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3] -; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7] +; CHECK-NEXT: vmovaps %xmm7, %xmm9 +; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7] +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm9, %xmm2 +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload +; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3] +; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm9[4,5,6,7] ; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7] ; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3] ; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5] @@ -43,11 +43,14 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 ; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero ; CHECK-NEXT: # implicit-def: $ymm8 ; CHECK-NEXT: vmovaps %xmm7, %xmm8 -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1] +; CHECK-NEXT: 
vperm2i128 {{.*#+}} ymm6 = ymm8[0,1],ymm6[0,1] ; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK-NEXT: vmovaps %ymm5, %ymm1 +; CHECK-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm6, %ymm2 +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload ; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm9, %ymm3 +; CHECK-NEXT: vmovaps %ymm5, %ymm3 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 diff --git a/llvm/test/CodeGen/X86/pr39733.ll b/llvm/test/CodeGen/X86/pr39733.ll index 75f9dc51b85eb..4c7153852d22c 100644 --- a/llvm/test/CodeGen/X86/pr39733.ll +++ b/llvm/test/CodeGen/X86/pr39733.ll @@ -23,8 +23,8 @@ define void @test55() { ; CHECK-NEXT: vmovaps %xmm1, %xmm2 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; CHECK-NEXT: vmovdqa %ymm0, (%rsp) +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 +; CHECK-NEXT: vmovdqa %ymm2, (%rsp) ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 diff --git a/llvm/test/CodeGen/X86/pr44749.ll b/llvm/test/CodeGen/X86/pr44749.ll index 1012d8c723b13..d465009c7c38a 100644 --- a/llvm/test/CodeGen/X86/pr44749.ll +++ b/llvm/test/CodeGen/X86/pr44749.ll @@ -14,22 +14,20 @@ define i32 @a() { ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: callq _b ; CHECK-NEXT: cvtsi2sd %eax, %xmm0 -; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax -; CHECK-NEXT: subq $-1, %rax -; CHECK-NEXT: setne %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rcx +; CHECK-NEXT: subq $-1, %rcx +; CHECK-NEXT: setne %dl +; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: setae %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: setae %dl +; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: cvttsd2si %xmm0, %ecx -; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill -; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: cvttsd2si %xmm0, %eax ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/pr47000.ll b/llvm/test/CodeGen/X86/pr47000.ll index 083aa780a07c2..922b6403cc4f4 100755 --- a/llvm/test/CodeGen/X86/pr47000.ll +++ b/llvm/test/CodeGen/X86/pr47000.ll @@ -12,47 +12,51 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $124, %esp -; CHECK-NEXT: movl 144(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movw 176(%esp), %dx -; CHECK-NEXT: movw 172(%esp), %si -; CHECK-NEXT: movw 168(%esp), %di -; CHECK-NEXT: movw 164(%esp), %bx -; CHECK-NEXT: movw 160(%esp), %bp +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; 
CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movw 156(%esp), %ax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 152(%esp), %ax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 148(%esp), %ax -; CHECK-NEXT: movw %ax, 112(%esp) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 114(%esp) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 116(%esp) -; CHECK-NEXT: movw %bp, 118(%esp) -; CHECK-NEXT: movw %dx, 110(%esp) -; CHECK-NEXT: movw %si, 108(%esp) -; CHECK-NEXT: movw %di, 106(%esp) -; CHECK-NEXT: movw %bx, 104(%esp) -; CHECK-NEXT: movzwl 118(%esp), %edx -; CHECK-NEXT: movzwl 116(%esp), %esi -; CHECK-NEXT: movzwl 114(%esp), %edi -; CHECK-NEXT: movzwl 112(%esp), %ebx -; CHECK-NEXT: movzwl 110(%esp), %ebp -; CHECK-NEXT: movzwl 108(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 106(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 104(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl %ebx, (%eax) ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -68,58 +72,58 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: movw %ax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps 4(%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps 4(%ecx) ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll fmodf -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps 4(%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps 4(%ecx) ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll fmodf -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, (%ecx) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps 4(%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps 4(%ecx) ; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll fmodf -; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: fstps (%ecx) ; CHECK-NEXT: calll __gnu_f2h_ieee ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movw %ax, 6(%ecx) @@ -127,9 +131,10 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { ; CHECK-NEXT: movw %ax, 4(%ecx) ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload ; CHECK-NEXT: movw %dx, 2(%ecx) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %si # 2-byte Reload -; CHECK-NEXT: movw %si, (%ecx) -; CHECK-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %bp # 2-byte Reload +; CHECK-NEXT: movw %bp, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: addl $124, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir index 2821f00940ecf..0fe9f60897fd1 100644 --- a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir +++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir @@ -23,15 +23,15 @@ body: | ; CHECK: successors: %bb.3(0x80000000) ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit + ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.0) ; CHECK: bb.3: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: renamable $ecx = MOV32r0 implicit-def dead $eflags - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.1) + ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.1) ; CHECK: JMP64r killed renamable $rax bb.0: liveins: $edi, $rsi diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll index 4934419055acd..c62e92f2cac55 100644 --- a/llvm/test/CodeGen/X86/swift-return.ll +++ b/llvm/test/CodeGen/X86/swift-return.ll @@ -28,10 +28,11 @@ define i16 @test(i32 %key) { ; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: callq gen -; CHECK-O0-NEXT: cwtl -; CHECK-O0-NEXT: movsbl %dl, %ecx -; CHECK-O0-NEXT: addl %ecx, %eax -; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-O0-NEXT: movswl %ax, %ecx +; CHECK-O0-NEXT: movsbl %dl, %esi +; CHECK-O0-NEXT: addl %esi, %ecx +; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-O0-NEXT: movw %cx, %ax ; CHECK-O0-NEXT: popq %rcx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -79,16 +80,16 @@ define i32 @test2(i32 %key) #0 { ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movq %rsp, %rax ; CHECK-O0-NEXT: callq gen2 -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-O0-NEXT: movl (%rsp), %esi -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi -; CHECK-O0-NEXT: addl %edi, %esi -; CHECK-O0-NEXT: addl %edx, %esi -; CHECK-O0-NEXT: addl %ecx, %esi -; CHECK-O0-NEXT: addl %eax, %esi -; CHECK-O0-NEXT: movl %esi, %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi +; CHECK-O0-NEXT: movl (%rsp), %edi +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %r8d +; CHECK-O0-NEXT: addl %r8d, %edi +; CHECK-O0-NEXT: addl %esi, %edi +; CHECK-O0-NEXT: addl %edx, %edi +; CHECK-O0-NEXT: addl %ecx, %edi +; 
CHECK-O0-NEXT: movl %edi, %eax ; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -263,17 +264,17 @@ define void @consume_i1_ret() { ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: callq produce_i1_ret ; CHECK-O0-NEXT: andb $1, %al -; CHECK-O0-NEXT: movzbl %al, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %al, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: andb $1, %dl -; CHECK-O0-NEXT: movzbl %dl, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %dl, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: andb $1, %cl -; CHECK-O0-NEXT: movzbl %cl, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %cl, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: andb $1, %r8b -; CHECK-O0-NEXT: movzbl %r8b, %eax -; CHECK-O0-NEXT: movl %eax, var +; CHECK-O0-NEXT: movzbl %r8b, %esi +; CHECK-O0-NEXT: movl %esi, var ; CHECK-O0-NEXT: popq %rax ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll index 1afae31b2b8d2..1388c61c18984 100644 --- a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -790,8 +790,8 @@ a: ; CHECK-O0-LABEL: testAssign4 ; CHECK-O0: callq _foo2 ; CHECK-O0: xorl %eax, %eax -; CHECK-O0: ## kill: def $rax killed $eax -; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] +; CHECK-O0: movl %eax, %ecx +; CHECK-O0: movq %rcx, [[SLOT:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT]], %rax ; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT2]], %r12 diff --git a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll index 1b49dc554f7ef..5de9976d6de2a 100644 --- a/llvm/test/DebugInfo/X86/op_deref.ll +++ b/llvm/test/DebugInfo/X86/op_deref.ll @@ -6,10 +6,10 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 -; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") @@ -17,8 +17,8 @@ ; Check the DEBUG_VALUE comments for good measure. ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. -; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] -; ASM-CHECK: DW_OP_breg2 +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rdx+0] +; ASM-CHECK: DW_OP_breg1 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT ; PRETTY-PRINT: DIExpression(DW_OP_deref) From 158581772fc8f3d6c601ceba14a08285e46cb7e9 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 15 Sep 2020 20:18:00 +0200 Subject: [PATCH 092/109] ReleaseNotes: PowerPC changes By Ahsan Saghir! --- llvm/docs/ReleaseNotes.rst | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index e87bf3d146f54..977ba26f9e236 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -188,7 +188,41 @@ During this release ... Changes to the PowerPC Target ----------------------------- -During this release ... 
+Optimization: + +* Improved Loop Unroll-and-Jam legality checks, allowing it to handle more than two level loop nests +* Improved Loop Unroll to be able to unroll more loops +* Implemented an option to allow loop fusion to work on loops with different constant trip counts + +Codegen: + +* POWER10 support +* Added PC Relative addressing +* Added __int128 vector bool support +* Security enhancement via probe-stack attribute support to protect against stack clash +* Floating point support enhancements +* Improved half precision and quad precision support, including GLIBC +* constrained FP operation support for arithmetic/rounding/max/min +* cleaning up fast math flags checks in DAGCombine, Legalizer, and Lowering +* Performance improvements from instruction exploitation, especially for vector permute on LE +* Scheduling enhancements +* Added MacroFusion for POWER8 +* Added post-ra heuristics for POWER9 +* Target dependent passes tuning +* Updated LoopStrengthReduce to use instruction number as first priority +* Enhanced MachineCombiner to expose more ILP +* Code quality and maintenance enhancements +* Enabled more machine verification passes +* Added ability to parse and emit additional extended mnemonics +* Numerous bug fixes + +AIX Support Improvements: + +* Enabled compile and link such that a simple "Hello World" program works with standard headers +* Added support for the C calling convention for non-vector code +* Implemented correct stack frame layout for functions +* In llvm-objdump, added support for relocations, improved selection of symbol labels, and added the --symbol-description option + Changes to the RISC-V Target ---------------------------- From 8f2c29681ce768afb739b6cf5ccca81dd87d5326 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Wed, 16 Sep 2020 08:18:08 +0100 Subject: [PATCH 093/109] [flang] Fix docs build Apply a local fix to an issue with recommonmark's AutoStructify extension when used with certain versions of sphinx. See https://github.com/readthedocs/recommonmark/issues/93 Reviewed By: hans Differential Revision: https://reviews.llvm.org/D87714 (cherry picked from commit af56be339f8c9660747794cc6755384154602535) --- flang/docs/conf.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/flang/docs/conf.py b/flang/docs/conf.py index 21362fc3449e9..f5eb283a186a5 100644 --- a/flang/docs/conf.py +++ b/flang/docs/conf.py @@ -50,6 +50,17 @@ # Setup AutoStructify for inline .rst toctrees in index.md from recommonmark.transform import AutoStructify + + # Stolen from https://github.com/readthedocs/recommonmark/issues/93 + # Monkey patch to fix recommonmark 0.4 doc reference issues. + from recommonmark.states import DummyStateMachine + orig_run_role = DummyStateMachine.run_role + def run_role(self, name, options=None, content=None): + if name == 'doc': + name = 'any' + return orig_run_role(self, name, options, content) + DummyStateMachine.run_role = run_role + def setup(app): # Disable inline math to avoid # https://github.com/readthedocs/recommonmark/issues/120 in Extensions.md From 4a26e3b33798424dc5a4843f7b29a617bef81656 Mon Sep 17 00:00:00 2001 From: Adam Czachorowski Date: Tue, 15 Sep 2020 20:13:00 +0200 Subject: [PATCH 094/109] [clangd] Actually parse Index section of the YAML file. This fixes a bug in dbf486c0de92c76df77c1a1f815cf16533ecbb3a, which introduced the Index section of the config, but did not register the parse method, so it didn't work in a YAML file (but did in a test). 
Differential Revision: https://reviews.llvm.org/D87710 (cherry picked from commit 7029e5d4ca20d20982da8efe89de27acd8d7d75b) --- clang-tools-extra/clangd/ConfigYAML.cpp | 1 + .../clangd/unittests/ConfigYAMLTests.cpp | 17 +++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp index 16639f6649c2b..9988fe3766480 100644 --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -38,6 +38,7 @@ class Parser { DictParser Dict("Config", this); Dict.handle("If", [&](Node &N) { parse(F.If, N); }); Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); }); + Dict.handle("Index", [&](Node &N) { parse(F.Index, N); }); Dict.parse(N); return !(N.failed() || HadError); } diff --git a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp index a9526ce2367c4..27b1c0cfc56dd 100644 --- a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp @@ -47,16 +47,21 @@ CompileFlags: { Add: [foo, bar] } Add: | b az +--- +Index: + Background: Skip )yaml"; auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback()); EXPECT_THAT(Diags.Diagnostics, IsEmpty()); - ASSERT_EQ(Results.size(), 2u); - EXPECT_FALSE(Results.front().If.HasUnrecognizedCondition); - EXPECT_THAT(Results.front().If.PathMatch, ElementsAre(Val("abc"))); - EXPECT_THAT(Results.front().CompileFlags.Add, - ElementsAre(Val("foo"), Val("bar"))); + ASSERT_EQ(Results.size(), 3u); + EXPECT_FALSE(Results[0].If.HasUnrecognizedCondition); + EXPECT_THAT(Results[0].If.PathMatch, ElementsAre(Val("abc"))); + EXPECT_THAT(Results[0].CompileFlags.Add, ElementsAre(Val("foo"), Val("bar"))); + + EXPECT_THAT(Results[1].CompileFlags.Add, ElementsAre(Val("b\naz\n"))); - EXPECT_THAT(Results.back().CompileFlags.Add, ElementsAre(Val("b\naz\n"))); + ASSERT_TRUE(Results[2].Index.Background); + EXPECT_EQ("Skip", *Results[2].Index.Background.getValue()); } TEST(ParseYAML, Locations) { From 339a0e2d114ecd15eb7289425851e356af7bb8a7 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:03:14 +0200 Subject: [PATCH 095/109] llvm release notes: drop in-progress warnings; minor cleanups --- llvm/docs/ReleaseNotes.rst | 79 ++++++-------------------------------- 1 file changed, 12 insertions(+), 67 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 977ba26f9e236..2af813fda1aa9 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -5,12 +5,6 @@ LLVM 11.0.0 Release Notes .. contents:: :local: -.. warning:: - These are in-progress notes for the upcoming LLVM 11 release. - Release notes for previous releases can be found on - `the Download Page `_. - - Introduction ============ @@ -26,48 +20,16 @@ have questions or comments, the `LLVM Developer's Mailing List `_ is a good place to send them. -Note that if you are reading this file from a Git checkout or the main -LLVM web page, this document applies to the *next* release, not the current -one. To see the release notes for a specific release, please see the `releases -page `_. - Deprecated and Removed Features/APIs ================================================= * BG/Q support, including QPX, will be removed in the 12.0.0 release. Non-comprehensive list of changes in this release ================================================= -.. 
NOTE - For small 1-3 sentence descriptions, just add an entry at the end of - this list. If your description won't fit comfortably in one bullet - point (e.g. maybe you would like to give an example of the - functionality, or simply have a lot to talk about), see the `NOTE` below - for adding a new subsection. - -* The LLVM project has started the migration towards Python 3, and the build - system now prefers Python 3 whenever available. If the Python 3 interpreter - (or libraries) are not found, the build system will, for the time being, fall - back to Python 2. It is recommended that downstream projects migrate to - Python 3 as Python 2 has been end-of-life'd by the Python Software - Foundation. * The llgo frontend has been removed for now, but may be resurrected in the future. -* ... - - -.. NOTE - If you would like to document a larger change, then you can add a - subsection about it right here. You can copy the following boilerplate - and un-indent it (the indentation causes it to be inside this comment). - - Special New Feature - ------------------- - - Makes programs 10x faster by doing Special New Thing. - - Changes to the LLVM IR ---------------------- @@ -116,6 +78,13 @@ Changes to the LLVM IR Changes to building LLVM ------------------------ +* The LLVM project has started the migration towards Python 3, and the build + system now prefers Python 3 whenever available. If the Python 3 interpreter + (or libraries) are not found, the build system will, for the time being, fall + back to Python 2. It is recommended that downstream projects migrate to + Python 3 as Python 2 has been end-of-life'd by the Python Software + Foundation. + Changes to the AArch64 Backend ------------------------------ @@ -134,6 +103,7 @@ Changes to the AArch64 Backend * Added support for Armv8.6-A: Assembly support for the following extensions: + - Enhanced Counter Virtualization (ARMv8.6-ECV). - Fine Grained Traps (ARMv8.6-FGT). - Activity Monitors virtualization (ARMv8.6-AMU). @@ -179,11 +149,6 @@ Changes to the ARM Backend * Added support for Cortex-M55, Cortex-A77, Cortex-A78 and Cortex-X1 cores. -Changes to the MIPS Target --------------------------- - -During this release ... - Changes to the PowerPC Target ----------------------------- @@ -228,6 +193,7 @@ Changes to the RISC-V Target ---------------------------- New features: + * After consultation through an RFC, the RISC-V backend now accepts patches for proposed instruction set extensions that have not yet been ratified. For these experimental extensions, there is no expectation of ongoing support - the @@ -244,6 +210,7 @@ New features: * llvm-objdump will now print branch targets as part of disassembly. Improvements: + * If an immediate can be generated using a pair of `addi` instructions, that pair will be selected rather than materialising the immediate into a separate register with an `lui` and `addi` pair. @@ -265,6 +232,7 @@ Improvements: * The `jump` pseudo instruction is now supported. Bug fixes: + * A failure to insert indirect branches in position independent code was fixed. * The calculated expanded size of atomic pseudo operations was fixed, avoiding @@ -277,9 +245,6 @@ Bug fixes: Changes to the X86 Target ------------------------- -During this release ... - - * Functions with the probe-stack attribute set to "inline-asm" are now protected against stack clash without the need of a third-party probing function and with limited impact on performance. 
@@ -335,18 +300,6 @@ Changes to the Windows Target * Produce COFF weak external symbols for IR level weak symbols without a comdat (e.g. for `__attribute__((weak))` in C) -Changes to the OCaml bindings ------------------------------ - - - -Changes to the C API --------------------- - - -Changes to the Go bindings --------------------------- - Changes to the DAG infrastructure --------------------------------- @@ -357,7 +310,7 @@ Changes to the DAG infrastructure MachineIR. Changes to the Debug Info ---------------------------------- +------------------------- * LLVM now supports the debug entry values (DW_OP_entry_value) production for the x86, ARM, and AArch64 targets by default. Other targets can use @@ -389,14 +342,6 @@ Changes to the LLVM tools * llvm-lib supports adding import library objects in addition to regular object files -Changes to LLDB -=============== - -External Open Source Projects Using LLVM 11 -=========================================== - -* A project... - Additional Information ====================== From 1a51c113148a10a2d7313aae313039ef8e0db0cc Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:10:31 +0200 Subject: [PATCH 096/109] clang release notes: drop in-progress warnings; minor cleanups --- clang/docs/ReleaseNotes.rst | 78 +++---------------------------------- 1 file changed, 6 insertions(+), 72 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1c02c478be688..0ccaa7a821212 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1,6 +1,6 @@ -======================================== -Clang 11.0.0 (In-Progress) Release Notes -======================================== +========================== +Clang 11.0.0 Release Notes +========================== .. contents:: :local: @@ -8,12 +8,6 @@ Clang 11.0.0 (In-Progress) Release Notes Written by the `LLVM Team `_ -.. warning:: - - These are in-progress notes for the upcoming Clang 11 release. - Release notes for previous releases can be found on - `the Download Page `_. - Introduction ============ @@ -30,11 +24,6 @@ For more information about Clang or LLVM, including information about the latest release, please see the `Clang Web Site `_ or the `LLVM Web Site `_. -Note that if you are reading this file from a Git checkout or the -main Clang web page, this document applies to the *next* release, not -the current one. To see the release notes for a specific release, please -see the `releases page `_. - What's New in Clang 11.0.0? =========================== @@ -43,13 +32,9 @@ here. Generic improvements to Clang as a whole or to its underlying infrastructure are described first, followed by language-specific sections with improvements to Clang's support for those languages. -Major New Features ------------------- - -- ... Recovery AST -^^^^^^^^^^^^ +------------ clang's AST now improves support for representing broken C++ code. This improves the quality of subsequent diagnostics after an error is encountered. It also @@ -89,7 +74,7 @@ This feature is on by default for C++ code, and can be explicitly controlled with `-Xclang -f[no-]recovery-ast`. Improvements to Clang's diagnostics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +----------------------------------- - -Wpointer-to-int-cast is a new warning group. This group warns about C-style casts of pointers to a integer type too small to hold all possible values. @@ -269,13 +254,6 @@ New Compiler Flags compiler support will continue to change until the specification is finalised. 
-Deprecated Compiler Flags -------------------------- - -The following options are deprecated and ignored. They will be removed in -future versions of Clang. - -- ... Modified Compiler Flags ----------------------- @@ -346,8 +324,6 @@ C Language Changes in Clang - Clang now supports the GNU C extension `asm inline`; it won't do anything *yet*, but it will be parsed. -- ... - C++ Language Changes in Clang ----------------------------- @@ -389,13 +365,6 @@ C++ Language Changes in Clang int f() { return 0; } } S; -C++1z Feature Support -^^^^^^^^^^^^^^^^^^^^^ - -... - -Objective-C Language Changes in Clang -------------------------------------- OpenCL Kernel Language Changes in Clang --------------------------------------- @@ -420,7 +389,7 @@ OpenCL Kernel Language Changes in Clang `cl_arm_integer_dot_product`. Changes related to C++ for OpenCL -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +--------------------------------- - Added `addrspace_cast` operator. @@ -486,10 +455,6 @@ New features for OpenMP 5.0 were implemented. - Bug fixes and optimizations. -CUDA Support in Clang ---------------------- - -- ... Internal API Changes -------------------- @@ -538,11 +503,6 @@ release of Clang. Users of the build system should adjust accordingly. something you need, please reach out to the mailing list to discuss possible ways forward. -AST Matchers ------------- - -- ... - clang-format ------------ @@ -644,10 +604,6 @@ clang-format foo(); } while(1); -libclang --------- - -- ... .. _release-notes-clang-static-analyzer: @@ -723,28 +679,6 @@ Static Analyzer .. _release-notes-ubsan: -Undefined Behavior Sanitizer (UBSan) ------------------------------------- - -Core Analysis Improvements -========================== - -- ... - -New Issues Found -================ - -- ... - -Python Binding Changes ----------------------- - -The following methods have been added: - -- ... - -Significant Known Problems -========================== Additional Information ====================== From 19d7a9fa9d665c75655db70873782e60cad56bb7 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:14:46 +0200 Subject: [PATCH 097/109] clang-tools-extra release notes: drop in-progress warnings; minor cleanups --- clang-tools-extra/docs/ReleaseNotes.rst | 60 ++----------------------- 1 file changed, 3 insertions(+), 57 deletions(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 83ae2c6605fd1..0471c5e9c4ebd 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -1,6 +1,6 @@ -==================================================== -Extra Clang Tools 11.0.0 (In-Progress) Release Notes -==================================================== +====================================== +Extra Clang Tools 11.0.0 Release Notes +====================================== .. contents:: :local: @@ -8,12 +8,6 @@ Extra Clang Tools 11.0.0 (In-Progress) Release Notes Written by the `LLVM Team `_ -.. warning:: - - These are in-progress notes for the upcoming Extra Clang Tools 11 release. - Release notes for previous releases can be found on - `the Download Page `_. - Introduction ============ @@ -27,11 +21,6 @@ For more information about Clang or LLVM, including information about the latest release, please see the `Clang Web Site `_ or the `LLVM Web Site `_. -Note that if you are reading this file from a Git checkout or the -main Clang web page, this document applies to the *next* release, not -the current one. 
To see the release notes for a specific release, please -see the `releases page `_. - What's New in Extra Clang Tools 11.0.0? ======================================= @@ -39,11 +28,6 @@ Some of the major new features and improvements to Extra Clang Tools are listed here. Generic improvements to Extra Clang Tools as a whole or to its underlying infrastructure are described first, followed by tool-specific sections. -Major New Features ------------------- - -... - Improvements to clangd ---------------------- @@ -238,21 +222,6 @@ Miscellaneous - Too many stability and correctness fixes to mention. -Improvements to clang-doc -------------------------- - -The improvements are... - -Improvements to clang-query ---------------------------- - -The improvements are... - -Improvements to clang-rename ----------------------------- - -The improvements are... - Improvements to clang-tidy -------------------------- @@ -439,26 +408,3 @@ Other improvements - For `run-clang-tidy.py` add option to use alpha checkers from `clang-analyzer`. - -Improvements to include-fixer ------------------------------ - -The improvements are... - -Improvements to clang-include-fixer ------------------------------------ - -The improvements are... - -Improvements to modularize --------------------------- - -The improvements are... - -Improvements to pp-trace ------------------------- - -The improvements are... - -Clang-tidy visual studio plugin -------------------------------- From 6afefb45dbffc4a8187192f3c70655343a10febf Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:16:34 +0200 Subject: [PATCH 098/109] lld release notes: drop in-progress warnings; minor cleanups --- lld/docs/ReleaseNotes.rst | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 880f933e51be2..fcb8eaefa5946 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -5,11 +5,6 @@ lld 11.0.0 Release Notes .. contents:: :local: -.. warning:: - These are in-progress notes for the upcoming LLVM 11.0.0 release. - Release notes for previous releases can be found on - `the Download Page `_. - Introduction ============ @@ -176,12 +171,3 @@ MinGW Improvements ``--disable-runtime-pseudo-reloc``), the ``--no-seh`` flag and options for selecting file and section alignment (``--file-alignment`` and ``--section-alignment``). - -MachO Improvements ------------------- - -* Item 1. - -WebAssembly Improvements ------------------------- - From 34c21f8dbe656969c209803012234901a1d4ae19 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:18:40 +0200 Subject: [PATCH 099/109] flang release notes: drop in-progress warnings; minor cleanups --- flang/docs/ReleaseNotes.md | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index b891ab904a04c..19939a539c43b 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -1,10 +1,4 @@ -# Flang 11.0.0 (In-Progress) Release Notes - -> **warning** -> -> These are in-progress notes for the upcoming LLVM 11.0.0 release. -> Release notes for previous releases can be found on [the Download -> Page](https://releases.llvm.org/download.html). +# Flang 11.0.0 Release Notes ## Introduction @@ -17,18 +11,6 @@ documentation](https://llvm.org/docs/ReleaseNotes.html). All LLVM releases may be downloaded from the [LLVM releases web site](https://llvm.org/releases/). 
-Note that if you are reading this file from a Git checkout, this -document applies to the *next* release, not the current one. To see the -release notes for a specific release, please see the [releases -page](https://llvm.org/releases/). - -## Known Issues - -These are issues that couldn't be fixed before the release. See the bug -reports for the latest status. - - * ... - ## Introducing Flang Flang is LLVM's Fortran front end and is new for the LLVM 11 release. From 952e7c3b81ffa34130b571afe028debc0ef36691 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:20:04 +0200 Subject: [PATCH 100/109] libc++ release notes: drop in-progress warnings; minor cleanups --- libcxx/docs/ReleaseNotes.rst | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst index 1db79153ed894..001957570c905 100644 --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -1,6 +1,6 @@ -========================================= -Libc++ 11.0.0 (In-Progress) Release Notes -========================================= +=========================== +Libc++ 11.0.0 Release Notes +=========================== .. contents:: :local: @@ -8,12 +8,6 @@ Libc++ 11.0.0 (In-Progress) Release Notes Written by the `Libc++ Team `_ -.. warning:: - - These are in-progress notes for the upcoming libc++ 11 release. - Release notes for previous releases can be found on - `the Download Page `_. - Introduction ============ @@ -27,11 +21,6 @@ be downloaded from the `LLVM releases web site `_. For more information about libc++, please see the `Libc++ Web Site `_ or the `LLVM Web Site `_. -Note that if you are reading this file from a Git checkout or the -main Libc++ web page, this document applies to the *next* release, not -the current one. To see the release notes for a specific release, please -see the `releases page `_. - What's New in Libc++ 11.0.0? ============================ @@ -39,7 +28,3 @@ New Features ------------ - ```` - -API Changes ------------ -- ... From f9572abae17c483c861702d440b4eeb0f08879d1 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:21:17 +0200 Subject: [PATCH 101/109] openmp release notes: drop in-progress warnings; minor cleanups --- openmp/docs/ReleaseNotes.rst | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/openmp/docs/ReleaseNotes.rst b/openmp/docs/ReleaseNotes.rst index b7f2ec42277e3..e09ef5f5b6382 100644 --- a/openmp/docs/ReleaseNotes.rst +++ b/openmp/docs/ReleaseNotes.rst @@ -5,11 +5,6 @@ openmp 11.0.0 Release Notes .. contents:: :local: -.. warning:: - These are in-progress notes for the upcoming LLVM 11.0.0 release. - Release notes for previous releases can be found on - `the Download Page `_. - Introduction ============ @@ -21,16 +16,6 @@ from the `LLVM releases web site `_. Non-comprehensive list of changes in this release ================================================= -5.0 features ------------- - -* ... - -5.1 features ------------- - -* ... 
- OMPT Improvements ----------------- From c2f4de353b2e9f6658febe5735c615ee433fd062 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 16 Sep 2020 17:22:02 +0200 Subject: [PATCH 102/109] polly release notes: drop in-progress warnings; minor cleanups --- polly/docs/ReleaseNotes.rst | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/polly/docs/ReleaseNotes.rst b/polly/docs/ReleaseNotes.rst index ab95eae4e57ed..8aaa6f0564da4 100644 --- a/polly/docs/ReleaseNotes.rst +++ b/polly/docs/ReleaseNotes.rst @@ -1,17 +1,9 @@ -============================= -Release Notes 11.0 (upcoming) -============================= +================== +Release Notes 11.0 +================== In Polly 11 the following important changes have been incorporated. -.. warning:: - - These releaes notes are for the next release of Polly and describe - the new features that have recently been committed to our development - branch. - -- Change ... - * The LLVM option -polly-isl-arg was added to pass options to ISL's command line option parser. For instance, -polly-isl-arg=--schedule-algorithm=feautrier switches to the From 80e2fc1e6e68d6ed57dccc03c6a5121e216bfd43 Mon Sep 17 00:00:00 2001 From: Ben Dunbobbin Date: Thu, 13 Aug 2020 23:58:40 +0100 Subject: [PATCH 103/109] [X86][ELF] Prefer lowering MC_GlobalAddress operands to .Lfoo$local for STV_DEFAULT only This patch restricts the behaviour of referencing via .Lfoo$local local aliases, introduced in https://reviews.llvm.org/D73230, to STV_DEFAULT globals only. Hidden symbols via --fvisiblity=hidden (https://gcc.gnu.org/wiki/Visibility) is an important scenario. Benefits: - Improves the size of object files by using fewer STT_SECTION symbols. - The code reads a bit better (it was not obvious to me without going back to the code reviews why the canBenefitFromLocalAlias function currently doesn't consider visibility). - There is also a side benefit in restoring the effectiveness of the --wrap linker option and making the behavior of --wrap consistent between LTO and normal builds for references within a translation-unit. Note: this --wrap behavior (which is specific to LLD) should not be considered reliable. See comments on https://reviews.llvm.org/D73230 for more. Differential Revision: https://reviews.llvm.org/D85782 (cherry picked from commit 4cb016cd2d8467c572b2e5c5d34f376ee79e4ac1) --- llvm/lib/IR/Globals.cpp | 3 +- llvm/test/CodeGen/AArch64/emutls.ll | 2 -- llvm/test/CodeGen/ARM/emutls.ll | 2 -- .../X86/2008-03-12-ThreadLocalAlias.ll | 4 +-- llvm/test/CodeGen/X86/linux-preemption.ll | 29 +++++++++++++++++++ .../X86/semantic-interposition-comdat.ll | 2 +- llvm/test/CodeGen/X86/tailcallpic1.ll | 2 +- llvm/test/CodeGen/X86/tailcallpic3.ll | 2 +- llvm/test/CodeGen/X86/tailccpic1.ll | 2 +- 9 files changed, 37 insertions(+), 11 deletions(-) diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index dd8e62164de1e..ed946ef3fd12b 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -104,7 +104,8 @@ bool GlobalValue::isInterposable() const { bool GlobalValue::canBenefitFromLocalAlias() const { // See AsmPrinter::getSymbolPreferLocal(). 
- return GlobalObject::isExternalLinkage(getLinkage()) && !isDeclaration() && + return hasDefaultVisibility() && + GlobalObject::isExternalLinkage(getLinkage()) && !isDeclaration() && !isa(this) && !hasComdat(); } diff --git a/llvm/test/CodeGen/AArch64/emutls.ll b/llvm/test/CodeGen/AArch64/emutls.ll index 85d2c1a3b3151..25be391bbfaa4 100644 --- a/llvm/test/CodeGen/AArch64/emutls.ll +++ b/llvm/test/CodeGen/AArch64/emutls.ll @@ -155,7 +155,6 @@ entry: ; ARM64: .data{{$}} ; ARM64: .globl __emutls_v.i4 ; ARM64-LABEL: __emutls_v.i4: -; ARM64-NEXT: .L__emutls_v.i4$local: ; ARM64-NEXT: .xword 4 ; ARM64-NEXT: .xword 4 ; ARM64-NEXT: .xword 0 @@ -163,7 +162,6 @@ entry: ; ARM64: .section .rodata, ; ARM64-LABEL: __emutls_t.i4: -; ARM64-NEXT: .L__emutls_t.i4$local: ; ARM64-NEXT: .word 15 ; ARM64-NOT: __emutls_v.i5: diff --git a/llvm/test/CodeGen/ARM/emutls.ll b/llvm/test/CodeGen/ARM/emutls.ll index 4327086685e91..92b656d9ba095 100644 --- a/llvm/test/CodeGen/ARM/emutls.ll +++ b/llvm/test/CodeGen/ARM/emutls.ll @@ -238,7 +238,6 @@ entry: ; ARM32: .data{{$}} ; ARM32: .globl __emutls_v.i4 ; ARM32-LABEL: __emutls_v.i4: -; ARM32-NEXT: .L__emutls_v.i4$local: ; ARM32-NEXT: .long 4 ; ARM32-NEXT: .long 4 ; ARM32-NEXT: .long 0 @@ -246,7 +245,6 @@ entry: ; ARM32: .section .rodata, ; ARM32-LABEL: __emutls_t.i4: -; ARM32-NEXT: .L__emutls_t.i4$local: ; ARM32-NEXT: .long 15 ; ARM32-NOT: __emutls_v.i5: diff --git a/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll index 89d249c091786..2ca003e052aa6 100644 --- a/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll +++ b/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll @@ -12,7 +12,7 @@ target triple = "i386-pc-linux-gnu" define i32 @foo() { ; CHECK-LABEL: foo: -; CHECK: leal .L__libc_resp$local@TLSLDM +; CHECK: leal __libc_resp@TLSLD entry: %retval = alloca i32 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -27,7 +27,7 @@ return: ; preds = %entry define i32 @bar() { ; CHECK-LABEL: bar: -; CHECK: leal .L__libc_resp$local@TLSLDM +; CHECK: leal __libc_resp@TLSLD entry: %retval = alloca i32 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] diff --git a/llvm/test/CodeGen/X86/linux-preemption.ll b/llvm/test/CodeGen/X86/linux-preemption.ll index 49a7becf13432..15265f4019924 100644 --- a/llvm/test/CodeGen/X86/linux-preemption.ll +++ b/llvm/test/CodeGen/X86/linux-preemption.ll @@ -20,6 +20,14 @@ define i32* @get_strong_default_global() { ; STATIC: movl $strong_default_global, %eax ; CHECK32: movl strong_default_global@GOT(%eax), %eax +@strong_hidden_global = hidden global i32 42 +define i32* @get_hidden_default_global() { + ret i32* @strong_hidden_global +} +; CHECK: leaq strong_hidden_global(%rip), %rax +; STATIC: movl $strong_hidden_global, %eax +; CHECK32: leal strong_hidden_global@GOTOFF(%eax), %eax + @weak_default_global = weak global i32 42 define i32* @get_weak_default_global() { ret i32* @weak_default_global @@ -96,6 +104,14 @@ define i32* @get_strong_default_alias() { ; STATIC: movl $strong_default_alias, %eax ; CHECK32: movl strong_default_alias@GOT(%eax), %eax +@strong_hidden_alias = hidden alias i32, i32* @aliasee +define i32* @get_strong_hidden_alias() { + ret i32* @strong_hidden_alias +} +; CHECK: leaq strong_hidden_alias(%rip), %rax +; STATIC: movl $strong_hidden_alias, %eax +; CHECK32: leal strong_hidden_alias@GOTOFF(%eax), %eax + @weak_default_alias = weak alias i32, i32* @aliasee define i32* @get_weak_default_alias() { ret i32* @weak_default_alias @@ -149,6 +165,16 @@ 
define void()* @get_strong_default_function() { ; STATIC: movl $strong_default_function, %eax ; CHECK32: movl strong_default_function@GOT(%eax), %eax +define hidden void @strong_hidden_function() { + ret void +} +define void()* @get_strong_hidden_function() { + ret void()* @strong_hidden_function +} +; CHECK: leaq strong_hidden_function(%rip), %rax +; STATIC: movl $strong_hidden_function, %eax +; CHECK32: leal strong_hidden_function@GOTOFF(%eax), %eax + define weak void @weak_default_function() { ret void } @@ -234,6 +260,9 @@ define void()* @get_external_preemptable_function() { ; COMMON: .globl strong_default_alias ; COMMON-NEXT: .set strong_default_alias, aliasee +; COMMON-NEXT: .globl strong_hidden_alias +; COMMON-NEXT: .hidden strong_hidden_alias +; COMMON-NEXT: .set strong_hidden_alias, aliasee ; COMMON-NEXT: .weak weak_default_alias ; COMMON-NEXT: .set weak_default_alias, aliasee ; COMMON-NEXT: .globl strong_local_alias diff --git a/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll b/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll index d0efd4d11c958..d11be2d6bd0c2 100644 --- a/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll +++ b/llvm/test/CodeGen/X86/semantic-interposition-comdat.ll @@ -3,7 +3,7 @@ $comdat_func = comdat any ; CHECK-LABEL: func2: -; CHECK-NEXT: .Lfunc2$local +; CHECK-NOT: .Lfunc2$local declare void @func() diff --git a/llvm/test/CodeGen/X86/tailcallpic1.ll b/llvm/test/CodeGen/X86/tailcallpic1.ll index 717cc1fddec93..ed101fcccd2db 100644 --- a/llvm/test/CodeGen/X86/tailcallpic1.ll +++ b/llvm/test/CodeGen/X86/tailcallpic1.ll @@ -12,5 +12,5 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { entry: %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] ret i32 %tmp11 -; CHECK: jmp .Ltailcallee$local +; CHECK: jmp tailcallee } diff --git a/llvm/test/CodeGen/X86/tailcallpic3.ll b/llvm/test/CodeGen/X86/tailcallpic3.ll index 13b160aae2f63..edc58052d82f6 100644 --- a/llvm/test/CodeGen/X86/tailcallpic3.ll +++ b/llvm/test/CodeGen/X86/tailcallpic3.ll @@ -16,7 +16,7 @@ entry: ret void } ; CHECK: tailcall_hidden: -; CHECK: jmp .Ltailcallee_hidden$local +; CHECK: jmp tailcallee_hidden define internal void @tailcallee_internal() { entry: diff --git a/llvm/test/CodeGen/X86/tailccpic1.ll b/llvm/test/CodeGen/X86/tailccpic1.ll index dbdc56aa61c74..de8f2219bc2f3 100644 --- a/llvm/test/CodeGen/X86/tailccpic1.ll +++ b/llvm/test/CodeGen/X86/tailccpic1.ll @@ -12,5 +12,5 @@ define tailcc i32 @tailcaller(i32 %in1, i32 %in2) { entry: %tmp11 = tail call tailcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] ret i32 %tmp11 -; CHECK: jmp .Ltailcallee$local +; CHECK: jmp tailcallee } From 4fe4e35452ef17ce3db08b080e3b0642d36c5094 Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Sun, 13 Sep 2020 17:24:34 +0530 Subject: [PATCH 104/109] [Flang] Add GettingInvolved documentation page and sidebar. Adds a new GettingInvolved page to documentation which provides details about mailing list, chats and calls. Adds a sidebar page which provides common links on all documentation pages. 
The links include: - Getting Started - Getting Involved - Github Repository - Bug Reports - Code Review Depends on https://reviews.llvm.org/D87242 Reviewed By: richard.barton.arm Differential Revision: https://reviews.llvm.org/D87270 (cherry picked from commit fe395aecd9e70b815e6490639098d815385f9932) --- flang/docs/GettingInvolved.md | 72 +++++++++++++++++++++++++ flang/docs/_templates/indexsidebar.html | 26 +++++++++ flang/docs/_templates/layout.html | 14 +++++ flang/docs/conf.py | 8 ++- flang/docs/index.md | 1 + 5 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 flang/docs/GettingInvolved.md create mode 100644 flang/docs/_templates/indexsidebar.html create mode 100644 flang/docs/_templates/layout.html diff --git a/flang/docs/GettingInvolved.md b/flang/docs/GettingInvolved.md new file mode 100644 index 0000000000000..a244fbcee56a0 --- /dev/null +++ b/flang/docs/GettingInvolved.md @@ -0,0 +1,72 @@ + +# Getting Involved + +```eval_rst +.. contents:: + :local: +``` + +The Flang Project welcomes contributions of all kinds. +Please feel free to join the mailing list or the slack channel for discussions related to development of Flang. +To understand the status of various developments in Flang please join the respective call. + +## Mailing Lists + +[Developer's List (flang-dev)](http://lists.llvm.org/mailman/listinfo/flang-dev) + + This list is for people who want to be included in technical discussions related to Flang. People post to this list when they have questions about writing code + for or using the Flang tools. It is relatively low volume. + + +[Commits Archive (flang-commits)](http://lists.llvm.org/pipermail/flang-commits) + + This list contains all commit messages that are made when Flang developers + commit code changes to the repository. It also serves as a forum for + patch review (i.e. send patches here). It is useful for those who want to + stay on the bleeding edge of Flang development. This list is high + volume. + +## Chat + +### Flang Slack Workspace + +- There is a Slack workspace dedicated to Flang. +- There are a number of topic-oriented channels available (e.g., #driver, #f18-semantics, #fir). +- Add yourself via the *[invitation link](https://join.slack.com/t/flang-compiler/shared_invite/zt-2pcn51lh-VrRQL_YUOkxA_1CEfMGQhw "title")* + +## Calls + +### Flang Community Biweekly Call + +- General updates on the Flang Project, both LLVM Flang and current Flang. +- Join [Flang Community Biweekly Call](https://nvmeet.webex.com/nvmeet/j.php?MTID=mb4edb8c799f69ec2dc0554acc969a162) +- Time: On Wednesdays 8:30 Pacific Time, on the weeks alternating with regular Flang Community Technical Biweekly Call. +- Minutes: They are sent to [flang-dev](http://lists.llvm.org/mailman/listinfo/flang-dev). Search for `Flang Biweekly Sync - Notes`. + +### Flang Community Technical Biweekly Call + +- Technical topics call. +- Join [Flang Community Technical Biweekly Call](https://bluejeans.com/625064848?src=join_info) +- Time: On Mondays 8:30 Pacific Time, on the weeks alternating with regular Flang Community Biweekly Call. +- The agenda is in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). + +### LLVM Alias Analysis Technical Call + +- For people working on improvements to LLVM alias analysis. +- Join [LLVM Alias Analysis Technical Call](https://bluejeans.com/101176001?src=join_info) +- Time: Tuesdays 10:00 AM Pacific Time, every 4 weeks. 
+- The agenda is in this [Google Doc](https://docs.google.com/document/d/1ybwEKDVtIbhIhK50qYtwKsL50K-NvB6LfuBsfepBZ9Y/). + +### OpenMP Technical Call + +- Development updates on OpenMP and OpenACC in the Flang Project. +- Join [OpenMP Technical Call](https://bit.ly/39eQW3o) +- Time: Weekly call on every Thursdays 8:00 AM Pacific time. +- Meeting minutes are [here](https://docs.google.com/document/d/1yA-MeJf6RYY-ZXpdol0t7YoDoqtwAyBhFLr5thu5pFI). +- Status tracking [page](https://docs.google.com/spreadsheets/d/1FvHPuSkGbl4mQZRAwCIndvQx9dQboffiD-xD0oqxgU0/edit#gid=0). diff --git a/flang/docs/_templates/indexsidebar.html b/flang/docs/_templates/indexsidebar.html new file mode 100644 index 0000000000000..3c8f1abdf9000 --- /dev/null +++ b/flang/docs/_templates/indexsidebar.html @@ -0,0 +1,26 @@ +{# This template defines sidebar which can be used to provide common links on + all documentation pages. #} + +

Documentation

+ + + +

Getting Involved

+ + + +

Additional Links

+ + diff --git a/flang/docs/_templates/layout.html b/flang/docs/_templates/layout.html new file mode 100644 index 0000000000000..12b7731ccca7d --- /dev/null +++ b/flang/docs/_templates/layout.html @@ -0,0 +1,14 @@ +{% extends "!layout.html" %} + +{% block extrahead %} + +{% endblock %} + +{% block rootrellink %} + +
  • Flang Home | 
  • +
  • Documentation»
  • +{% endblock %} diff --git a/flang/docs/conf.py b/flang/docs/conf.py index f5eb283a186a5..197721a4e4c80 100644 --- a/flang/docs/conf.py +++ b/flang/docs/conf.py @@ -178,7 +178,13 @@ def setup(app): #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +html_sidebars = { + '**': [ + 'indexsidebar.html', + 'searchbox.html', + ] +} + # Additional templates that should be rendered to pages, maps page names to # template names. diff --git a/flang/docs/index.md b/flang/docs/index.md index 4c07170565227..bd7092a418f33 100644 --- a/flang/docs/index.md +++ b/flang/docs/index.md @@ -15,6 +15,7 @@ Flang is LLVM's Fortran frontend .. toctree:: :titlesonly: + GettingInvolved FortranForCProgrammers C++style C++17 From b78e5de029c26c309f541ab883fa5d6d953b073d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 17 Sep 2020 16:00:54 +0800 Subject: [PATCH 105/109] [SelectionDAG] Check any use of negation result before removal 2508ef01 fixed a bug about constant removal in negation. But after sanitizing check I found there's still some issue about it so it's reverted. Temporary nodes will be removed if useless in negation. Before the removal, they'd be checked if any other nodes used it. So the removal was moved after getNode. However in rare cases the node to be removed is the same as result of getNode. We missed that and will be fixed by this patch. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D87614 (cherry picked from commit a2fb5446be960ad164060b3c05fc268f7f72d67a) --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 22 ++++++++++----- llvm/test/CodeGen/X86/pr47517.ll | 28 +++++++++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr47517.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 819e608c6896e..64af293caf9ea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5751,8 +5751,10 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // If we already have the use of the negated floating constant, it is free // to negate it even it has multiple uses. 
- if (!Op.hasOneUse() && CFP.use_empty()) + if (!Op.hasOneUse() && CFP.use_empty()) { + RemoveDeadNode(CFP); break; + } Cost = NegatibleCost::Neutral; return CFP; } @@ -5810,7 +5812,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegX && (CostX <= CostY)) { Cost = CostX; SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); - RemoveDeadNode(NegY); + if (NegY != N) + RemoveDeadNode(NegY); return N; } @@ -5818,7 +5821,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegY) { Cost = CostY; SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); - RemoveDeadNode(NegX); + if (NegX != N) + RemoveDeadNode(NegX); return N; } break; @@ -5857,7 +5861,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegX && (CostX <= CostY)) { Cost = CostX; SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); - RemoveDeadNode(NegY); + if (NegY != N) + RemoveDeadNode(NegY); return N; } @@ -5870,7 +5875,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegY) { Cost = CostY; SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags); - RemoveDeadNode(NegX); + if (NegX != N) + RemoveDeadNode(NegX); return N; } break; @@ -5901,7 +5907,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegX && (CostX <= CostY)) { Cost = std::min(CostX, CostZ); SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); - RemoveDeadNode(NegY); + if (NegY != N) + RemoveDeadNode(NegY); return N; } @@ -5909,7 +5916,8 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (NegY) { Cost = std::min(CostY, CostZ); SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); - RemoveDeadNode(NegX); + if (NegX != N) + RemoveDeadNode(NegX); return N; } break; diff --git a/llvm/test/CodeGen/X86/pr47517.ll b/llvm/test/CodeGen/X86/pr47517.ll new file mode 100644 index 0000000000000..5672fbc69a41d --- /dev/null +++ b/llvm/test/CodeGen/X86/pr47517.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple x86_64 < %s | FileCheck %s + +; To ensure unused floating point constant is correctly removed +define float @test(float %src, float* %p) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq $0, (%rdi) +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq +entry: + %a0 = getelementptr inbounds float, float* %p, i32 0 + %a1 = getelementptr inbounds float, float* %p, i32 1 + store float 0.000000e+00, float* %a0 + store float 0.000000e+00, float* %a1 + %zero = load float, float* %a0 + %fmul1 = fmul fast float %zero, %src + %fadd1 = fadd fast float %fmul1, %zero + %fmul2 = fmul fast float %fadd1, 2.000000e+00 + %fmul3 = fmul fast float %fmul2, %fmul2 + %fmul4 = fmul fast float %fmul2, 2.000000e+00 + %fadd2 = fadd fast float %fmul4, -3.000000e+00 + %fmul5 = fmul fast float %fadd2, %fmul2 + %fadd3 = fadd fast float %fmul2, %src + %fadd4 = fadd fast float %fadd3, %fmul5 + %fmul6 = fmul fast float %fmul3, %fadd4 + ret float %fmul6 +} From 410b0dc84bbdafabe3a2c3eedd96e50340a6e0d0 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 17 Jul 2020 10:41:35 -0700 Subject: [PATCH 106/109] [llvm] Add contains(KeyType) -> bool methods to SmallPtrSet Matches C++20 API addition. 
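
A minimal usage sketch of the new API (a hypothetical caller, for illustration
only; the authoritative usage is in the SmallPtrSetTest.cpp hunk below):

```
#include "llvm/ADT/SmallPtrSet.h"

// Membership tests previously had to be spelled Set.count(P) != 0 or
// Set.find(P) != Set.end(); contains() mirrors the C++20 container API.
bool hasValue(const llvm::SmallPtrSet<int *, 4> &Set, int *P) {
  return Set.contains(P); // true iff P was previously inserted
}
```
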
Differential Revision: https://reviews.llvm.org/D83449 (cherry picked from commit a0385bd7acd6e1d16224b4257f4cb50e59f1d75e) --- llvm/include/llvm/ADT/SmallPtrSet.h | 3 +++ llvm/unittests/ADT/SmallPtrSetTest.cpp | 34 +++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 0ab05cfe611aa..57dd8f6b695d7 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -378,6 +378,9 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase { iterator find(ConstPtrType Ptr) const { return makeIterator(find_imp(ConstPtrTraits::getAsVoidPointer(Ptr))); } + bool contains(ConstPtrType Ptr) const { + return find_imp(ConstPtrTraits::getAsVoidPointer(Ptr)) != EndPointer(); + } template void insert(IterT I, IterT E) { diff --git a/llvm/unittests/ADT/SmallPtrSetTest.cpp b/llvm/unittests/ADT/SmallPtrSetTest.cpp index 3226fe615509c..eacd62ffc6ff0 100644 --- a/llvm/unittests/ADT/SmallPtrSetTest.cpp +++ b/llvm/unittests/ADT/SmallPtrSetTest.cpp @@ -313,8 +313,8 @@ TEST(SmallPtrSetTest, ConstTest) { IntSet.insert(B); EXPECT_EQ(IntSet.count(B), 1u); EXPECT_EQ(IntSet.count(C), 1u); - EXPECT_NE(IntSet.find(B), IntSet.end()); - EXPECT_NE(IntSet.find(C), IntSet.end()); + EXPECT_TRUE(IntSet.contains(B)); + EXPECT_TRUE(IntSet.contains(C)); } // Verify that we automatically get the const version of PointerLikeTypeTraits @@ -327,7 +327,7 @@ TEST(SmallPtrSetTest, ConstNonPtrTest) { TestPair Pair(&A[0], 1); IntSet.insert(Pair); EXPECT_EQ(IntSet.count(Pair), 1u); - EXPECT_NE(IntSet.find(Pair), IntSet.end()); + EXPECT_TRUE(IntSet.contains(Pair)); } // Test equality comparison. @@ -367,3 +367,31 @@ TEST(SmallPtrSetTest, EqualityComparison) { EXPECT_NE(c, e); EXPECT_NE(e, d); } + +TEST(SmallPtrSetTest, Contains) { + SmallPtrSet Set; + int buf[4] = {0, 11, 22, 11}; + EXPECT_FALSE(Set.contains(&buf[0])); + EXPECT_FALSE(Set.contains(&buf[1])); + + Set.insert(&buf[0]); + Set.insert(&buf[1]); + EXPECT_TRUE(Set.contains(&buf[0])); + EXPECT_TRUE(Set.contains(&buf[1])); + EXPECT_FALSE(Set.contains(&buf[3])); + + Set.insert(&buf[1]); + EXPECT_TRUE(Set.contains(&buf[0])); + EXPECT_TRUE(Set.contains(&buf[1])); + EXPECT_FALSE(Set.contains(&buf[3])); + + Set.erase(&buf[1]); + EXPECT_TRUE(Set.contains(&buf[0])); + EXPECT_FALSE(Set.contains(&buf[1])); + + Set.insert(&buf[1]); + Set.insert(&buf[2]); + EXPECT_TRUE(Set.contains(&buf[0])); + EXPECT_TRUE(Set.contains(&buf[1])); + EXPECT_TRUE(Set.contains(&buf[2])); +} From 6250d4944539f67d6a605928e97c087fe306a79e Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Thu, 17 Sep 2020 18:10:19 -0400 Subject: [PATCH 107/109] PR47468: Fix findPHICopyInsertPoint, so that copies aren't incorrectly inserted after an INLINEASM_BR. findPHICopyInsertPoint special cases placement in a block with a callbr or invoke in it. In that case, we must ensure that the copy is placed before the INLINEASM_BR or call instruction, if the register is defined prior to that instruction, because it may jump out of the block. Previously, the code placed it immediately after the last def _or use_. This is wrong, if the use is the instruction which may jump. We could correctly place it immediately after the last def (ignoring uses), but that is non-optimal for register pressure. Instead, place the copy after the last def, or before the call/inlineasm_br, whichever is later. 
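
A rough, self-contained sketch of that placement rule (a toy model with
invented names, not the actual MachineBasicBlock code in the
PHIEliminationUtils.cpp hunk below): scan the block bottom-up and stop at
whichever is hit first from the bottom, either the last definition of the
copied register (insert just after it) or the call/INLINEASM_BR that may
leave the block (insert just before it).

```
#include <cstddef>
#include <vector>

struct ToyInst {
  bool DefinesSrcReg; // defines the PHI's source register?
  bool MayLeaveBlock; // call with an EH-pad successor, or INLINEASM_BR?
};

// Index at which the PHI copy would be inserted under the rule above.
std::size_t findToyCopyInsertIndex(const std::vector<ToyInst> &Block) {
  for (std::size_t I = Block.size(); I-- > 0;) {
    if (Block[I].DefinesSrcReg)
      return I + 1; // immediately after the last def
    if (Block[I].MayLeaveBlock)
      return I;     // immediately before the instruction that may jump out
  }
  return 0; // no def and no such call: top of the block
}
```
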
Differential Revision: https://reviews.llvm.org/D87865 (cherry picked from commit f7a53d82c0902147909f28a9295a9d00b4b27d38) --- llvm/lib/CodeGen/PHIEliminationUtils.cpp | 44 ++++++++++--------- .../CodeGen/X86/callbr-asm-phi-placement.ll | 44 +++++++++++++++++++ 2 files changed, 68 insertions(+), 20 deletions(-) create mode 100644 llvm/test/CodeGen/X86/callbr-asm-phi-placement.ll diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp index bae96eb84521a..2a72717e711dc 100644 --- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -27,31 +27,35 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Usually, we just want to insert the copy before the first terminator // instruction. However, for the edge going to a landing pad, we must insert // the copy before the call/invoke instruction. Similarly for an INLINEASM_BR - // going to an indirect target. - if (!SuccMBB->isEHPad() && !SuccMBB->isInlineAsmBrIndirectTarget()) + // going to an indirect target. This is similar to SplitKit.cpp's + // computeLastInsertPoint, and similarly assumes that there cannot be multiple + // instructions that are Calls with EHPad successors or INLINEASM_BR in a + // block. + bool EHPadSuccessor = SuccMBB->isEHPad(); + if (!EHPadSuccessor && !SuccMBB->isInlineAsmBrIndirectTarget()) return MBB->getFirstTerminator(); - // Discover any defs/uses in this basic block. - SmallPtrSet DefUsesInMBB; + // Discover any defs in this basic block. + SmallPtrSet DefsInMBB; MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); - for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) { + for (MachineInstr &RI : MRI.def_instructions(SrcReg)) if (RI.getParent() == MBB) - DefUsesInMBB.insert(&RI); - } + DefsInMBB.insert(&RI); - MachineBasicBlock::iterator InsertPoint; - if (DefUsesInMBB.empty()) { - // No defs. Insert the copy at the start of the basic block. - InsertPoint = MBB->begin(); - } else if (DefUsesInMBB.size() == 1) { - // Insert the copy immediately after the def/use. - InsertPoint = *DefUsesInMBB.begin(); - ++InsertPoint; - } else { - // Insert the copy immediately after the last def/use. - InsertPoint = MBB->end(); - while (!DefUsesInMBB.count(&*--InsertPoint)) {} - ++InsertPoint; + MachineBasicBlock::iterator InsertPoint = MBB->begin(); + // Insert the copy at the _latest_ point of: + // 1. Immediately AFTER the last def + // 2. Immediately BEFORE a call/inlineasm_br. + for (auto I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) { + if (DefsInMBB.contains(&*I)) { + InsertPoint = std::next(I.getReverse()); + break; + } + if ((EHPadSuccessor && I->isCall()) || + I->getOpcode() == TargetOpcode::INLINEASM_BR) { + InsertPoint = I.getReverse(); + break; + } } // Make sure the copy goes after any phi nodes but before diff --git a/llvm/test/CodeGen/X86/callbr-asm-phi-placement.ll b/llvm/test/CodeGen/X86/callbr-asm-phi-placement.ll new file mode 100644 index 0000000000000..9bad6a7e0892f --- /dev/null +++ b/llvm/test/CodeGen/X86/callbr-asm-phi-placement.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs -O2 < %s | FileCheck %s + +;; https://bugs.llvm.org/PR47468 + +;; PHI elimination should place copies BEFORE the inline asm, not +;; after, even if the inline-asm uses as an input the same value as +;; the PHI. 
+
+declare void @foo(i8*)
+
+define void @test1(i8* %arg, i8** %mem) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movq %rsi, %r14
+; CHECK-NEXT:  .Ltmp0: # Block address taken
+; CHECK-NEXT:  .LBB0_1: # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq (%r14), %rbx
+; CHECK-NEXT:    callq foo
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:  # %bb.2: # %end
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    retq
+entry:
+  br label %loop
+
+loop:
+  %a = phi i8* [ %arg, %entry ], [ %b, %loop ]
+  %b = load i8*, i8** %mem, align 8
+  call void @foo(i8* %a)
+  callbr void asm sideeffect "", "*m,X"(i8* %b, i8* blockaddress(@test1, %loop))
+          to label %end [label %loop]
+
+end:
+  ret void
+}

From b513e1963f3a7edc897c6c4e675934d0c58f1802 Mon Sep 17 00:00:00 2001
From: Lucas Prates
Date: Thu, 17 Sep 2020 18:07:35 +0100
Subject: [PATCH 108/109] [CodeGen] Fixing inconsistent ABI mangling of values
 in SelectionDAGBuilder

SelectionDAGBuilder was inconsistently mangling values based on ABI
Calling Conventions when getting them through copyFromRegs in
SelectionDAGBuilder, causing duplicate value type conversions for
function arguments. The check for the mangling requirement was based on
the value's originating instruction and was performed outside of, and
in spite of, the regular Calling Convention Lowering.

The issue could be observed in a scenario such as:

```
%arg1 = load half, half* %const, align 2
%arg2 = call fastcc half @someFunc()
call fastcc void @otherFunc(half %arg1, half %arg2)
; Here, %arg2 was incorrectly mangled twice, as the CallConv data from
; the call to @someFunc() was taken into consideration for the check
; when getting the value for processing the call to @otherFunc(...),
; after the proper conversion had taken place when lowering the return
; value of the first call.
```

This patch fixes the issue by disregarding the Calling Convention
information for such copyFromRegs, making sure the ABI mangling is
properly contained in the Calling Convention Lowering.

This fixes Bugzilla #47454.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D87844

(cherry picked from commit 53d238a961d14eae46f6f2b296ce48026c7bd0a1)
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 30 +------------
 llvm/test/CodeGen/ARM/pr47454.ll              | 49 +++++++++++++++++++
 2 files changed, 51 insertions(+), 28 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/pr47454.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index feb949f81eba3..d2930391f87a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -169,32 +169,6 @@ static cl::opt<unsigned> SwitchPeelThreshold(
 // store [4096 x i8] %data, [4096 x i8]* %buffer
 static const unsigned MaxParallelChains = 64;

-// Return the calling convention if the Value passed requires ABI mangling as it
-// is a parameter to a function or a return value from a function which is not
-// an intrinsic.
-static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) {
-  if (auto *R = dyn_cast<ReturnInst>(V))
-    return R->getParent()->getParent()->getCallingConv();
-
-  if (auto *CI = dyn_cast<CallInst>(V)) {
-    const bool IsInlineAsm = CI->isInlineAsm();
-    const bool IsIndirectFunctionCall =
-        !IsInlineAsm && !CI->getCalledFunction();
-
-    // It is possible that the call instruction is an inline asm statement or an
-    // indirect function call in which case the return value of
-    // getCalledFunction() would be nullptr.
-    const bool IsInstrinsicCall =
-        !IsInlineAsm && !IsIndirectFunctionCall &&
-        CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic;
-
-    if (!IsInlineAsm && !IsInstrinsicCall)
-      return CI->getCallingConv();
-  }
-
-  return None;
-}
-
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
                                       MVT PartVT, EVT ValueVT, const Value *V,
@@ -1624,7 +1598,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
     unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

     RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
-                     Inst->getType(), getABIRegCopyCC(V));
+                     Inst->getType(), None);
     SDValue Chain = DAG.getEntryNode();
     return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
   }
@@ -5555,7 +5529,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   if (VMI != FuncInfo.ValueMap.end()) {
     const auto &TLI = DAG.getTargetLoweringInfo();
     RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
-                     V->getType(), getABIRegCopyCC(V));
+                     V->getType(), None);
     if (RFV.occupiesMultipleRegs()) {
       splitMultiRegDbgValue(RFV.getRegsAndSizes());
       return true;
diff --git a/llvm/test/CodeGen/ARM/pr47454.ll b/llvm/test/CodeGen/ARM/pr47454.ll
new file mode 100644
index 0000000000000..d36a29c4e77ce
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/pr47454.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv8-unknown-linux-unknown -mattr=-fp16 -O0 < %s | FileCheck %s
+
+declare fastcc half @getConstant()
+
+declare fastcc i1 @isEqual(half %0, half %1)
+
+define internal fastcc void @main() {
+; CHECK-LABEL: main:
+; CHECK:       @ %bb.0: @ %Entry
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    mov r11, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    mov r0, #31744
+; CHECK-NEXT:    strh r0, [r11, #-2]
+; CHECK-NEXT:    ldrh r0, [r11, #-2]
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    vstr s0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    bl getConstant
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    vldr s0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    uxth r0, r0
+; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    uxth r1, r0
+; CHECK-NEXT:    vmov s1, r1
+; CHECK-NEXT:    bl isEqual
+; CHECK-NEXT:    mov sp, r11
+; CHECK-NEXT:    pop {r11, pc}
+Entry:
+  ; First arg directly from constant
+  %const = alloca half, align 2
+  store half 0xH7C00, half* %const, align 2
+  %arg1 = load half, half* %const, align 2
+  ; Second arg from function return
+  %arg2 = call fastcc half @getConstant()
+  ; Arguments should have equivalent mangling
+  %result = call fastcc i1 @isEqual(half %arg1, half %arg2)
+  ret void
+}

From 8aca41f39c207b6f9efe2e448986d109892072ad Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Thu, 17 Sep 2020 13:44:01 +0100
Subject: [PATCH 109/109] [clang][docs] Fix documentation of -O

D79916 changed the behaviour from -O2 to -O1 but the documentation was
not updated to reflect this.

(cherry picked from commit 788c7d2ec11dfc868a5b03478c922dc9699c6d47)
---
 clang/docs/CommandGuide/clang.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst
index 2cca04fb31f1a..2dfeafd1817ad 100644
--- a/clang/docs/CommandGuide/clang.rst
+++ b/clang/docs/CommandGuide/clang.rst
@@ -385,7 +385,7 @@ Code Generation Options
   :option:`-Og` Like :option:`-O1`. In future versions, this option might
   disable different optimizations in order to improve debuggability.

-  :option:`-O` Equivalent to :option:`-O2`.
+  :option:`-O` Equivalent to :option:`-O1`.

   :option:`-O4` and higher